Repository: tensorflow/estimator Branch: master Commit: 26a7dc2ef7d3 Files: 187 Total size: 3.9 MB Directory structure: gitextract_a04upojl/ ├── .bazelrc ├── .gitignore ├── BUILD ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── WORKSPACE ├── tensorflow_estimator/ │ ├── BUILD │ ├── estimator.bzl │ ├── python/ │ │ └── estimator/ │ │ ├── BUILD │ │ ├── api/ │ │ │ ├── BUILD │ │ │ ├── api_gen.bzl │ │ │ ├── extractor_wrapper.py │ │ │ └── generator_wrapper.py │ │ ├── canned/ │ │ │ ├── __init__.py │ │ │ ├── baseline.py │ │ │ ├── baseline_estimator_test.py │ │ │ ├── baseline_test.py │ │ │ ├── canned_estimator_ds_integration_test.py │ │ │ ├── dnn.py │ │ │ ├── dnn_estimator_test.py │ │ │ ├── dnn_linear_combined.py │ │ │ ├── dnn_linear_combined_estimator_test.py │ │ │ ├── dnn_linear_combined_test.py │ │ │ ├── dnn_test_fc_v2.py │ │ │ ├── dnn_testing_utils.py │ │ │ ├── head.py │ │ │ ├── head_test.py │ │ │ ├── kmeans.py │ │ │ ├── kmeans_test.py │ │ │ ├── linear.py │ │ │ ├── linear_estimator_test.py │ │ │ ├── linear_model_test.py │ │ │ ├── linear_optimizer/ │ │ │ │ ├── BUILD │ │ │ │ ├── __init__.py │ │ │ │ ├── doc/ │ │ │ │ │ └── sdca.ipynb │ │ │ │ └── python/ │ │ │ │ ├── sdca_test.py │ │ │ │ └── utils/ │ │ │ │ ├── sdca_ops.py │ │ │ │ ├── sdca_ops_test.py │ │ │ │ ├── sharded_mutable_dense_hashtable.py │ │ │ │ └── sharded_mutable_dense_hashtable_test.py │ │ │ ├── linear_test.py │ │ │ ├── linear_testing_utils.py │ │ │ ├── metric_keys.py │ │ │ ├── optimizers.py │ │ │ ├── optimizers_test.py │ │ │ ├── optimizers_test_v2.py │ │ │ ├── parsing_utils.py │ │ │ ├── parsing_utils_test.py │ │ │ ├── prediction_keys.py │ │ │ ├── rnn.py │ │ │ ├── rnn_test.py │ │ │ ├── saved_model_estimator.py │ │ │ ├── saved_model_estimator_test.py │ │ │ ├── testdata/ │ │ │ │ └── wire_vocabulary.txt │ │ │ ├── timeseries/ │ │ │ │ ├── BUILD │ │ │ │ ├── ar_model.py │ │ │ │ ├── ar_model_test.py │ │ │ │ ├── ar_model_training_test.py │ │ │ │ ├── estimators.py │ │ │ │ ├── estimators_test.py │ │ │ │ ├── feature_keys.py │ 
│ │ │ ├── head.py │ │ │ │ ├── head_test.py │ │ │ │ ├── math_utils.py │ │ │ │ ├── math_utils_test.py │ │ │ │ ├── model.py │ │ │ │ ├── model_utils.py │ │ │ │ ├── saved_model_utils.py │ │ │ │ └── state_management.py │ │ │ └── v1/ │ │ │ ├── __init__.py │ │ │ ├── baseline_estimator_test_v1.py │ │ │ ├── baseline_test_v1.py │ │ │ ├── dnn_estimator_test_v1.py │ │ │ ├── dnn_linear_combined_estimator_test_v1.py │ │ │ ├── dnn_linear_combined_test_v1.py │ │ │ ├── dnn_test_fc_v1_v1.py │ │ │ ├── dnn_test_fc_v2_v1.py │ │ │ ├── dnn_testing_utils_v1.py │ │ │ ├── linear_estimator_test_v1.py │ │ │ ├── linear_test_v1.py │ │ │ └── linear_testing_utils_v1.py │ │ ├── distribute_strategy_estimator_integration_test.py │ │ ├── distribute_strategy_estimator_training_test.py │ │ ├── early_stopping.py │ │ ├── early_stopping_test.py │ │ ├── estimator.py │ │ ├── estimator_export.py │ │ ├── estimator_export_test.py │ │ ├── estimator_lib.py │ │ ├── estimator_test.py │ │ ├── export/ │ │ │ ├── __init__.py │ │ │ ├── export.py │ │ │ ├── export_lib.py │ │ │ ├── export_output.py │ │ │ ├── export_test.py │ │ │ ├── function.py │ │ │ └── function_test.py │ │ ├── exporter.py │ │ ├── exporter_test.py │ │ ├── extenders.py │ │ ├── extenders_test.py │ │ ├── gc.py │ │ ├── gc_test.py │ │ ├── head/ │ │ │ ├── __init__.py │ │ │ ├── base_head.py │ │ │ ├── base_head_test.py │ │ │ ├── binary_class_head.py │ │ │ ├── binary_class_head_test.py │ │ │ ├── head_utils.py │ │ │ ├── multi_class_head.py │ │ │ ├── multi_class_head_test.py │ │ │ ├── multi_head.py │ │ │ ├── multi_head_test.py │ │ │ ├── multi_label_head.py │ │ │ ├── multi_label_head_test.py │ │ │ ├── regression_head.py │ │ │ ├── regression_head_test.py │ │ │ ├── sequential_head.py │ │ │ └── sequential_head_test.py │ │ ├── hooks/ │ │ │ ├── __init__.py │ │ │ ├── basic_session_run_hooks.py │ │ │ ├── basic_session_run_hooks_test.py │ │ │ ├── fake_summary_writer.py │ │ │ ├── hooks.py │ │ │ ├── hooks_test.py │ │ │ └── session_run_hook.py │ │ ├── inputs/ │ │ │ ├── 
__init__.py │ │ │ ├── inputs.py │ │ │ ├── numpy_io.py │ │ │ ├── numpy_io_test.py │ │ │ ├── pandas_io.py │ │ │ ├── pandas_io_test.py │ │ │ └── queues/ │ │ │ ├── __init__.py │ │ │ ├── feeding_functions.py │ │ │ ├── feeding_functions_test.py │ │ │ ├── feeding_queue_runner.py │ │ │ └── feeding_queue_runner_test.py │ │ ├── keras_distribute_strategy_test.py │ │ ├── keras_lib.py │ │ ├── keras_premade_model_test.py │ │ ├── keras_test.py │ │ ├── mode_keys.py │ │ ├── model_fn.py │ │ ├── model_fn_test.py │ │ ├── object_checkpointing_test.py │ │ ├── run_config.py │ │ ├── run_config_test.py │ │ ├── tf_estimator_doctest.py │ │ ├── tools/ │ │ │ ├── __init__.py │ │ │ ├── analytics.py │ │ │ ├── checkpoint_converter.py │ │ │ └── checkpoint_converter_test.py │ │ ├── tpu/ │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── _tpu_estimator_embedding.py │ │ │ ├── autotuning_iterations_per_loop_test.py │ │ │ ├── error_handling.py │ │ │ ├── error_handling_test.py │ │ │ ├── iteration_count_estimator.py │ │ │ ├── spatial_partitioning_api.md │ │ │ ├── tpu_config.py │ │ │ ├── tpu_config_test.py │ │ │ ├── tpu_context.py │ │ │ ├── tpu_enqueue_sequence_test.py │ │ │ ├── tpu_estimator.py │ │ │ ├── tpu_estimator_embedding_test.py │ │ │ ├── tpu_estimator_evaluation_test.py │ │ │ ├── tpu_estimator_export_test.py │ │ │ ├── tpu_estimator_gradients_test.py │ │ │ ├── tpu_estimator_input_v2_test.py │ │ │ ├── tpu_estimator_integration_test.py │ │ │ ├── tpu_estimator_model_parallelism_test.py │ │ │ ├── tpu_estimator_signals_test.py │ │ │ ├── tpu_estimator_test.py │ │ │ └── util.py │ │ ├── training.py │ │ ├── training_test.py │ │ ├── util.py │ │ └── util_test.py │ └── tools/ │ └── pip_package/ │ ├── BUILD │ ├── build_pip_package.sh │ ├── create_pip_helper.py │ └── setup.py └── third_party/ └── py/ ├── BUILD ├── BUILD.tpl └── python_configure.bzl ================================================ FILE CONTENTS ================================================ ================================================ FILE: 
.bazelrc ================================================ # Default options should come above this line # Put user-specific options in .bazelrc.user try-import %workspace%/.bazelrc.user ================================================ FILE: .gitignore ================================================ # editor files *.swp *~ .vscode/ .DS_Store # bazel /.bazelrc.user /bazel-* # python *.pyc *.pyo __pycache__ *.whl .ipynb_checkpoints ================================================ FILE: BUILD ================================================ # Description: Tensorflow Estimator. licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) ================================================ FILE: CONTRIBUTING.md ================================================ Want to contribute? Great! First, read this page (including the small print at the end). ### Before you contribute Before we can use your code, you must sign the [Google Individual Contributor License Agreement] (https://cla.developers.google.com/about/google-individual) (CLA), which you can do online. The CLA is necessary mainly because you own the copyright to your changes, even after your contribution becomes part of our codebase, so we need your permission to use and distribute your code. We also need to be sure of various other things—for instance that you'll tell us if you know that your code infringes on other people's patents. You don't have to sign the CLA until after you've submitted your code for review and a member has approved it, but you must do it before we can put your code into our codebase. Before you start working on a larger contribution, you should get in touch with us first through the issue tracker with your idea so that we can help out and possibly guide you. Coordinating up front makes it much easier to avoid frustration later on. ### Code reviews All submissions, including submissions by project members, require review. We use Github pull requests for this purpose. 
### The small print Contributions made by corporations are covered by a different agreement than the one above, the [Software Grant and Corporate Contributor License Agreement] (https://cla.developers.google.com/about/google-corporate). ================================================ FILE: LICENSE ================================================ Copyright 2018 The TensorFlow Authors. All rights reserved. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2017, The TensorFlow Authors. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ ----------------- | **`Documentation`** | |-----------------| | [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/python/tf/estimator) | TensorFlow Estimator is a high-level TensorFlow API that greatly simplifies machine learning programming. Estimators encapsulate training, evaluation, prediction, and exporting for your model. ## Getting Started See our Estimator [getting started guide](https://www.tensorflow.org/guide/estimator) for an introduction to the Estimator APIs. ## Installation `tf.estimator` is installed when you install the TensorFlow pip package. See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions. ## Developing If you want to build TensorFlow Estimator locally, you will need to [install Bazel](https://docs.bazel.build/versions/master/install.html) and [install TensorFlow](https://www.tensorflow.org/install/pip). ```sh # To build TensorFlow Estimator whl file. bazel build //tensorflow_estimator/tools/pip_package:build_pip_package bazel-bin/tensorflow_estimator/tools/pip_package/build_pip_package /tmp/estimator_pip # To run all Estimator tests bazel test //tensorflow_estimator/... ``` ## Contribution guidelines If you want to contribute to TensorFlow Estimator, be sure to review the [contribution guidelines](CONTRIBUTING.md). 
**Note that this repository is included as a component of the main TensorFlow package, and any issues encountered while using Estimators should be filed under [TensorFlow GitHub Issues](https://github.com/tensorflow/tensorflow/issues), as we do not separately track issues in this repository. You can link this repository in any issues created as necessary.** Please see [TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions and discussion and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). ## License [Apache License 2.0](LICENSE) ================================================ FILE: WORKSPACE ================================================ workspace(name = "org_tensorflow_estimator") # Use a custom python toolchain to make sure we always use the python binary # provided by PYTHON_BIN_PATH. # This is required due to https://github.com/bazelbuild/bazel/issues/7899, # because --python_path will not work since Bazel 0.27 load("//third_party/py:python_configure.bzl", "python_configure") python_configure(name = "local_config_py_toolchain") register_toolchains("@local_config_py_toolchain//:py_toolchain") ================================================ FILE: tensorflow_estimator/BUILD ================================================ # Placeholder: load py_library # Description: Tensorflow Estimator. load( "//tensorflow_estimator/python/estimator/api:api_gen.bzl", "ESTIMATOR_API_INIT_FILES_V1", "ESTIMATOR_API_INIT_FILES_V2", "generate_apis", ) licenses(["notice"]) package(default_visibility = ["//tensorflow_estimator:internal"]) exports_files(["LICENSE"]) # TODO(mikecase): Clean up. Remove all non estimator packages. 
package_group( name = "internal", packages = [ "//learning/brain/...", "//learning/deepmind/research/...", "//learning/tfx/models/uplift/estimators/...", "//nlp/nlx/ads/expmatch/model/...", "//nlp/nlx/common/query_bert/...", "//nlp/nlx/i18n/pangloss/...", "//tensorflow_estimator/...", "//third_party/py/tensorflow_privacy/...", "//third_party/tensorflow/python/estimator/...", ], ) # This flag specifies whether Estimator 2.0 API should be built instead # of 1.* API. Note that Estimator 2.0 API is currently under development. config_setting( name = "api_version_2", define_values = {"estimator_api_version": "2"}, ) config_setting( name = "no_estimator_py_deps", define_values = {"no_estimator_py_deps": "true"}, visibility = ["//visibility:public"], ) py_library( name = "tensorflow_estimator", srcs = [ ":root_init_gen", ":estimator_python_api_gen_compat_v1", ":estimator_python_api_gen_compat_v2", # Old API files. Delete once TensorFlow is updated to import from new location. "//tensorflow_estimator/python/estimator/api:estimator_python_api_gen", "//tensorflow_estimator/python/estimator/api:estimator_python_api_gen_compat_v1", "//tensorflow_estimator/python/estimator/api:estimator_python_api_gen_compat_v2", ], srcs_version = "PY3", visibility = [ "//tensorflow_estimator:internal", "//third_party/tensorflow/tools/docs/google:__subpackages__", ], deps = [ "//tensorflow_estimator/python/estimator:estimator_py", ], ) genrule( name = "root_init_gen", srcs = select({ "api_version_2": ["_api/v2/v2.py"], "//conditions:default": ["_api/v1/v1.py"], }), outs = ["__init__.py"], cmd = select({ "api_version_2": "cp $(location :_api/v2/v2.py) $(OUTS)", "//conditions:default": "cp $(location :_api/v1/v1.py) $(OUTS)", }), ) generate_apis( name = "estimator_python_api_gen_compat_v1", api_version = 1, output_dir = "_api/v1/", output_files = ESTIMATOR_API_INIT_FILES_V1, output_package = "tensorflow_estimator._api.v1", root_file_name = "v1.py", ) generate_apis( name = 
"estimator_python_api_gen_compat_v2", api_version = 2, output_dir = "_api/v2/", output_files = ESTIMATOR_API_INIT_FILES_V2, output_package = "tensorflow_estimator._api.v2", root_file_name = "v2.py", ) ================================================ FILE: tensorflow_estimator/estimator.bzl ================================================ """Estimator common skylark macros.""" # Macro to run Estimator py_tests against pip installation. def py_test(deps = [], **kwargs): native.py_test( deps = select({ "//conditions:default": deps, "//tensorflow_estimator:no_estimator_py_deps": [], }), **kwargs ) def tpu_py_test(**kwargs): # Skip the tpu test for Estimator oss. pass # We are never indexing generated code in the OSS build, but still # return a select() for consistency. def if_indexing_source_code( if_true, # @unused if_false): """Return a select() on whether or not we are building for source code indexing.""" return select({ "//conditions:default": if_false, }) ================================================ FILE: tensorflow_estimator/python/estimator/BUILD ================================================ # Placeholder: load py_library load("//tensorflow_estimator:estimator.bzl", "py_test") package(default_visibility = ["//tensorflow_estimator:internal"]) licenses(["notice"]) py_test( name = "tf_estimator_doctest", srcs = ["tf_estimator_doctest.py"], python_version = "PY3", tags = [ "no_oss_py2", "noasan", "nomsan", "notsan", ], deps = [ ":estimator_py", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", ], ) py_library( name = "estimator_py", srcs = [ "estimator_lib.py", ], srcs_version = "PY3", visibility = ["//visibility:public"], deps = [ ":base_head", ":baseline", ":basic_session_run_hooks", ":binary_class_head", ":checkpoint_converter", ":dnn", ":dnn_linear_combined", ":early_stopping", ":estimator", ":export", ":exporter", ":extenders", ":fake_summary_writer", ":function", ":hooks", ":inputs", ":keras", ":kmeans", ":linear", ":mode_keys", 
":model_fn", ":multi_class_head", ":multi_head", ":multi_label_head", ":parsing_utils", ":regression_head", ":rnn", ":run_config", ":saved_model_estimator", ":sequential_head", ":session_run_hook", ":training", "//tensorflow_estimator/python/estimator:expect_tensorboard_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", "//tensorflow_estimator/python/estimator/canned/timeseries:estimators", "//tensorflow_estimator/python/estimator/tpu:tpu_estimator", ], ) py_library( name = "exporter", srcs = ["exporter.py"], srcs_version = "PY3", deps = [ ":estimator_export", ":gc", ":metric_keys", ":util", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "exporter_test", size = "medium", srcs = ["exporter_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":estimator", ":exporter", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "extenders", srcs = ["extenders.py"], srcs_version = "PY3", deps = [ ":estimator", ":estimator_export", ":mode_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "extenders_test", size = "medium", srcs = ["extenders_test.py"], python_version = "PY3", srcs_version = "PY3", tags = ["notsan"], # b/62863147 deps = [ ":extenders", ":linear", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "gc", srcs = ["gc.py"], srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "gc_test", size = "small", srcs = ["gc_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":gc", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "hooks", srcs = ["hooks/hooks.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "hooks_test", srcs = ["hooks/hooks_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":estimator_py", ":hooks", "//tensorflow_estimator/python/estimator", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "model_fn", srcs = ["model_fn.py"], srcs_version = "PY3", visibility = [ "//tensorflow_estimator:internal", "//third_party/tensorflow/python/tpu:__pkg__", ], deps = [ ":estimator_export", ":mode_keys", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "model_fn_test", size = "small", srcs = ["model_fn_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":export_output", ":model_fn", 
"//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "mode_keys", srcs = ["mode_keys.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "training", srcs = ["training.py"], srcs_version = "PY3", deps = [ ":estimator", ":estimator_export", ":exporter", ":run_config", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "training_test", size = "medium", srcs = ["training_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "notap", # TODO(b/170896944): flaky, broken "notsan", ], deps = [ ":dnn", ":estimator", ":exporter", ":inputs", ":run_config", ":training", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "run_config", srcs = ["run_config.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "run_config_test", size = "small", srcs = ["run_config_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":run_config", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "baseline", srcs = ["canned/baseline.py"], srcs_version = "PY3", deps = [ ":estimator", ":estimator_export", ":head", ":head_utils", ":model_fn", ":optimizers", ":regression_head", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "baseline_test", size = "medium", srcs = ["canned/baseline_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_pip", "noasan", # test flakily times out in asan mode. "notsan", # b/67510291 "optonly", # flakily times out in fastbuild ], deps = [ ":baseline", ":estimator", ":export_export", ":metric_keys", ":numpy_io", ":pandas_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "baseline_test_v1", size = "medium", srcs = ["canned/v1/baseline_test_v1.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_pip", "noasan", # test flakily times out in asan mode. 
"notsan", # b/67510291 "optonly", # flakily times out in fastbuild ], deps = [ ":baseline", ":estimator", ":export_export", ":metric_keys", ":numpy_io", ":pandas_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "baseline_estimator_test", size = "medium", srcs = ["canned/baseline_estimator_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_pip", "noasan", # test flakily times out in asan mode. "notsan", # b/67510291 "optonly", # flakily times out in fastbuild ], deps = [ ":baseline", ":estimator", ":export_export", ":metric_keys", ":numpy_io", ":regression_head", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "baseline_estimator_test_v1", size = "medium", srcs = ["canned/v1/baseline_estimator_test_v1.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_pip", "noasan", # test flakily times out in asan mode. 
"notsan", # b/67510291 "optonly", # flakily times out in fastbuild ], deps = [ ":baseline", ":estimator", ":export_export", ":metric_keys", ":numpy_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "kmeans", srcs = ["canned/kmeans.py"], srcs_version = "PY3", deps = [ ":estimator", ":estimator_export", ":head", ":model_fn", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "kmeans_test", size = "medium", srcs = ["canned/kmeans_test.py"], python_version = "PY3", shard_count = 8, srcs_version = "PY3", tags = [ "notap", # TODO(b/170974352): Flaky timeout ], deps = [ ":inputs", ":kmeans", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "dnn", srcs = ["canned/dnn.py"], srcs_version = "PY3", deps = [ ":estimator", ":estimator_export", ":head", ":head_utils", ":mode_keys", ":optimizers", ":regression_head", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "dnn_testing_utils", srcs = ["canned/dnn_testing_utils.py"], srcs_version = "PY3", visibility = ["//visibility:public"], deps = [ ":estimator", ":head", ":metric_keys", ":mode_keys", ":model_fn", ":numpy_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "dnn_testing_utils_v1", srcs = ["canned/v1/dnn_testing_utils_v1.py"], srcs_version = "PY3", deps = [ ":estimator", ":head", ":metric_keys", ":mode_keys", ":model_fn", ":numpy_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_test_fc_v1_v1", size = "medium", srcs = ["canned/v1/dnn_test_fc_v1_v1.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_pip", "notsan", # b/67510291 ], deps = [ ":dnn", ":dnn_testing_utils_v1", ":export_export", ":numpy_io", ":pandas_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_test_fc_v2", size = "medium", srcs = ["canned/dnn_test_fc_v2.py"], python_version = "PY3", shard_count = 8, srcs_version = "PY3", tags = [ "no_pip", "notsan", # b/67510291 ], deps = [ ":dnn", ":dnn_testing_utils", ":export_export", ":numpy_io", ":pandas_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", 
"//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_test_fc_v2_v1", size = "medium", srcs = ["canned/v1/dnn_test_fc_v2_v1.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_pip", "notsan", # b/67510291 ], deps = [ ":dnn", ":dnn_testing_utils_v1", ":export_export", ":numpy_io", ":pandas_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_estimator_test", size = "medium", srcs = ["canned/dnn_estimator_test.py"], python_version = "PY3", srcs_version = "PY3", tags = [ "no_pip", "notsan", "optonly", # times out http://b/79220679 ], deps = [ ":dnn", ":dnn_testing_utils", ":export_export", ":multi_class_head", ":numpy_io", ":prediction_keys", ":regression_head", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_estimator_test_v1", size = "medium", srcs = ["canned/v1/dnn_estimator_test_v1.py"], python_version = "PY3", srcs_version = "PY3", tags = [ "no_pip", "notsan", "optonly", # times out http://b/79220679 ], deps = [ ":dnn", ":dnn_testing_utils_v1", ":export_export", ":head", ":numpy_io", ":prediction_keys", 
"//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "dnn_linear_combined", srcs = ["canned/dnn_linear_combined.py"], srcs_version = "PY3", deps = [ ":dnn", ":estimator", ":estimator_export", ":head", ":head_utils", ":linear", ":model_fn", ":optimizers", ":regression_head", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_linear_combined_estimator_test", size = "medium", srcs = ["canned/dnn_linear_combined_estimator_test.py"], python_version = "PY3", shard_count = 3, srcs_version = "PY3", tags = [ "no_pip", "notsan", ], deps = [ ":dnn_linear_combined", ":dnn_testing_utils", ":export_export", ":linear_testing_utils", ":numpy_io", ":prediction_keys", ":regression_head", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_linear_combined_estimator_test_v1", size = "medium", srcs = ["canned/v1/dnn_linear_combined_estimator_test_v1.py"], python_version = "PY3", shard_count = 3, srcs_version = "PY3", tags = [ "no_pip", "notsan", ], deps = [ ":dnn_linear_combined", ":dnn_testing_utils_v1", ":export_export", ":head", ":linear_testing_utils_v1", ":numpy_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_linear_combined_test", size = "medium", srcs = ["canned/dnn_linear_combined_test.py"], python_version = "PY3", shard_count = 32, srcs_version = "PY3", tags = [ "no_oss", # TODO(b/143323557) "no_pip", "notsan", # TODO(b/67510291) ], deps = [ ":dnn_linear_combined", ":dnn_testing_utils", ":export_export", ":linear_testing_utils", ":numpy_io", ":pandas_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "dnn_linear_combined_test_v1", size = "medium", srcs = ["canned/v1/dnn_linear_combined_test_v1.py"], python_version = "PY3", shard_count = 16, srcs_version = "PY3", tags = [ "no_pip", "notsan", # TODO(b/67510291) ], deps = [ ":dnn_linear_combined", ":dnn_testing_utils_v1", ":export_export", ":linear_testing_utils_v1", ":numpy_io", ":pandas_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "checkpoint_converter", srcs = ["tools/checkpoint_converter.py"], srcs_version = "PY3", deps = [ 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "analytics_tools", srcs = ["tools/analytics.py"], srcs_version = "PY3", deps = ["//tensorflow_estimator/python/estimator:expect_tensorflow_installed"], ) py_test( name = "checkpoint_converter_test", srcs = ["tools/checkpoint_converter_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ ":checkpoint_converter", ":dnn", ":dnn_linear_combined", ":head", ":linear", ":numpy_io", ":regression_head", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "util", srcs = [ "util.py", ], srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "util_test", srcs = ["util_test.py"], python_version = "PY3", srcs_version = "PY3", tags = ["notsan"], # b/67510291 deps = [ ":util", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "early_stopping", srcs = [ "early_stopping.py", ], srcs_version = "PY3", deps = [ ":estimator_export", ":export_export", ":model_fn", ":run_config", ":util", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "early_stopping_test", srcs = [ "early_stopping_test.py", ], python_version = "PY3", srcs_version = "PY3", tags = [ "notap", # TODO(b/134928532): Reenable this test. ], deps = [ ":early_stopping", "//tensorflow_estimator/python/estimator", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", ], ) py_library( name = "estimator", srcs = [ "estimator.py", ], srcs_version = "PY3", visibility = [ "//tensorflow_estimator:internal", "//third_party/tensorflow/python/tpu:__pkg__", ], deps = [ ":estimator_export", ":export", ":mode_keys", ":model_fn", ":run_config", ":util", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "estimator_test", srcs = ["estimator_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = ["notsan"], # b/67510291 deps = [ ":estimator", ":estimator_py", ":export", ":mode_keys", ":model_fn", ":numpy_io", ":run_config", # Placeholder for an internal build dep disabling tf2 behavior "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "parsing_utils", srcs = [ "canned/parsing_utils.py", ], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_six_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "parsing_utils_test", srcs = ["canned/parsing_utils_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":parsing_utils", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "export_output", srcs = ["export/export_output.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "export", srcs = [ "export/export_lib.py", ], srcs_version = "PY3", deps = [ ":export_export", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "export_export", srcs = [ "export/export.py", ], srcs_version = "PY3", deps = [ ":estimator_export", ":util", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "export_test", size = "small", srcs = ["export/export_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":export_export", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "function", srcs = [ "export/function.py", ], srcs_version = "PY3", deps = [ ":mode_keys", ":model_fn", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "function_test", size = "small", srcs = ["export/function_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":export", ":function", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "head", srcs = ["canned/head.py"], srcs_version = "PY3", deps = [ ":export_output", ":metric_keys", ":model_fn", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "head_test", size = "medium", srcs = ["canned/head_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "manual", "no_pip", "notap", # b/148804861 ], deps = [ ":dnn_testing_utils_v1", ":head", ":metric_keys", ":mode_keys", ":model_fn", ":numpy_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "head_utils", srcs = ["head/head_utils.py"], srcs_version = "PY3", deps = [ ":binary_class_head", ":multi_class_head", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "base_head", srcs = ["head/base_head.py"], srcs_version = "PY3", deps = [ ":estimator_export", ":export_output", ":head", ":metric_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "base_head_test", size = "small", srcs = ["head/base_head_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":base_head_test_lib", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", ], ) py_library( name = "base_head_test_lib", 
testonly = True, srcs = ["head/base_head_test.py"], srcs_version = "PY3", deps = [ ":base_head", ":binary_class_head", ":head_utils", ":mode_keys", ":model_fn", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "binary_class_head", srcs = ["head/binary_class_head.py"], srcs_version = "PY3", deps = [ ":base_head", ":estimator_export", ":export_output", ":metric_keys", ":mode_keys", ":model_fn", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "binary_class_head_test", size = "medium", srcs = ["head/binary_class_head_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "manual", "no_pip", "notap", # b/148804861 ], deps = [ ":binary_class_head", ":dnn", ":dnn_testing_utils", ":head_utils", ":metric_keys", ":mode_keys", ":model_fn", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "multi_head", srcs = ["head/multi_head.py"], srcs_version = "PY3", deps = [ ":base_head", ":estimator_export", ":export_output", ":metric_keys", ":mode_keys", ":model_fn", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "multi_head_test", size = "medium", srcs = ["head/multi_head_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ ":head_utils", 
":metric_keys", ":mode_keys", ":multi_head", ":multi_label_head", ":prediction_keys", ":regression_head", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "multi_class_head", srcs = ["head/multi_class_head.py"], srcs_version = "PY3", deps = [ ":base_head", ":estimator_export", ":export_output", ":metric_keys", ":mode_keys", ":model_fn", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "multi_class_head_test", size = "medium", srcs = ["head/multi_class_head_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_oss", # TODO(b/202525254): broken on TF 2.7 ], deps = [ ":dnn", ":head_utils", ":metric_keys", ":mode_keys", ":model_fn", ":multi_class_head", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "multi_label_head", srcs = ["head/multi_label_head.py"], srcs_version = "PY3", deps = [ ":base_head", ":estimator_export", ":export_output", ":metric_keys", ":mode_keys", ":model_fn", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "multi_label_head_test", size = "medium", srcs = 
["head/multi_label_head_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ ":dnn", ":head_utils", ":metric_keys", ":mode_keys", ":model_fn", ":multi_label_head", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "regression_head", srcs = ["head/regression_head.py"], srcs_version = "PY3", deps = [ ":base_head", ":estimator_export", ":export_output", ":metric_keys", ":mode_keys", ":model_fn", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "regression_head_test", size = "medium", srcs = ["head/regression_head_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "manual", "notap", # b/148804861 ], deps = [ ":head_utils", ":metric_keys", ":mode_keys", ":model_fn", ":numpy_io", ":prediction_keys", ":regression_head", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "sequential_head", srcs = ["head/sequential_head.py"], srcs_version = "PY3", deps = [ ":base_head", ":mode_keys", ":multi_head", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "sequential_head_test", size = "medium", srcs = ["head/sequential_head_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ 
":binary_class_head", ":head_utils", ":metric_keys", ":mode_keys", ":model_fn", ":multi_class_head", ":multi_head", ":prediction_keys", ":sequential_head", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "inputs", srcs = ["inputs/inputs.py"], srcs_version = "PY3", deps = [ ":numpy_io", ":pandas_io", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "linear", srcs = ["canned/linear.py"], srcs_version = "PY3", deps = [ ":binary_class_head", ":estimator", ":estimator_export", ":head", ":head_utils", ":optimizers", ":regression_head", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", "//tensorflow_estimator/python/estimator/canned/linear_optimizer:sdca_ops_py", ], ) py_library( name = "linear_testing_utils", srcs = ["canned/linear_testing_utils.py"], srcs_version = "PY3", visibility = ["//visibility:public"], deps = [ ":estimator", ":export_export", ":linear", ":metric_keys", ":numpy_io", ":pandas_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "linear_testing_utils_v1", srcs = ["canned/v1/linear_testing_utils_v1.py"], srcs_version = "PY3", deps = [ ":estimator", ":export_export", ":linear", ":metric_keys", 
":numpy_io", ":pandas_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "linear_estimator_test", size = "medium", srcs = ["canned/linear_estimator_test.py"], python_version = "PY3", srcs_version = "PY3", tags = [ "no_pip", "notsan", ], deps = [ ":export_export", ":linear", ":linear_testing_utils", ":numpy_io", ":prediction_keys", ":regression_head", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "linear_estimator_test_v1", size = "medium", srcs = ["canned/v1/linear_estimator_test_v1.py"], python_version = "PY3", srcs_version = "PY3", tags = [ "no_pip", "notsan", ], deps = [ ":export_export", ":head", ":linear", ":linear_testing_utils_v1", ":numpy_io", ":prediction_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "linear_test", size = "medium", srcs = ["canned/linear_test.py"], python_version = "PY3", shard_count = 8, srcs_version = "PY3", tags = [ "no_pip", "notsan", # b/67510291 ], deps = [ ":linear", ":linear_testing_utils", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) filegroup( name = "vocabulary_testdata", srcs = [ "canned/testdata/wire_vocabulary.txt", ], ) py_test( name = "linear_model_test", size = "medium", srcs = ["canned/linear_model_test.py"], data = [":vocabulary_testdata"], python_version = "PY3", shard_count = 8, srcs_version = "PY3", tags = [ "no_cuda_on_cpu_tap", "no_pip", "no_rocm", "no_windows", "notsan", # b/67510291 ], deps = [ ":linear", ":linear_testing_utils", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "linear_test_v1", size = "medium", srcs = ["canned/v1/linear_test_v1.py"], python_version = "PY3", shard_count = 8, srcs_version = "PY3", tags = [ "no_pip", "notsan", # b/67510291 ], deps = [ ":linear", ":linear_testing_utils_v1", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "metric_keys", srcs = ["canned/metric_keys.py"], srcs_version = "PY3", deps = [ ":model_fn", ], ) py_library( name = "numpy_io", srcs = ["inputs/numpy_io.py"], srcs_version = "PY3", deps = [ ":estimator_export", ":inputs_queues", ], ) py_test( name = "numpy_io_test", size = "small", srcs = ["inputs/numpy_io_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":numpy_io", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "optimizers", srcs = ["canned/optimizers.py"], srcs_version = "PY3", deps = [ ":util", "//tensorflow_estimator/python/estimator:expect_six_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "optimizers_test", size = "small", srcs = ["canned/optimizers_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":optimizers", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "optimizers_test_v2", size = "small", srcs = ["canned/optimizers_test_v2.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":optimizers", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "object_checkpointing_test", size = "medium", srcs = ["object_checkpointing_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":estimator", ":model_fn", ":optimizers", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "pandas_io", srcs = ["inputs/pandas_io.py"], srcs_version = "PY3", deps = [ ":estimator_export", ":inputs_queues", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", ], ) py_test( name = "pandas_io_test", size = "small", srcs = ["inputs/pandas_io_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":pandas_io", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name 
= "prediction_keys", srcs = ["canned/prediction_keys.py"], srcs_version = "PY3", visibility = ["//visibility:public"], deps = [], ) py_library( name = "inputs_queues", srcs = [ "inputs/queues/__init__.py", "inputs/queues/feeding_functions.py", "inputs/queues/feeding_queue_runner.py", ], srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "feeding_functions_test", size = "small", srcs = [ "inputs/queues/feeding_functions_test.py", ], python_version = "PY3", srcs_version = "PY3", deps = [ ":inputs_queues", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "feeding_queue_runner_test", size = "small", srcs = ["inputs/queues/feeding_queue_runner_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":inputs_queues", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_pandas_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "keras", srcs = ["keras_lib.py"], srcs_version = "PY3", deps = [ ":estimator", ":export", ":mode_keys", ":model_fn", ":run_config", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "keras_test", size = "medium", srcs = ["keras_test.py"], python_version = "PY3", shard_count = 8, srcs_version = "PY3", tags = [ "no_windows", "notsan", # b/67510291 ], deps = [ ":export", ":keras", ":mode_keys", ":numpy_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_h5py_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "keras_premade_model_test", size = "medium", srcs = ["keras_premade_model_test.py"], python_version = "PY3", shard_count = 4, deps = [ ":export", ":keras", ":mode_keys", ":numpy_io", ":run_config", "//tensorflow_estimator/python/estimator:expect_h5py_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "keras_distribute_strategy_test", srcs = ["keras_distribute_strategy_test.py"], python_version = "PY3", srcs_version = "PY3", tags = ["notsan"], deps = [ ":keras", ":run_config", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "saved_model_estimator", srcs = ["canned/saved_model_estimator.py"], srcs_version = "PY3", deps = [ ":estimator", ":estimator_export", ":export", ":mode_keys", ":model_fn", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", 
"//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "saved_model_estimator_test", size = "medium", srcs = ["canned/saved_model_estimator_test.py"], python_version = "PY3", srcs_version = "PY3", tags = [ "notsan", ], deps = [ ":estimator", ":export", ":mode_keys", ":model_fn", ":saved_model_estimator", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "basic_session_run_hooks", srcs = ["hooks/basic_session_run_hooks.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "basic_session_run_hooks_test", size = "medium", srcs = ["hooks/basic_session_run_hooks_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":estimator_py", ":fake_summary_writer", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", ], ) py_library( name = "session_run_hook", srcs = ["hooks/session_run_hook.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "fake_summary_writer", srcs = [ "hooks/fake_summary_writer.py", ], srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "rnn", srcs = ["canned/rnn.py"], srcs_version = "PY3", deps = [ ":binary_class_head", ":estimator", ":estimator_export", ":multi_class_head", ":optimizers", ":sequential_head", 
"//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "rnn_test", size = "medium", srcs = ["canned/rnn_test.py"], python_version = "PY3", srcs_version = "PY3", tags = [ "no_oss", # b/140934549 "no_pip", "noasan", # times out "notsan", "optonly", # times out http://b/79220679 ], deps = [ ":export", ":head", ":metric_keys", ":multi_class_head", ":numpy_io", ":parsing_utils", ":prediction_keys", ":rnn", ":sequential_head", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "estimator_export", srcs = ["estimator_export.py"], srcs_version = "PY3", visibility = ["//tensorflow_estimator:internal"], deps = [ ":expect_tensorflow_installed", ":util", ], ) py_test( name = "estimator_export_test", srcs = ["estimator_export_test.py"], srcs_version = "PY3", deps = [ ":estimator_export", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "expect_absl_installed", srcs_version = "PY3", # This is a dummy rule used as a absl dependency in open-source. # We expect absl to already be installed on the system, e.g. via # `pip install absl` visibility = ["//visibility:public"], ) py_library( name = "expect_numpy_installed", srcs_version = "PY3", # This is a dummy rule used as a numpy dependency in open-source. # We expect numpy to already be installed on the system, e.g. 
via
    # `pip install numpy`
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_pandas_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as a pandas dependency in open-source.
    # We expect pandas to already be installed on the system, e.g. via
    # `pip install pandas`
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_h5py_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as an h5py dependency in open-source.
    # We expect h5py to already be installed on the system, e.g. via
    # `pip install h5py`
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_six_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as a six dependency in open-source.
    # We expect six to already be installed on the system, e.g. via
    # `pip install six`
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_tensorboard_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as a tensorboard dependency in open-source.
    # We expect tensorboard to already be installed on the system, e.g. via
    # `pip install tensorboard`.
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_tensorflow_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as a tensorflow dependency in open-source.
    # We expect tensorflow to already be installed on the system, e.g. via
    # `pip install tensorflow` or `pip install tensorflow_gpu`
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_tensorflow_keras_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as a tf.keras dependency in open-source.
    # We expect tensorflow (which bundles tf.keras) to already be installed on
    # the system, e.g. via `pip install tensorflow` or `pip install tensorflow_gpu`
    visibility = ["//visibility:public"],
)

py_library(
    name = "expect_proto_cpp_installed",
    srcs_version = "PY3",
    # This is a dummy rule used as a protobuf dependency in open-source.
    # We expect protobuf cpp python to already be installed on the system.
visibility = ["//visibility:public"], ) # The following targets are emulating cuda_py_test from //third_party/tensorflow:tensorflow.google.bzl # cuda_py_test cannot be used directly because the bzl file cannot be imported into tensorflow_estimator py_test( name = "distribute_strategy_estimator_integration_test", size = "medium", srcs = ["distribute_strategy_estimator_integration_test.py"], main = "distribute_strategy_estimator_integration_test.py", python_version = "PY3", srcs_version = "PY3", tags = [ "tf_integration_test", ], deps = [ ":estimator_py", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "distribute_strategy_estimator_integration_test_gpu", size = "medium", srcs = ["distribute_strategy_estimator_integration_test.py"], main = "distribute_strategy_estimator_integration_test.py", python_version = "PY3", srcs_version = "PY3", tags = [ "cuda", "gpu", "multi_and_single_gpu", "requires-gpu-nvidia", "tf_integration_test", ], deps = [ ":estimator_py", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "distribute_strategy_estimator_training_test", size = "medium", srcs = ["distribute_strategy_estimator_training_test.py"], main = "distribute_strategy_estimator_training_test.py", python_version = "PY3", shard_count = 48, srcs_version = "PY3", tags = [ "no_oss", # b/140933379 # TODO(b/118768923): Re-enable {a,m,t}san test. 
"noasan",
        "nomsan",
        "notsan",
    ],
    deps = [
        ":estimator_py",
        # Placeholder for an internal build dep disabling tf2 behavior
        "//tensorflow_estimator/python/estimator:expect_numpy_installed",
        "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed",
        "//tensorflow_estimator/python/estimator:expect_tensorflow_installed",
    ],
)

py_test(
    name = "distribute_strategy_estimator_training_test_gpu",
    size = "medium",
    srcs = ["distribute_strategy_estimator_training_test.py"],
    main = "distribute_strategy_estimator_training_test.py",
    python_version = "PY3",
    shard_count = 48,
    srcs_version = "PY3",
    tags = [
        # TODO(b/118768923): Re-enable {a,m,t}san test.
        "cuda",
        "gpu",
        "multi_and_single_gpu",
        "noasan",
        "nomsan",
        "notsan",
        "requires-gpu-nvidia",
    ],
    deps = [
        ":estimator_py",
        # Placeholder for an internal build dep disabling tf2 behavior
        "//tensorflow_estimator/python/estimator:expect_numpy_installed",
        "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed",
        "//tensorflow_estimator/python/estimator:expect_tensorflow_installed",
    ],
)

py_test(
    name = "canned_estimator_ds_integration_test",
    size = "medium",
    srcs = ["canned/canned_estimator_ds_integration_test.py"],
    python_version = "PY3",
    srcs_version = "PY3",
    tags = [
        "notap",  # TODO(b/161835009): Re-enable.
], deps = [ ":estimator_py", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "canned_estimator_ds_integration_test_gpu", size = "medium", srcs = ["canned/canned_estimator_ds_integration_test.py"], main = "canned/canned_estimator_ds_integration_test.py", python_version = "PY3", srcs_version = "PY3", tags = [ "cuda", "gpu", "multi_and_single_gpu", "requires-gpu-nvidia", "tf_integration_test", ], deps = [ ":estimator_py", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) ================================================ FILE: tensorflow_estimator/python/estimator/api/BUILD ================================================ # Placeholder: load aliased py_binary load("//tensorflow_estimator/python/estimator/api:api_gen.bzl", "ESTIMATOR_API_INIT_FILES_V1", "ESTIMATOR_API_INIT_FILES_V2", "generate_apis") package(default_visibility = ["//tensorflow_estimator:internal"]) licenses(["notice"]) # This flag specifies whether Estimator 2.0 API should be built instead # of 1.* API. Note that Estimator 2.0 API is currently under development. 
config_setting( name = "api_version_2", define_values = {"estimator_api_version": "2"}, ) py_binary( name = "extractor_wrapper", srcs = ["extractor_wrapper.py"], visibility = ["//visibility:public"], deps = [ "//tensorflow_estimator/python/estimator:expect_absl_installed", # absl:app ], ) py_binary( name = "generator_wrapper", srcs = ["generator_wrapper.py"], visibility = ["//visibility:public"], deps = [ "//tensorflow_estimator/python/estimator:expect_absl_installed", # absl:app ], ) genrule( name = "estimator_python_api_gen", srcs = select({ "api_version_2": ["_v2/v2.py"], "//conditions:default": ["_v1/v1.py"], }), outs = ["__init__.py"], cmd = select({ "api_version_2": "cp $(location :_v2/v2.py) $(OUTS)", "//conditions:default": "cp $(location :_v1/v1.py) $(OUTS)", }), ) generate_apis( name = "estimator_python_api_gen_compat_v1", api_version = 1, output_dir = "_v1/", output_files = ESTIMATOR_API_INIT_FILES_V1, output_package = "tensorflow_estimator.python.estimator.api._v1", root_file_name = "v1.py", visibility = ["//visibility:public"], ) generate_apis( name = "estimator_python_api_gen_compat_v2", api_version = 2, output_dir = "_v2/", output_files = ESTIMATOR_API_INIT_FILES_V2, output_package = "tensorflow_estimator.python.estimator.api._v2", root_file_name = "v2.py", visibility = ["//visibility:public"], ) ================================================ FILE: tensorflow_estimator/python/estimator/api/api_gen.bzl ================================================ """Targets for generating TensorFlow Estimator Python API __init__.py files. This bzl file is copied with slight modifications from tensorflow/python/tools/api/generator2/generate_api.bzl so that we can avoid needing to depend on TF source code in Bazel build. It should be noted that because this file is executed during the build, and it imports TensorFlow code, that installing TensorFlow python package is required to Bazel build Estimator. 
""" # Placeholder: load PyInfo load("//tensorflow_estimator:estimator.bzl", "if_indexing_source_code") _TARGET_PATTERNS = [ "//tensorflow_estimator:", "//tensorflow_estimator/", ] _DECORATOR = "tensorflow_estimator.python.estimator.estimator_export.estimator_export" _MODULE_PREFIX = "" ESTIMATOR_API_INIT_FILES_V1 = [ "__init__.py", "estimator/__init__.py", "estimator/experimental/__init__.py", "estimator/export/__init__.py", "estimator/inputs/__init__.py", "estimator/tpu/__init__.py", "estimator/tpu/experimental/__init__.py", ] ESTIMATOR_API_INIT_FILES_V2 = [ "__init__.py", "estimator/__init__.py", "estimator/experimental/__init__.py", "estimator/export/__init__.py", "estimator/inputs/__init__.py", ] def _any_match(label): full_target = "//" + label.package + ":" + label.name for pattern in _TARGET_PATTERNS: if pattern in full_target: return True return False def _join(path, *others): result = path for p in others: if not result or result.endswith("/"): result += p else: result += "/" + p return result def _api_info_init(*, transitive_api): if type(transitive_api) != type(depset()): fail("ApiInfo.transitive_api must be a depset") return {"transitive_api": transitive_api} ApiInfo, _new_api_info = provider( doc = "Provider for API symbols and docstrings extracted from Python files.", fields = { "transitive_api": "depset of files with extracted API.", }, init = _api_info_init, ) def _py_files(f): if f.basename.endswith(".py") or f.basename.endswith(".py3"): return f.path return None def _merge_py_info( deps, direct_sources = None, direct_imports = None, has_py2_only_sources = False, has_py3_only_sources = False, uses_shared_libraries = False): transitive_sources = [] transitive_imports = [] for dep in deps: if PyInfo in dep: transitive_sources.append(dep[PyInfo].transitive_sources) transitive_imports.append(dep[PyInfo].imports) has_py2_only_sources = has_py2_only_sources or dep[PyInfo].has_py2_only_sources has_py3_only_sources = has_py3_only_sources or 
dep[PyInfo].has_py3_only_sources uses_shared_libraries = uses_shared_libraries or dep[PyInfo].uses_shared_libraries return PyInfo( transitive_sources = depset(direct = direct_sources, transitive = transitive_sources), imports = depset(direct = direct_imports, transitive = transitive_imports), has_py2_only_sources = has_py2_only_sources, has_py3_only_sources = has_py3_only_sources, uses_shared_libraries = uses_shared_libraries, ) def _merge_api_info( deps, direct_api = None): transitive_api = [] for dep in deps: if ApiInfo in dep: transitive_api.append(dep[ApiInfo].transitive_api) return ApiInfo(transitive_api = depset(direct = direct_api, transitive = transitive_api)) def _api_extractor_impl(target, ctx): direct_api = [] # Make sure the rule has a non-empty srcs attribute. if ( _any_match(target.label) and hasattr(ctx.rule.attr, "srcs") and ctx.rule.attr.srcs ): output = ctx.actions.declare_file("_".join([ target.label.name, "extracted_tensorflow_estimator_api.json", ])) args = ctx.actions.args() args.set_param_file_format("multiline") args.use_param_file("--flagfile=%s") args.add("--output", output) args.add("--decorator", _DECORATOR) args.add("--api_name", "tensorflow_estimator") args.add_all(ctx.rule.files.srcs, expand_directories = True, map_each = _py_files) ctx.actions.run( mnemonic = "ExtractAPI", executable = ctx.executable._extractor_bin, inputs = ctx.rule.files.srcs, outputs = [output], arguments = [args], progress_message = "Extracting tensorflow_estimator APIs for %{label} to %{output}.", ) direct_api.append(output) return [ _merge_api_info(ctx.rule.attr.deps if hasattr(ctx.rule.attr, "deps") else [], direct_api = direct_api), ] api_extractor = aspect( doc = "Extracts the exported API for the given target and its dependencies.", implementation = _api_extractor_impl, attr_aspects = ["deps"], provides = [ApiInfo], # Currently the Python rules do not correctly advertise their providers. 
# required_providers = [PyInfo], attrs = { "_extractor_bin": attr.label( default = Label("//tensorflow_estimator/python/estimator/api:extractor_wrapper"), executable = True, cfg = "exec", ), }, ) def _extract_api_impl(ctx): return [ _merge_api_info(ctx.attr.deps), _merge_py_info(ctx.attr.deps), ] extract_api = rule( doc = "Extract Python API for all targets in transitive dependencies.", implementation = _extract_api_impl, attrs = { "deps": attr.label_list( doc = "Targets to extract API from.", allow_empty = False, aspects = [api_extractor], providers = [PyInfo], mandatory = True, ), }, provides = [ApiInfo, PyInfo], ) def _generate_api_impl(ctx): args = ctx.actions.args() args.set_param_file_format("multiline") args.use_param_file("--flagfile=%s") args.add_joined("--output_files", ctx.outputs.output_files, join_with = ",") args.add("--output_dir", _join(ctx.bin_dir.path, ctx.label.package, ctx.attr.output_dir)) if ctx.file.root_init_template: args.add("--root_init_template", ctx.file.root_init_template) args.add("--apiversion", ctx.attr.api_version) args.add_joined("--compat_api_versions", ctx.attr.compat_api_versions, join_with = ",") args.add_joined("--compat_init_templates", ctx.files.compat_init_templates, join_with = ",") args.add("--output_package", ctx.attr.output_package) args.add_joined("--packages_to_ignore", ctx.attr.packages_to_ignore, join_with = ",") if _MODULE_PREFIX: args.add("--module_prefix", _MODULE_PREFIX) if ctx.attr.use_lazy_loading: args.add("--use_lazy_loading") else: args.add("--nouse_lazy_loading") if ctx.attr.proxy_module_root: args.add("--proxy_module_root", ctx.attr.proxy_module_root) args.add_joined("--file_prefixes_to_strip", [ctx.bin_dir.path, ctx.genfiles_dir.path], join_with = ",") if ctx.attr.root_file_name: args.add("--root_file_name", ctx.attr.root_file_name) inputs = depset(transitive = [ dep[ApiInfo].transitive_api for dep in ctx.attr.deps ]) args.add_all( inputs, expand_directories = True, ) transitive_inputs = [inputs] if 
ctx.attr.root_init_template:
        transitive_inputs.append(ctx.attr.root_init_template.files)
    ctx.actions.run(
        mnemonic = "GenerateAPI",
        executable = ctx.executable._generator_bin,
        inputs = depset(
            direct = ctx.files.compat_init_templates,
            transitive = transitive_inputs,
        ),
        outputs = ctx.outputs.output_files,
        arguments = [args],
        progress_message = "Generating APIs for %{label} to %{output}.",
    )

generate_api = rule(
    doc = "Generate Python API for all targets in transitive dependencies.",
    implementation = _generate_api_impl,
    attrs = {
        "deps": attr.label_list(
            doc = "extract_api targets to generate API from.",
            allow_empty = True,
            providers = [ApiInfo, PyInfo],
            mandatory = True,
        ),
        "root_init_template": attr.label(
            doc = "Template for the top level __init__.py file",
            allow_single_file = True,
        ),
        "api_version": attr.int(
            doc = "The API version to generate (1 or 2)",
            values = [1, 2],
        ),
        "compat_api_versions": attr.int_list(
            doc = "Additional versions to generate in compat/ subdirectory.",
        ),
        "compat_init_templates": attr.label_list(
            doc = "Template for top-level __init__ files under compat modules. This list must be " +
                  "in the same order as the list of versions in compat_api_versions",
            allow_files = True,
        ),
        "output_package": attr.string(
            doc = "Root output package.",
        ),
        "output_dir": attr.string(
            doc = "Subdirectory to output API to. If non-empty, must end with '/'.",
        ),
        "proxy_module_root": attr.string(
            doc = "Module root for proxy-import format. If specified, proxy files with " +
                  "`from proxy_module_root.proxy_module import *` will be created to enable " +
                  "import resolution under TensorFlow.",
        ),
        "output_files": attr.output_list(
            doc = "List of __init__.py files that should be generated. This list should include " +
                  "file name for every module exported using tf_export. For e.g. if an op is " +
                  "decorated with @tf_export('module1.module2', 'module3'). Then, output_files " +
                  "should include module1/module2/__init__.py and module3/__init__.py.",
        ),
        "use_lazy_loading": attr.bool(
            doc = "If true, lazy load imports in the generated API rather than importing them all statically.",
        ),
        "packages_to_ignore": attr.string_list(
            doc = "List of packages to ignore tf_exports from.",
        ),
        "root_file_name": attr.string(
            doc = "The file name that should be generated for the top level API.",
        ),
        "_generator_bin": attr.label(
            default = Label("//tensorflow_estimator/python/estimator/api:generator_wrapper"),
            executable = True,
            cfg = "exec",
        ),
    },
)

def generate_apis(
        name,
        deps = [
            "//tensorflow_estimator/python/estimator:estimator_py",
            # "//third_party/tensorflow/lite/python:analyzer",
            # "//third_party/tensorflow/lite/python:lite",
            # "//third_party/tensorflow/lite/python/authoring",
        ],
        output_files = ESTIMATOR_API_INIT_FILES_V2,
        root_init_template = None,
        api_version = 2,
        compat_api_versions = [],
        compat_init_templates = [],
        output_package = "tensorflow_estimator.python.estimator.api",
        output_dir = "",
        proxy_module_root = None,
        packages_to_ignore = [],
        root_file_name = "__init__.py",
        visibility = ["//visibility:private"]):
    """Generate TensorFlow APIs for a set of libraries.

    Args:
      name: name of generate_api target.
      deps: python_library targets to serve as roots for extracting APIs.
      output_files: The list of files that the API generator is expected to
        create.
      root_init_template: The template for the top level __init__.py file
        generated. "#API IMPORTS PLACEHOLDER" comment will be replaced with
        imports.
      api_version: The API version to generate. (1 or 2)
      compat_api_versions: Additional versions to generate in compat/
        subdirectory.
      compat_init_templates: Template for top level __init__.py files under the
        compat modules. The list must be in the same order as the list of
        versions in 'compat_api_versions'
      output_package: Root output package.
      output_dir: Directory where the generated output files are placed.
This should be a prefix of every directory in 'output_files' proxy_module_root: Module root for proxy-import format. If specified, proxy files with `from proxy_module_root.proxy_module import *` will be created to enable import resolution under TensorFlow. packages_to_ignore: List of packages to ignore tf_exports from. root_file_name: The file name that should be generated for the top level API. visibility: Visibility of the target containing the generated files. """ extract_name = name + ".extract-tensorflow-estimator" extract_api( name = extract_name, deps = deps, visibility = ["//visibility:private"], ) if proxy_module_root != None: # Avoid conflicts between the __init__.py file of TensorFlow and proxy module. output_files = [f for f in output_files if f != "__init__.py"] if root_file_name != None: output_files = [f if f != "__init__.py" else root_file_name for f in output_files] all_output_files = [_join(output_dir, f) for f in output_files] generate_api( name = name, deps = [":" + extract_name], output_files = all_output_files, output_dir = output_dir, root_init_template = root_init_template, compat_api_versions = compat_api_versions, compat_init_templates = compat_init_templates, api_version = api_version, proxy_module_root = proxy_module_root, visibility = visibility, packages_to_ignore = packages_to_ignore, use_lazy_loading = False, output_package = output_package, root_file_name = root_file_name, ) ================================================ FILE: tensorflow_estimator/python/estimator/api/extractor_wrapper.py ================================================ # Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Thin wrapper to call TensorFlow's API extractor script.""" from absl import app from tensorflow.python.tools.api.generator2.extractor import extractor if __name__ == "__main__": app.run(extractor.main) ================================================ FILE: tensorflow_estimator/python/estimator/api/generator_wrapper.py ================================================ # Copyright 2023 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Thin wrapper to call TensorFlow's API generator script.""" from absl import app from tensorflow.python.tools.api.generator2.generator import generator if __name__ == "__main__": app.run(generator.main) ================================================ FILE: tensorflow_estimator/python/estimator/canned/__init__.py ================================================ ================================================ FILE: tensorflow_estimator/python/estimator/canned/baseline.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Baseline estimators. Baseline estimators are bias-only estimators that can be used for debugging and as simple baselines. Example: ``` # Build BaselineClassifier classifier = BaselineClassifier(n_classes=3) # Input builders def input_fn_train(): # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_eval(): # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass # Fit model. classifier.train(input_fn=input_fn_train) # Evaluate cross entropy between the test and train labels. loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] # predict outputs the probability distribution of the classes as seen in # training. 
predictions = classifier.predict(new_samples) ``` """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import six import tensorflow as tf from tensorflow.python.feature_column import feature_column as feature_column_v1 from tensorflow.python.feature_column import feature_column_v2 from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator.canned import head as head_lib from tensorflow_estimator.python.estimator.canned import optimizers from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.head import head_utils from tensorflow_estimator.python.estimator.head import regression_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys # The default learning rate of 0.3 is a historical artifact of the initial # implementation, but seems a reasonable choice. _LEARNING_RATE = 0.3 def _get_weight_column_key(weight_column): if weight_column is None: return None if isinstance(weight_column, six.string_types): return weight_column if not isinstance(weight_column, feature_column_v1._NumericColumn): # pylint: disable=protected-access raise TypeError('Weight column must be either a string or _NumericColumn.' ' Given type: {}.'.format(type(weight_column))) return weight_column.key() def _get_weight_column_key_v2(weight_column): if weight_column is None: return None if isinstance(weight_column, six.string_types): return weight_column if not isinstance(weight_column, feature_column_v2.NumericColumn): raise TypeError('Weight column must be either a string or NumericColumn. 
def _get_batch_size_and_size_checks(features, weight_column_key):
  """Computes the batch size and batch-consistency assertions for `features`.

  Note: despite the historical name, the return order is
  `(size_checks, batch_size)`, not `(batch_size, size_checks)`.

  Args:
    features: Dict of `Tensor` features keyed by feature name.
    weight_column_key: Key of the weight column in `features`, or None. The
      weight feature is deliberately excluded from the size checks.

  Returns:
    size_checks: List of assert ops verifying that every (non-weight) feature
      shares the same first (batch) dimension.
    batch_size: Scalar `Tensor` holding the first dimension of the first
      feature visited, or None if `features` has no non-weight entries.
  """
  size_checks = []
  batch_size = None

  # The first dimension is assumed to be a batch size and must be consistent
  # among all of the features.
  for key, feature in features.items():
    # Skip weight_column to ensure we don't add size checks to it.
    # These would introduce a dependency on the weight at serving time.
    if key == weight_column_key:
      continue
    first_dim = tf.compat.v1.shape(feature)[0]
    if batch_size is None:
      batch_size = first_dim
    else:
      size_checks.append(
          tf.compat.v1.debugging.assert_equal(batch_size, first_dim))
  return size_checks, batch_size
""" weight_column_key = _get_weight_column_key(weight_column) size_checks, batch_size = _get_batch_size_and_size_checks( features, weight_column_key) with tf.control_dependencies(size_checks): with tf.compat.v1.variable_scope('baseline'): bias = tf.compat.v1.get_variable( 'bias', shape=[num_outputs], initializer=tf.compat.v1.initializers.zeros) return tf.math.multiply(bias, tf.ones([batch_size, num_outputs])) return baseline_logit_fn def _baseline_model_fn(features, labels, mode, head, optimizer, weight_column=None, config=None): """Model_fn for baseline models. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `train`). labels: `Tensor` of labels that are compatible with the `Head` instance. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. optimizer: String, `tf.Optimizer` object, or callable that creates the optimizer to use for training. If not specified, will use `FtrlOptimizer` with a default learning rate of 0.3. weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It will be multiplied by the loss of the example. config: `RunConfig` object to configure the runtime settings. Raises: KeyError: If weight column is specified but not present. ValueError: If features is an empty dictionary. Returns: An `EstimatorSpec` instance. """ del config # Unused. logit_fn = _baseline_logit_fn_builder(head.logits_dimension, weight_column) logits = logit_fn(features) def train_op_fn(loss): opt = optimizers.get_optimizer_instance( optimizer, learning_rate=_LEARNING_RATE) return opt.minimize(loss, global_step=tf.compat.v1.train.get_global_step()) return head.create_estimator_spec( features=features, mode=mode, logits=logits, labels=labels, train_op_fn=train_op_fn) def _baseline_model_fn_builder_v2(features, num_outputs, weight_column=None): """Function builder for a baseline logit_fn. 
def _baseline_model_fn_v2(
    features,
    labels,
    mode,
    head,
    optimizer,
    weight_column=None,
    config=None,
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE):
  """Model_fn for baseline models.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of labels that are compatible with the `Head` instance.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use `FtrlOptimizer`
      with a default learning rate of 0.3.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It will be multiplied by the loss of the example.
    config: `RunConfig` object to configure the runtime settings.
    loss_reduction: One of `tf_keras.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Raises:
    KeyError: If weight column is specified but not present.
    ValueError: If features is an empty dictionary.

  Returns:
    An `EstimatorSpec` instance.
  """
  del config  # Unused.
  # Bias-only logits plus the list of variables the optimizer should update.
  trainable_variables, logits = _baseline_model_fn_builder_v2(
      features, head.logits_dimension, weight_column)

  # In TRAIN mode, create optimizer and assign global_step variable to
  # optimizer.iterations to make global_step increased correctly, as Hooks
  # relies on global step as step counter.
  if mode == ModeKeys.TRAIN:
    opt = optimizers.get_optimizer_instance_v2(
        optimizer, learning_rate=_LEARNING_RATE)
    opt.iterations = tf.compat.v1.train.get_or_create_global_step()

  # NOTE(review): `train_op_fn` closes over `opt`, which only exists in TRAIN
  # mode; the head is expected to invoke it only during training — confirm.
  def train_op_fn(loss):
    # Scale loss by number of replicas.
    if loss_reduction == tf.losses.Reduction.SUM_OVER_BATCH_SIZE:
      num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
      if num_replicas > 1:
        loss *= (1. / num_replicas)
    # get_updates returns a list of update ops; the first is the train op.
    return opt.get_updates(loss, trainable_variables)[0]

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      logits=logits,
      labels=labels,
      train_op_fn=train_op_fn)
loss = classifier.evaluate(input_fn=input_fn_eval)["loss"] # predict outputs the probability distribution of the classes as seen in # training. predictions = classifier.predict(new_samples) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * if `weight_column` is not `None`, a feature with `key=weight_column` whose value is a `Tensor`. @compatibility(eager) Estimators can be used while eager execution is enabled. Note that `input_fn` and all hooks are executed inside a graph context, so they have to be written to be compatible with graph mode. Note that `input_fn` code using `tf.data` generally works in both graph and eager modes. @end_compatibility """ def __init__(self, model_dir=None, n_classes=2, weight_column=None, label_vocabulary=None, optimizer='Ftrl', config=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE): """Initializes a BaselineClassifier instance. Args: model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. n_classes: number of label classes. Default is binary classification. It must be greater than 1. Note: Class labels are integers representing the class index (i.e. values from 0 to n_classes-1). For arbitrary label values (e.g. string labels), convert to class indices first. weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It will be multiplied by the loss of the example. label_vocabulary: Optional list of strings with size `[n_classes]` defining the label vocabulary. Only supported for `n_classes` > 2. optimizer: String, `tf_keras.optimizers.*` object, or callable that creates the optimizer to use for training. If not specified, will use `Ftrl` as the default optimizer. config: `RunConfig` object to configure the runtime settings. 
loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. Returns: A `BaselineClassifier` estimator. Raises: ValueError: If `n_classes` < 2. """ head = head_utils.binary_or_multi_class_head( n_classes, weight_column=weight_column, label_vocabulary=label_vocabulary, loss_reduction=loss_reduction) def _model_fn(features, labels, mode, config): return _baseline_model_fn_v2( features=features, labels=labels, mode=mode, head=head, optimizer=optimizer, weight_column=weight_column, config=config, loss_reduction=loss_reduction) super(BaselineClassifierV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export(v1=['estimator.BaselineClassifier']) # pylint: disable=missing-docstring class BaselineClassifier(estimator.Estimator): __doc__ = BaselineClassifierV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM') def __init__(self, model_dir=None, n_classes=2, weight_column=None, label_vocabulary=None, optimizer='Ftrl', config=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM): head = head_lib._binary_logistic_or_multi_class_head( # pylint: disable=protected-access n_classes, weight_column, label_vocabulary, loss_reduction) def _model_fn(features, labels, mode, config): return _baseline_model_fn( features=features, labels=labels, mode=mode, head=head, optimizer=optimizer, weight_column=weight_column, config=config) super(BaselineClassifier, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export('estimator.BaselineEstimator', v1=[]) class BaselineEstimatorV2(estimator.EstimatorV2): """An estimator that can establish a simple baseline. The estimator uses a user-specified head. This estimator ignores feature values and will learn to predict the average value of each label. E.g. for single-label classification problems, this will predict the probability distribution of the classes as seen in the labels. 
For multi-label classification problems, it will predict the ratio of examples that contain each class. Example: ```python # Build baseline multi-label classifier. estimator = tf.estimator.BaselineEstimator( head=tf.estimator.MultiLabelHead(n_classes=3)) # Input builders def input_fn_train: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_eval: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass # Fit model. estimator.train(input_fn=input_fn_train) # Evaluates cross entropy between the test and train labels. loss = estimator.evaluate(input_fn=input_fn_eval)["loss"] # For each class, predicts the ratio of training examples that contain the # class. predictions = estimator.predict(new_samples) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * if `weight_column` is specified in the `head` constructor (and not None) for the head passed to BaselineEstimator's constructor, a feature with `key=weight_column` whose value is a `Tensor`. """ def __init__(self, head, model_dir=None, optimizer='Ftrl', config=None): """Initializes a BaselineEstimator instance. Args: head: A `Head` instance constructed with a method such as `tf.estimator.MultiLabelHead`. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. optimizer: String, `tf_keras.optimizers.*` object, or callable that creates the optimizer to use for training. If not specified, will use `Ftrl` as the default optimizer. config: `RunConfig` object to configure the runtime settings. 
""" def _model_fn(features, labels, mode, config): return _baseline_model_fn_v2( features=features, labels=labels, mode=mode, head=head, optimizer=optimizer, config=config) super(BaselineEstimatorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export(v1=['estimator.BaselineEstimator']) # pylint: disable=missing-docstring class BaselineEstimator(estimator.Estimator): __doc__ = BaselineEstimatorV2.__doc__ def __init__(self, head, model_dir=None, optimizer='Ftrl', config=None): def _model_fn(features, labels, mode, config): return _baseline_model_fn( features=features, labels=labels, mode=mode, head=head, optimizer=optimizer, config=config) super(BaselineEstimator, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export('estimator.BaselineRegressor', v1=[]) class BaselineRegressorV2(estimator.EstimatorV2): """A regressor that can establish a simple baseline. This regressor ignores feature values and will learn to predict the average value of each label. Example: ```python # Build BaselineRegressor regressor = tf.estimator.BaselineRegressor() # Input builders def input_fn_train: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_eval: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass # Fit model. regressor.train(input_fn=input_fn_train) # Evaluate squared-loss between the test and train targets. loss = regressor.evaluate(input_fn=input_fn_eval)["loss"] # predict outputs the mean value seen during training. predictions = regressor.predict(new_samples) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * if `weight_column` is not `None`, a feature with `key=weight_column` whose value is a `Tensor`. @compatibility(eager) Estimators can be used while eager execution is enabled. 
Note that `input_fn` and all hooks are executed inside a graph context, so they have to be written to be compatible with graph mode. Note that `input_fn` code using `tf.data` generally works in both graph and eager modes. @end_compatibility """ def __init__(self, model_dir=None, label_dimension=1, weight_column=None, optimizer='Ftrl', config=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE): """Initializes a BaselineRegressor instance. Args: model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. label_dimension: Number of regression targets per example. This is the size of the last dimension of the labels and logits `Tensor` objects (typically, these have shape `[batch_size, label_dimension]`). weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It will be multiplied by the loss of the example. optimizer: String, `tf_keras.optimizers.*` object, or callable that creates the optimizer to use for training. If not specified, will use `Ftrl` as the default optimizer. config: `RunConfig` object to configure the runtime settings. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. Returns: A `BaselineRegressor` estimator. 
""" head = regression_head.RegressionHead( label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) def _model_fn(features, labels, mode, config): return _baseline_model_fn_v2( features=features, labels=labels, mode=mode, head=head, optimizer=optimizer, config=config) super(BaselineRegressorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export(v1=['estimator.BaselineRegressor']) # pylint: disable=missing-docstring class BaselineRegressor(estimator.Estimator): __doc__ = BaselineRegressorV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM') def __init__(self, model_dir=None, label_dimension=1, weight_column=None, optimizer='Ftrl', config=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM): head = head_lib._regression_head( # pylint: disable=protected-access label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) def _model_fn(features, labels, mode, config): return _baseline_model_fn( features=features, labels=labels, mode=mode, head=head, optimizer=optimizer, config=config) super(BaselineRegressor, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) ================================================ FILE: tensorflow_estimator/python/estimator/canned/baseline_estimator_test.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
def mock_optimizer_v2(testcase, expected_loss=None):
  """Creates a mock optimizer to test the train method.

  Args:
    testcase: A TestCase instance.
    expected_loss: If given, will assert the loss value.

  Returns:
    A mock Optimizer.
  """
  expected_var_names = ['%s:0' % BIAS_NAME]

  class _Optimizer(tf_keras.optimizers.legacy.Optimizer):

    def get_updates(self, loss, params):
      trainable_vars = params
      testcase.assertItemsEqual(expected_var_names,
                                [var.name for var in trainable_vars])

      # Verify loss. We can't check the value directly, so we add an assert op.
      # `assertEquals` was a deprecated alias removed in Python 3.12; use
      # `assertEqual`.
      testcase.assertEqual(0, loss.shape.ndims)

      def _update_ops():
        # Mimic a real optimizer by bumping `iterations` (wired to the global
        # step by the model_fn) when it exists; otherwise emit a no-op.
        if self.iterations is not None:
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]

      if expected_loss is None:
        return _update_ops()
      assert_loss = assert_close(
          tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32),
          loss,
          name='assert_loss')
      # Create the update ops under the assertion so training fails loudly if
      # the loss does not match the expected value.
      with tf.control_dependencies((assert_loss,)):
        return _update_ops()

    def get_config(self):
      config = super(_Optimizer, self).get_config()
      return config

  optimizer = _Optimizer(name='my_optimizer')
  return optimizer
# Training loss is the sum over batch size = (9 + 9) / 2 = 9 # Average loss is the average over batch = 9 self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 9., metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_weights(self): """Tests evaluation with weights.""" with tf.Graph().as_default(): tf.Variable([13.0], name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) def _input_fn(): features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} labels = ((10.,), (10.,)) return features, labels baseline_estimator = _baseline_estimator_fn( weight_column='weights', model_dir=self._model_dir) eval_metrics = baseline_estimator.evaluate(input_fn=_input_fn, steps=1) # Logit is bias = 13, while label is 10. # Loss per example is 3**2 = 9. # Training loss is the weighted sum over batch size= (9 + 2*9) / 2 = 13.5 # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 13.5, metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_for_multi_dimensions(self): label_dim = 2 with tf.Graph().as_default(): tf.Variable([46.0, 58.0], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_estimator = _baseline_estimator_fn( label_dimension=label_dim, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( x={ 'age': np.array([[2., 4., 5.]]), }, y=np.array([[46., 58.]]), batch_size=1, num_epochs=None, shuffle=False) eval_metrics = baseline_estimator.evaluate(input_fn=input_fn, steps=1) self.assertItemsEqual( (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, 
  def test_1d(self):
    """Tests predict when all variables are one-dimensional."""
    with tf.Graph().as_default():
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[2.]])},
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = baseline_estimator.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # The baseline model ignores features: prediction = bias = 0.2.
    # (The old comment "x * weight + bias = 2. * 10. + .2 = 20.2" described a
    # linear model, not this baseline.)
    self.assertAllClose([[.2]], predicted_scores)
export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def test_numpy_input_fn(self): """Tests complete flow with numpy_input_fn.""" label_dimension = 2 input_dimension = label_dimension batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, label_dimension=label_dimension, prediction_length=prediction_length) class BaselineEstimatorTrainingTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _assert_checkpoint(self, label_dimension, expected_global_step, expected_bias=None): shapes = { name: shape for (name, shape) in tf.train.list_variables(self._model_dir) } self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertEqual( expected_global_step, tf.train.load_variable(self._model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP)) self.assertEqual([label_dimension], shapes[BIAS_NAME]) if expected_bias is not None: self.assertEqual(expected_bias, tf.train.load_variable(self._model_dir, BIAS_NAME)) def testFromScratch(self): # Create BaselineRegressor. label = 5. age = 17 # loss = (logits - label)^2 = (0 - 5.)^2 = 25. 
  def testFromCheckpoint(self):
    """Trains on top of an existing checkpoint and checks the result."""
    # Create initial checkpoint.
    bias = 7.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = bias = 7.
    # loss = (logits - label)^2 = (7 - 5)^2 = 4
    mock_optimizer = mock_optimizer_v2(self, expected_loss=4.)
    baseline_estimator = _baseline_estimator_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_estimator.train(
        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
    self.assertEqual(
        initial_global_step + num_steps,
        baseline_estimator.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        label_dimension=1,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=[bias])
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for baseline.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import math import os import shutil import tempfile import numpy as np import six import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import baseline from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.inputs import pandas_io try: # pylint: disable=g-import-not-at-top import pandas as pd HAS_PANDAS = True except IOError: # Pandas writes a temporary file during import. If it fails, don't use pandas. HAS_PANDAS = False except ImportError: HAS_PANDAS = False # pylint rules which are disabled by default for test files. # pylint: disable=invalid-name,protected-access,missing-docstring # Names of variables created by model. 
# Name of the single bias variable created by the Baseline model.
BIAS_NAME = 'baseline/bias'


def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
  """Returns an assert op checking `actual` is within `rtol` of `expected`."""
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    rdiff = tf.math.abs(expected - actual, 'diff') / tf.math.abs(expected)
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return tf.compat.v1.debugging.assert_less(
        rdiff,
        rtol,
        data=('Condition expected =~ actual did not hold element-wise:'
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        name=scope)


def save_variables_to_ckpt(model_dir):
  """Initializes all variables and saves them as a checkpoint in `model_dir`."""
  init_all_op = [tf.compat.v1.initializers.global_variables()]
  with tf.compat.v1.Session() as sess:
    sess.run(init_all_op)
    tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))


def queue_parsed_features(feature_map):
  """Speeds up input pipelines by enqueuing parsed features in a FIFO queue."""
  tensors_to_enqueue = []
  keys = []
  for key, tensor in six.iteritems(feature_map):
    keys.append(key)
    tensors_to_enqueue.append(tensor)
  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
  input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes)
  tf.compat.v1.train.queue_runner.add_queue_runner(
      tf.compat.v1.train.queue_runner.QueueRunner(
          input_queue, [input_queue.enqueue(tensors_to_enqueue)]))
  dequeued_tensors = input_queue.dequeue()
  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}


def sorted_key_dict(unsorted_dict):
  """Returns a copy of `unsorted_dict` built in sorted-key order."""
  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}


def sigmoid(x):
  """Numpy sigmoid, used to compute expected metric values."""
  return 1 / (1 + np.exp(-1.0 * x))


def _baseline_regressor_fn(*args, **kwargs):
  return baseline.BaselineRegressorV2(*args, **kwargs)


def _baseline_classifier_fn(*args, **kwargs):
  return baseline.BaselineClassifierV2(*args, **kwargs)


def mock_optimizer_v2(testcase, expected_loss=None):
  """Creates a mock optimizer to test the train method.

  Args:
    testcase: A TestCase instance.
    expected_loss: If given, will assert the loss value.

  Returns:
    A mock Optimizer.
  """
  expected_var_names = ['%s:0' % BIAS_NAME]

  class _Optimizer(tf_keras.optimizers.legacy.Optimizer):

    def get_updates(self, loss, params):
      trainable_vars = params
      testcase.assertItemsEqual(expected_var_names,
                                [var.name for var in trainable_vars])

      # Verify loss. We can't check the value directly, so we add an assert op.
      # NOTE: `assertEquals` is a deprecated alias removed in Python 3.12;
      # use `assertEqual`.
      testcase.assertEqual(0, loss.shape.ndims)
      if expected_loss is None:
        if self.iterations is not None:
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]
      assert_loss = assert_close(
          tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32),
          loss,
          name='assert_loss')
      with tf.control_dependencies((assert_loss,)):
        if self.iterations is not None:
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]

    def get_config(self):
      config = super(_Optimizer, self).get_config()
      return config

  optimizer = _Optimizer(name='my_optimizer')

  return optimizer


# Tests for Baseline Regressor.


# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
class BaselineRegressorEvaluationTest(tf.test.TestCase):
  """Tests `evaluate` of BaselineRegressorV2 against hand-computed metrics."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_evaluation_for_simple_data(self):
    with tf.Graph().as_default():
      tf.Variable([13.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
    eval_metrics = baseline_regressor.evaluate(
        input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)

    # Logit is bias = 13, while label is 10. Loss is 3**2 = 9.
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 9.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_batch(self):
    """Tests evaluation for batch_size==2."""
    with tf.Graph().as_default():
      tf.Variable([13.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
    eval_metrics = baseline_regressor.evaluate(
        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)

    # Logit is bias = 13, while label is 10.
    # Loss per example is 3**2 = 9.
    # Training loss is the sum over batch size = (9 + 9) / 2 = 9
    # Average loss is the average over batch = 9
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 9.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_weights(self):
    """Tests evaluation with weights."""
    with tf.Graph().as_default():
      tf.Variable([13.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    def _input_fn():
      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
      labels = ((10.,), (10.,))
      return features, labels

    baseline_regressor = _baseline_regressor_fn(
        weight_column='weights', model_dir=self._model_dir)
    eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1)

    # Logit is bias = 13, while label is 10.
    # Loss per example is 3**2 = 9.
    # Training loss is the weighted sum over batch size = (9 + 2*9) / 2 = 13.5
    # average loss is the weighted average = (9 + 2*9) / (1 + 2) = 9
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 13.5,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_for_multi_dimensions(self):
    label_dim = 2
    with tf.Graph().as_default():
      tf.Variable([46.0, 58.0], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_regressor = _baseline_regressor_fn(
        label_dimension=label_dim, model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={
            'age': np.array([[2., 4., 5.]]),
        },
        y=np.array([[46., 58.]]),
        batch_size=1,
        num_epochs=None,
        shuffle=False)
    eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1)

    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is bias which is [46, 58]
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])


class BaselineRegressorPredictTest(tf.test.TestCase):
  """Tests `predict` of BaselineRegressorV2 (prediction is simply the bias)."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_1d(self):
    """Tests predict when all variables are one-dimensional."""
    with tf.Graph().as_default():
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[2.]])},
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = baseline_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # Baseline ignores the input feature: prediction = bias = 0.2.
    self.assertAllClose([[.2]], predicted_scores)

  def testMultiDim(self):
    """Tests predict when all variables are multi-dimensional."""
    batch_size = 2
    label_dimension = 3
    with tf.Graph().as_default():
      tf.Variable(  # shape=[label_dimension]
          [.2, .4, .6], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_regressor = _baseline_regressor_fn(
        label_dimension=label_dimension, model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        # x shape=[batch_size, x_dim]
        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predictions = baseline_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # score = bias, shape=[batch_size, label_dimension]
    self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], predicted_scores)


class BaselineRegressorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for the regressor."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, prediction_length):
    """Trains, evaluates, predicts with, and exports a fresh estimator."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = _baseline_regressor_fn(
        label_dimension=label_dimension, model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['predictions'] for x in est.predict(predict_input_fn)])
    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return

    # Pandas DataFrame naturally supports 1 dim data only.
    label_dimension = 1
    input_dimension = label_dimension
    batch_size = 10
    data = np.array([1., 2., 3., 4.], dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    prediction_length = 4

    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(
                              value=datum[:label_dimension])),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)


class BaselineRegressorTrainingTest(tf.test.TestCase):
  """Tests `train` of BaselineRegressorV2 and the resulting checkpoints."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _assert_checkpoint(self,
                         label_dimension,
                         expected_global_step,
                         expected_bias=None):
    """Checks global step and (optionally) bias value in the checkpoint."""
    shapes = {
        name: shape
        for (name, shape) in tf.train.list_variables(self._model_dir)
    }

    self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertEqual(
        expected_global_step,
        tf.train.load_variable(self._model_dir,
                               tf.compat.v1.GraphKeys.GLOBAL_STEP))

    self.assertEqual([label_dimension], shapes[BIAS_NAME])
    if expected_bias is not None:
      self.assertEqual(expected_bias,
                       tf.train.load_variable(self._model_dir, BIAS_NAME))

  def testFromScratchWithDefaultOptimizer(self):
    # Create BaselineRegressor.
    label = 5.
    age = 17
    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)

    # Train for a few steps, and validate final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps)

  def testTrainWithOneDimLabel(self):
    label_dimension = 1
    batch_size = 20
    est = _baseline_regressor_fn(
        label_dimension=label_dimension, model_dir=self._model_dir)
    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    self.assertEqual((batch_size,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'age': data_rank_1},
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(label_dimension=1, expected_global_step=200)

  def testTrainWithOneDimWeight(self):
    label_dimension = 1
    batch_size = 20
    est = _baseline_regressor_fn(
        label_dimension=label_dimension,
        weight_column='w',
        model_dir=self._model_dir)

    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    self.assertEqual((batch_size,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={
            'age': data_rank_1,
            'w': data_rank_1
        },
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(label_dimension=1, expected_global_step=200)

  def testFromScratch(self):
    # Create BaselineRegressor.
    label = 5.
    age = 17
    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
    mock_optimizer = mock_optimizer_v2(self, expected_loss=25.)
    baseline_regressor = _baseline_regressor_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self.assertEqual(
        num_steps,
        baseline_regressor.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        label_dimension=1, expected_global_step=num_steps, expected_bias=[0.])

  def testFromCheckpoint(self):
    # Create initial checkpoint.
    bias = 7.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = bias = 7.
    # loss = (logits - label)^2 = (7 - 5)^2 = 4
    mock_optimizer = mock_optimizer_v2(self, expected_loss=4.)
    baseline_regressor = _baseline_regressor_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
    self.assertEqual(
        initial_global_step + num_steps,
        baseline_regressor.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        label_dimension=1,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=[bias])

  def testFromCheckpointMultiBatch(self):
    # Create initial checkpoint.
    bias = 5.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = bias
    # logits[0] = 5.
    # logits[1] = 5.
    # loss = (sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2) / 2 (batch size)
    # loss = 2
    mock_optimizer = mock_optimizer_v2(self, expected_loss=2.)
    baseline_regressor = _baseline_regressor_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
        steps=num_steps)
    self.assertEqual(
        initial_global_step + num_steps,
        baseline_regressor.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        label_dimension=1,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=bias)


# Tests for Baseline Classifier.
class BaselineClassifierTrainingTest(tf.test.TestCase):
  # Tests `train` of BaselineClassifierV2 for binary (n_classes=2) and
  # multi-class (n_classes=4) setups, validating optimizer calls and the
  # checkpoint contents after training.

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _assert_checkpoint(self, n_classes, expected_global_step,
                         expected_bias=None):
    """Checks global step, bias shape and (optionally) bias values."""
    # Binary classification uses a single logit; multi-class uses n_classes.
    logits_dimension = n_classes if n_classes > 2 else 1

    shapes = {
        name: shape
        for (name, shape) in tf.train.list_variables(self._model_dir)
    }

    self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertEqual(
        expected_global_step,
        tf.train.load_variable(self._model_dir,
                               tf.compat.v1.GraphKeys.GLOBAL_STEP))

    self.assertEqual([logits_dimension], shapes[BIAS_NAME])
    if expected_bias is not None:
      self.assertAllEqual(expected_bias,
                          tf.train.load_variable(self._model_dir, BIAS_NAME))

  def _testFromScratchWithDefaultOptimizer(self, n_classes):
    label = 0
    age = 17
    est = baseline.BaselineClassifierV2(
        n_classes=n_classes, model_dir=self._model_dir)

    # Train for a few steps, and validate final checkpoint.
    num_steps = 10
    est.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self._assert_checkpoint(n_classes, num_steps)

  def testBinaryClassesFromScratchWithDefaultOptimizer(self):
    self._testFromScratchWithDefaultOptimizer(n_classes=2)

  def testMultiClassesFromScratchWithDefaultOptimizer(self):
    self._testFromScratchWithDefaultOptimizer(n_classes=4)

  def _testTrainWithTwoDimsLabel(self, n_classes):
    """Labels of shape [batch, 1] must be accepted."""
    batch_size = 20

    est = baseline.BaselineClassifierV2(
        n_classes=n_classes, model_dir=self._model_dir)
    data_rank_1 = np.array([0, 1])
    data_rank_2 = np.array([[0], [1]])
    self.assertEqual((2,), data_rank_1.shape)
    self.assertEqual((2, 1), data_rank_2.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'age': data_rank_1},
        y=data_rank_2,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(n_classes, 200)

  def testBinaryClassesTrainWithTwoDimsLabel(self):
    self._testTrainWithTwoDimsLabel(n_classes=2)

  def testMultiClassesTrainWithTwoDimsLabel(self):
    self._testTrainWithTwoDimsLabel(n_classes=4)

  def _testTrainWithOneDimLabel(self, n_classes):
    """Labels of shape [batch] must be accepted."""
    batch_size = 20

    est = baseline.BaselineClassifierV2(
        n_classes=n_classes, model_dir=self._model_dir)
    data_rank_1 = np.array([0, 1])
    self.assertEqual((2,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'age': data_rank_1},
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(n_classes, 200)

  def testBinaryClassesTrainWithOneDimLabel(self):
    self._testTrainWithOneDimLabel(n_classes=2)

  def testMultiClassesTrainWithOneDimLabel(self):
    self._testTrainWithOneDimLabel(n_classes=4)

  def _testTrainWithTwoDimsWeight(self, n_classes):
    """Weights of shape [batch, 1] must be accepted."""
    batch_size = 20

    est = baseline.BaselineClassifierV2(
        weight_column='w', n_classes=n_classes, model_dir=self._model_dir)
    data_rank_1 = np.array([0, 1])
    data_rank_2 = np.array([[0], [1]])
    self.assertEqual((2,), data_rank_1.shape)
    self.assertEqual((2, 1), data_rank_2.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={
            'age': data_rank_1,
            'w': data_rank_2
        },
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(n_classes, 200)

  def testBinaryClassesTrainWithTwoDimsWeight(self):
    self._testTrainWithTwoDimsWeight(n_classes=2)

  def testMultiClassesTrainWithTwoDimsWeight(self):
    self._testTrainWithTwoDimsWeight(n_classes=4)

  def _testTrainWithOneDimWeight(self, n_classes):
    """Weights of shape [batch] must be accepted."""
    batch_size = 20

    est = baseline.BaselineClassifierV2(
        weight_column='w', n_classes=n_classes, model_dir=self._model_dir)
    data_rank_1 = np.array([0, 1])
    self.assertEqual((2,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={
            'age': data_rank_1,
            'w': data_rank_1
        },
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(n_classes, 200)

  def testBinaryClassesTrainWithOneDimWeight(self):
    self._testTrainWithOneDimWeight(n_classes=2)

  def testMultiClassesTrainWithOneDimWeight(self):
    self._testTrainWithOneDimWeight(n_classes=4)

  def _testFromScratch(self, n_classes):
    label = 1
    age = 17
    # For binary classifier:
    #   loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are
    #   all zero initially) and label = 1 so,
    #   loss = 1 * -log ( sigmoid(logits) ) = 0.69315
    # For multi class classifier:
    #   loss = cross_entropy(logits, label) where logits are all 0s (weights are
    #   all zero initially) and label = 1 so,
    #   loss = 1 * -log ( 1.0 / n_classes )
    # For this particular test case, as logits are same, the formula
    # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases.
    mock_optimizer = mock_optimizer_v2(
        self, expected_loss=-1 * math.log(1.0 / n_classes))

    est = baseline.BaselineClassifierV2(
        n_classes=n_classes,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    est.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self.assertEqual(num_steps,
                     est.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        n_classes,
        expected_global_step=num_steps,
        expected_bias=[0.] if n_classes == 2 else [.0] * n_classes)

  def testBinaryClassesFromScratch(self):
    self._testFromScratch(n_classes=2)

  def testMultiClassesFromScratch(self):
    self._testFromScratch(n_classes=4)

  def _testFromCheckpoint(self, n_classes):
    # Create initial checkpoint.
    label = 1
    age = 17
    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # For binary classifier:
    #   logits = bias = -1.
    #   loss = sigmoid_cross_entropy(logits, label)
    #   so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133
    # For multi class classifier:
    #   loss = cross_entropy(logits, label)
    #   where logits = bias and label = 1
    #   so, loss = 1 * -log ( softmax(logits)[1] )
    if n_classes == 2:
      expected_loss = 1.3133
    else:
      logits = bias
      logits_exp = np.exp(logits)
      softmax = logits_exp / logits_exp.sum()
      expected_loss = -1 * math.log(softmax[label])

    mock_optimizer = mock_optimizer_v2(self, expected_loss=expected_loss)

    est = baseline.BaselineClassifierV2(
        n_classes=n_classes,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    est.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self.assertEqual(initial_global_step + num_steps,
                     est.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        n_classes,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=bias)

  def testBinaryClassesFromCheckpoint(self):
    self._testFromCheckpoint(n_classes=2)

  def testMultiClassesFromCheckpoint(self):
    self._testFromCheckpoint(n_classes=4)

  def _testFromCheckpointFloatLabels(self, n_classes):
    """Tests float labels for binary classification."""
    # Create initial checkpoint.
    # Float (soft) labels only make sense for the binary head, so the
    # multi-class variant of this test is a no-op.
    if n_classes > 2:
      return
    label = 0.8
    age = 17
    bias = [-1.0]
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = bias = -1.
    # loss = sigmoid_cross_entropy(logits, label)
    # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617
    mock_optimizer = mock_optimizer_v2(self, expected_loss=1.1132617)

    est = baseline.BaselineClassifierV2(
        n_classes=n_classes,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    est.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self.assertEqual(initial_global_step + num_steps,
                     est.get_variable_value(mock_optimizer.iterations.name))

  def testBinaryClassesFromCheckpointFloatLabels(self):
    self._testFromCheckpointFloatLabels(n_classes=2)

  def testMultiClassesFromCheckpointFloatLabels(self):
    self._testFromCheckpointFloatLabels(n_classes=4)

  def _testFromCheckpointMultiBatch(self, n_classes):
    # Create initial checkpoint.
    label = [1, 0]
    age = [17, 18.5]
    batch_size = 2
    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes).
    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # For binary classifier:
    #   logits = bias
    #   logits[0] = -1.
    #   logits[1] = -1.
    #   loss = sigmoid_cross_entropy(logits, label)
    #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
    #   loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132
    # For multi class classifier:
    #   loss = cross_entropy(logits, label)
    #   where logits = bias and label = [1, 0]
    #   so, loss = 1 * -log ( softmax(logits)[label] )
    if n_classes == 2:
      expected_loss = (1.3133 + 0.3132) / 2
    else:
      # Expand logits since batch_size=2
      logits = bias * np.ones(shape=(2, 1))
      logits_exp = np.exp(logits)
      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
      expected_loss = (expected_loss_0 + expected_loss_1) / 2

    mock_optimizer = mock_optimizer_v2(self, expected_loss=expected_loss)

    est = baseline.BaselineClassifierV2(
        n_classes=n_classes,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    est.train(input_fn=lambda: ({'age': (age)}, (label)), steps=num_steps)
    self.assertEqual(initial_global_step + num_steps,
                     est.get_variable_value(mock_optimizer.iterations.name))
    self._assert_checkpoint(
        n_classes,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=bias)

  def testBinaryClassesFromCheckpointMultiBatch(self):
    self._testFromCheckpointMultiBatch(n_classes=2)

  def testMultiClassesFromCheckpointMultiBatch(self):
    self._testFromCheckpointMultiBatch(n_classes=4)


class BaselineClassifierEvaluationTest(tf.test.TestCase):
  # Tests `evaluate` of BaselineClassifierV2 against metric values computed
  # by hand (binary) or with numpy softmax/log (multi-class).

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _test_evaluation_for_simple_data(self, n_classes):
    label = 1
    age = 1.
    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes

    with tf.Graph().as_default():
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = _baseline_classifier_fn(
        n_classes=n_classes, model_dir=self._model_dir)
    eval_metrics = est.evaluate(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1)

    if n_classes == 2:
      # Binary classes: loss = -log(sigmoid(-1)) / batch size = 1.3133
      # Prediction = sigmoid(-1) = 0.2689
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: 1.3133,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: 1.3133,
          metric_keys.MetricKeys.ACCURACY: 0.,
          metric_keys.MetricKeys.PRECISION: 0.,
          metric_keys.MetricKeys.RECALL: 0.,
          metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
          metric_keys.MetricKeys.LABEL_MEAN: 1.,
          metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
          metric_keys.MetricKeys.AUC: 0.,
          metric_keys.MetricKeys.AUC_PR: 1.,
      }
    else:
      # Multi classes: loss = 1 * -log ( softmax(logits)[label] )
      logits = bias
      logits_exp = np.exp(logits)
      softmax = logits_exp / logits_exp.sum()
      expected_loss = -1 * math.log(softmax[label])

      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
          metric_keys.MetricKeys.ACCURACY: 0.,
      }

    self.assertAllClose(
        sorted_key_dict(expected_metrics),
        sorted_key_dict(eval_metrics),
        rtol=1e-3)

  def test_binary_classes_evaluation_for_simple_data(self):
    self._test_evaluation_for_simple_data(n_classes=2)

  def test_multi_classes_evaluation_for_simple_data(self):
    self._test_evaluation_for_simple_data(n_classes=4)

  def _test_evaluation_batch(self, n_classes):
    """Tests evaluation for batch_size==2."""
    label = [1, 0]
    age = [17., 18.]
    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
    initial_global_step = 100

    with tf.Graph().as_default():
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = _baseline_classifier_fn(
        n_classes=n_classes, model_dir=self._model_dir)
    eval_metrics = est.evaluate(
        input_fn=lambda: ({'age': (age)}, (label)), steps=1)

    if n_classes == 2:
      # Logits are (-1., -1.) labels are (1, 0).
      # Loss is
      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
      # Prediction = sigmoid(-1) = 0.2689
      expected_loss = (1.3133 + 0.3132) / 2  # batch size

      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
          metric_keys.MetricKeys.ACCURACY: 0.5,
          metric_keys.MetricKeys.PRECISION: 0.,
          metric_keys.MetricKeys.RECALL: 0.,
          metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
          metric_keys.MetricKeys.LABEL_MEAN: 0.5,
          metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
          metric_keys.MetricKeys.AUC: 0.5,
          metric_keys.MetricKeys.AUC_PR: 0.5,
      }
    else:
      # Expand logits since batch_size=2
      logits = bias * np.ones(shape=(2, 1))
      logits_exp = np.exp(logits)
      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
      expected_loss = (expected_loss_0 + expected_loss_1) / 2  # batch size

      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
          metric_keys.MetricKeys.ACCURACY: 0.5,
      }

    self.assertAllClose(
        sorted_key_dict(expected_metrics),
        sorted_key_dict(eval_metrics),
        rtol=1e-3)

  def test_binary_classes_evaluation_batch(self):
    self._test_evaluation_batch(n_classes=2)

  def test_multi_classes_evaluation_batch(self):
    self._test_evaluation_batch(n_classes=4)

  def _test_evaluation_weights(self, n_classes):
    """Tests evaluation with weights."""

    label = [1, 0]
    age = [17., 18.]
    weights = [1., 2.]
    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes).
    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
    initial_global_step = 100

    with tf.Graph().as_default():
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = _baseline_classifier_fn(
        n_classes=n_classes, weight_column='w', model_dir=self._model_dir)
    eval_metrics = est.evaluate(
        input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1)

    if n_classes == 2:
      # Logits are (-1., -1.) labels are (1, 0).
      # Loss is
      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
      # weights = [1., 2.]
      expected_loss = (1.3133 * 1. + 0.3132 * 2.) / 2  # batch size
      loss_mean = (1.3133 * 1. + 0.3132 * 2.) / (1.0 + 2.0)
      label_mean = np.average(label, weights=weights)
      logits = [-1, -1]
      logistics = sigmoid(np.array(logits))
      predictions_mean = np.average(logistics, weights=weights)

      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
          metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
          metric_keys.MetricKeys.PRECISION: 0.,
          metric_keys.MetricKeys.RECALL: 0.,
          metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
          metric_keys.MetricKeys.LABEL_MEAN: label_mean,
          metric_keys.MetricKeys.ACCURACY_BASELINE: (max(label_mean,
                                                         1 - label_mean)),
          metric_keys.MetricKeys.AUC: 0.5,
          metric_keys.MetricKeys.AUC_PR: 0.33333,
      }
    else:
      # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
      # Expand logits since batch_size=2
      logits = bias * np.ones(shape=(2, 1))
      logits_exp = np.exp(logits)
      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
      loss_mean = np.average([expected_loss_0, expected_loss_1],
                             weights=weights)
      expected_loss = (loss_mean * np.sum(weights)) / 2  # batch size

      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
          metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
      }

    self.assertAllClose(
        sorted_key_dict(expected_metrics),
        sorted_key_dict(eval_metrics),
        rtol=1e-3)

  def test_binary_classes_evaluation_weights(self):
    self._test_evaluation_weights(n_classes=2)

  def test_multi_classes_evaluation_weights(self):
    self._test_evaluation_weights(n_classes=4)


class BaselineClassifierPredictTest(tf.test.TestCase):
  # Tests `predict` of BaselineClassifierV2.
  # NOTE(review): `_testPredictions` continues beyond this view; only its
  # visible head is shown here.

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
    """Tests predict when all variables are one-dimensional."""
    age = 1.
bias = [10.0] if n_classes == 2 else [10.0] * n_classes with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = _baseline_classifier_fn( label_vocabulary=label_vocabulary, n_classes=n_classes, model_dir=self._model_dir) predict_input_fn = numpy_io.numpy_input_fn( x={'age': np.array([[age]])}, y=None, batch_size=1, num_epochs=1, shuffle=False) predictions = list(est.predict(input_fn=predict_input_fn)) if n_classes == 2: scalar_logits = bias[0] two_classes_logits = [0, scalar_logits] two_classes_logits_exp = np.exp(two_classes_logits) softmax = two_classes_logits_exp / two_classes_logits_exp.sum() expected_predictions = { 'class_ids': [1], 'all_class_ids': [0, 1], 'classes': [label_output_fn(1)], 'all_classes': [label_output_fn(0), label_output_fn(1)], 'logistic': [sigmoid(np.array(scalar_logits))], 'logits': [scalar_logits], 'probabilities': softmax, } else: onedim_logits = np.array(bias) class_ids = onedim_logits.argmax() all_class_ids = list(range(len(onedim_logits))) logits_exp = np.exp(onedim_logits) softmax = logits_exp / logits_exp.sum() expected_predictions = { 'class_ids': [class_ids], 'all_class_ids': all_class_ids, 'classes': [label_output_fn(class_ids)], 'all_classes': [label_output_fn(i) for i in all_class_ids], 'logits': onedim_logits, 'probabilities': softmax, } self.assertEqual(1, len(predictions)) # assertAllClose cannot handle byte type. 
self.assertEqual(expected_predictions['classes'], predictions[0]['classes']) expected_predictions.pop('classes') predictions[0].pop('classes') self.assertAllEqual(expected_predictions['all_classes'], predictions[0]['all_classes']) expected_predictions.pop('all_classes') predictions[0].pop('all_classes') self.assertAllClose( sorted_key_dict(expected_predictions), sorted_key_dict(predictions[0])) def testBinaryClassesWithoutLabelVocabulary(self): n_classes = 2 self._testPredictions( n_classes, label_vocabulary=None, label_output_fn=lambda x: ('%s' % x).encode()) def testBinaryClassesWithLabelVocabulary(self): n_classes = 2 self._testPredictions( n_classes, label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)], label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) def testMultiClassesWithoutLabelVocabulary(self): n_classes = 4 self._testPredictions( n_classes, label_vocabulary=None, label_output_fn=lambda x: ('%s' % x).encode()) def testMultiClassesWithLabelVocabulary(self): n_classes = 4 self._testPredictions( n_classes, label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)], label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) class BaselineClassifierIntegrationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, prediction_length): feature_columns = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] est = _baseline_classifier_fn( n_classes=n_classes, model_dir=self._model_dir) # TRAIN # learn y = x est.train(train_input_fn, steps=200) # EVALUTE scores = est.evaluate(eval_input_fn) self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) # PREDICT predictions = np.array( [x['classes'] for x in est.predict(predict_input_fn)]) 
self.assertAllEqual((prediction_length, 1), predictions.shape) # EXPORT feature_spec = tf.feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def _test_numpy_input_fn(self, n_classes): """Tests complete flow with numpy_input_fn.""" input_dimension = 4 batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) target = np.array([1] * batch_size) train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=target, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=target, batch_size=batch_size, num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) self._test_complete_flow( n_classes=n_classes, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_numpy_input_fn(self): self._test_numpy_input_fn(n_classes=2) def test_multi_classes_numpy_input_fn(self): self._test_numpy_input_fn(n_classes=4) def _test_pandas_input_fn(self, n_classes): """Tests complete flow with pandas_input_fn.""" if not HAS_PANDAS: return # Pandas DataFrame natually supports 1 dim data only. 
input_dimension = 1 batch_size = 10 data = np.array([1., 2., 3., 4.], dtype=np.float32) target = np.array([1, 0, 1, 0], dtype=np.int32) x = pd.DataFrame({'x': data}) y = pd.Series(target) prediction_length = 4 train_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, shuffle=False) predict_input_fn = pandas_io.pandas_input_fn( x=x, batch_size=batch_size, shuffle=False) self._test_complete_flow( n_classes=n_classes, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_pandas_input_fn(self): self._test_pandas_input_fn(n_classes=2) def test_multi_classes_pandas_input_fn(self): self._test_pandas_input_fn(n_classes=4) def _test_input_fn_from_parse_example(self, n_classes): """Tests complete flow with input_fn constructed from parse_example.""" input_dimension = 2 batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) target = np.array([1] * batch_size, dtype=np.int64) serialized_examples = [] for x, y in zip(data, target): example = example_pb2.Example( features=feature_pb2.Features( feature={ 'x': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=x)), 'y': feature_pb2.Feature( int64_list=feature_pb2.Int64List(value=[y])), })) serialized_examples.append(example.SerializeToString()) feature_spec = { 'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32), 'y': tf.io.FixedLenFeature([1], tf.dtypes.int64), } def _train_input_fn(): feature_map = tf.compat.v1.io.parse_example(serialized_examples, feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _eval_input_fn(): feature_map = tf.compat.v1.io.parse_example( 
tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _predict_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = queue_parsed_features(feature_map) features.pop('y') return features, None self._test_complete_flow( n_classes=n_classes, train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_input_fn_from_parse_example(self): self._test_input_fn_from_parse_example(n_classes=2) def test_multi_classes_input_fn_from_parse_example(self): self._test_input_fn_from_parse_example(n_classes=4) # Tests for Baseline logit_fn. class BaselineLogitFnTest(tf.test.TestCase): def test_basic_logit_correctness(self): """baseline_logit_fn simply returns the bias variable.""" with tf.Graph().as_default(): bias_var, logits = baseline._baseline_model_fn_builder_v2( features={'age': [[23.], [31.]]}, num_outputs=2) with tf.compat.v1.Session() as sess: sess.run([tf.compat.v1.initializers.global_variables()]) self.assertAllClose([[0., 0.], [0., 0.]], logits.eval()) sess.run(bias_var[0].assign([10., 5.])) self.assertAllClose([[10., 5.], [10., 5.]], logits.eval()) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/canned_estimator_ds_integration_test.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests canned estimators with distribution strategy."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import inspect
import tempfile

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow_estimator.python.estimator import run_config
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.extenders import add_metrics


class CannedEstimatorDistributionStrategyTest(tf.test.TestCase,
                                              parameterized.TestCase):
  """Checks canned estimators produce the same results with and without a
  distribution strategy (including warm-starting)."""

  def setUp(self):
    super(CannedEstimatorDistributionStrategyTest, self).setUp()
    # Fixed seeds so the with-strategy and without-strategy runs are
    # comparable.
    np.random.seed(1337)
    tf.compat.v1.random.set_random_seed(1337)
    self._model_dir = tempfile.mkdtemp()

  def dataset_input_fn(self, x, y, batch_size, shuffle):
    # Returns an input_fn producing a repeated, batched dataset over (x, y).

    def input_fn():
      dataset = tf.compat.v1.data.Dataset.from_tensor_slices((x, y))
      if shuffle:
        dataset = dataset.shuffle(batch_size)
      dataset = dataset.repeat(10).batch(batch_size)
      return dataset

    return input_fn

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=['graph', 'eager'],
          distribution=[
              tf.compat.v2.__internal__.distribute.combinations.one_device_strategy,
              tf.compat.v2.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
              tf.compat.v2.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus,
          ],
          estimator_cls=[
              dnn_linear_combined.DNNLinearCombinedRegressorV2,
              dnn.DNNRegressorV2,
              linear.LinearRegressorV2,
          ]))
  def test_canned_estimator(self, distribution, estimator_cls):
    label_dimension = 2
    batch_size = 10
    # Adding one extra row (+ label_dimension) to test the last partial batch
    # use case.
    data = np.linspace(
        0.,
        2.,
        batch_size * label_dimension + label_dimension,
        dtype=np.float32)
    data = data.reshape(batch_size + 1, label_dimension)
    fc = tf.feature_column.numeric_column('x', shape=(2,))

    # Set kwargs based on the current canned estimator class.
    estimator_kw_args = {
        'model_dir': self._model_dir,
        'label_dimension': 2,
    }
    cls_args = inspect.getargspec(estimator_cls.__init__).args
    if 'hidden_units' in cls_args:
      estimator_kw_args['hidden_units'] = [2, 2]
    elif 'dnn_hidden_units' in cls_args:
      estimator_kw_args['dnn_hidden_units'] = [2, 2]
    if 'optimizer' in cls_args:
      estimator_kw_args['optimizer'] = 'SGD'
    else:
      estimator_kw_args['linear_optimizer'] = 'SGD'
      estimator_kw_args['dnn_optimizer'] = 'SGD'
    if 'feature_columns' in cls_args:
      estimator_kw_args['feature_columns'] = [fc]
    else:
      estimator_kw_args['linear_feature_columns'] = [fc]
      estimator_kw_args['dnn_feature_columns'] = [fc]

    def my_metrics(features):
      metric = tf_keras.metrics.Mean()
      metric.update_state(features['x'])
      return {'mean_x': metric}

    # Create a canned estimator and train to save a checkpoint.
    input_fn = self.dataset_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    canned_est = estimator_cls(**estimator_kw_args)
    canned_est.train(input_fn=input_fn)

    # Create a second canned estimator, warm-started from the first.
    del estimator_kw_args['model_dir']
    estimator_kw_args['warm_start_from'] = canned_est.model_dir
    warm_started_canned_est = estimator_cls(**estimator_kw_args)
    warm_started_canned_est.train(input_fn=input_fn)

    # Create a third canned estimator, warm-started from the first.
    # The per-replica batch size is scaled down so the global batch matches.
    input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // distribution.num_replicas_in_sync,
        shuffle=False)
    estimator_kw_args['config'] = run_config.RunConfig(
        train_distribute=distribution, eval_distribute=distribution)
    warm_started_canned_est_with_ds = estimator_cls(**estimator_kw_args)
    warm_started_canned_est_with_ds.train(input_fn=input_fn)

    # Both warm-started estimators must end with identical variables.
    for variable_name in warm_started_canned_est.get_variable_names():
      self.assertAllClose(
          warm_started_canned_est_with_ds.get_variable_value(variable_name),
          warm_started_canned_est.get_variable_value(variable_name))

    warm_started_canned_est = add_metrics(warm_started_canned_est, my_metrics)
    warm_started_canned_est_with_ds = add_metrics(
        warm_started_canned_est_with_ds, my_metrics)

    scores = warm_started_canned_est.evaluate(input_fn)
    scores_with_ds = warm_started_canned_est_with_ds.evaluate(input_fn)
    self.assertAlmostEqual(scores['loss'], scores_with_ds['loss'], 5)
    self.assertAlmostEqual(scores['mean_x'], scores_with_ds['mean_x'], 5)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/dnn.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Deep Neural Network estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import tensorflow as tf
from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_lib
from tensorflow.python.framework import ops
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.head import head_utils
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

# The default learning rate of 0.05 is a historical artifact of the initial
# implementation, but seems a reasonable choice.
_LEARNING_RATE = 0.05


def _add_hidden_layer_summary(value, tag):
  # Records sparsity and the activation histogram for a hidden layer output.
  tf.compat.v1.summary.scalar('%s/fraction_of_zero_values' % tag,
                              tf.math.zero_fraction(value))
  tf.compat.v1.summary.histogram('%s/activation' % tag, value)


@estimator_export(v1=['estimator.experimental.dnn_logit_fn_builder'])
def dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
                         dropout, input_layer_partitioner, batch_norm):
  """Function builder for a dnn logit_fn.

  Args:
    units: An int indicating the dimension of the logit layer.  In the MultiHead
      case, this should be the sum of all component Heads' logit dimensions.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    A logit_fn (see below).

  Raises:
    ValueError: If units is not an int.
  """
  if not isinstance(units, six.integer_types):
    raise ValueError('units must be an int. Given type: {}'.format(
        type(units)))

  def dnn_logit_fn(features, mode):
    """Deep Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
        `dict` of same.
      mode: Optional. Specifies if this training, evaluation or prediction. See
        `ModeKeys`.

    Returns:
      A `Tensor` representing the logits, or a list of `Tensor`'s representing
      multiple logits in the MultiHead case.
    """
    dnn_model = _DNNModel(
        units,
        hidden_units,
        feature_columns,
        activation_fn,
        dropout,
        input_layer_partitioner,
        batch_norm,
        name='dnn')
    return dnn_model(features, mode)

  return dnn_logit_fn


def dnn_logit_fn_builder_v2(units, hidden_units, feature_columns,
                            activation_fn, dropout, batch_norm):
  """Function builder for a dnn logit_fn.

  Args:
    units: An int indicating the dimension of the logit layer.  In the MultiHead
      case, this should be the sum of all component Heads' logit dimensions.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    A logit_fn (see below).

  Raises:
    ValueError: If units is not an int.
  """
  if not isinstance(units, six.integer_types):
    raise ValueError('units must be an int. Given type: {}'.format(
        type(units)))

  def dnn_logit_fn(features, mode):
    """Deep Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
        `dict` of same.
      mode: Optional. Specifies if this training, evaluation or prediction. See
        `ModeKeys`.

    Returns:
      A `Tensor` representing the logits, or a list of `Tensor`'s representing
      multiple logits in the MultiHead case.
    """
    dnn_model = _DNNModelV2(
        units,
        hidden_units,
        feature_columns,
        activation_fn,
        dropout,
        batch_norm,
        name='dnn')
    return dnn_model(features, mode)

  return dnn_logit_fn


def _get_previous_name_scope():
  # Returns the parent of the current name scope (with trailing slash),
  # i.e. the scope that was active before the Keras Model added its own.
  current_name_scope = tf.compat.v2.__internal__.get_name_scope()
  return current_name_scope.rsplit('/', 1)[0] + '/'


class _DNNModel(tf_keras.Model):
  """A DNN Model."""

  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               input_layer_partitioner,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModel, self).__init__(name=name, **kwargs)
    # v2 feature columns use the Keras DenseFeatures layer; v1 columns fall
    # back to the legacy InputLayer.
    if feature_column_lib.is_feature_column_v2(feature_columns):
      self._input_layer = tf_keras_v1.layers.DenseFeatures(
          feature_columns=feature_columns, name='input_layer')
    else:
      self._input_layer = feature_column.InputLayer(
          feature_columns=feature_columns,
          name='input_layer',
          create_scope_now=False)

    self._add_layer(self._input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with tf.compat.v1.variable_scope('hiddenlayer_%d' %
                                       layer_id) as hidden_layer_scope:
        hidden_layer = tf_keras_v1.__internal__.legacy.layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_layer_scope,
            _scope=hidden_layer_scope,
        )
        self._add_layer(hidden_layer, hidden_layer_scope.name)
        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = tf_keras_v1.__internal__.legacy.layers.Dropout(
              rate=self._dropout)
          self._add_layer(dropout_layer, dropout_layer.name)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_layer = tf_keras_v1.__internal__.legacy.layers.BatchNormalization(
              # The default momentum 0.99 actually crashes on certain
              # problem, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name='batchnorm_%d' % layer_id,
              _scope='batchnorm_%d' % layer_id)
          self._add_layer(batch_norm_layer, batch_norm_layer.name)
          self._batch_norm_layers.append(batch_norm_layer)

    with tf.compat.v1.variable_scope('logits') as logits_scope:
      self._logits_layer = tf_keras_v1.__internal__.legacy.layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_scope,
          _scope=logits_scope)
      self._add_layer(self._logits_layer, logits_scope.name)
      self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner

  def call(self, features, mode):
    is_training = mode == ModeKeys.TRAIN
    # The Keras training.Model adds a name_scope with the name of the model
    # which modifies the constructed graph. Hence we add another name_scope
    # here which is the one before the training.Model one was applied.
    # TODO(rohanj): Remove this in TF 2.0 (b/116728605)
    with ops.name_scope(name=_get_previous_name_scope()):
      # TODO(rohanj): Remove dependence on variable scope for partitioning.
      with tf.compat.v1.variable_scope(
          'input_from_feature_columns',
          partitioner=self._input_layer_partitioner):
        # Some input layers accept a `training` kwarg, older ones do not.
        try:
          net = self._input_layer(features, training=is_training)
        except TypeError:
          net = self._input_layer(features)
      for i in range(len(self._hidden_layers)):
        net = self._hidden_layers[i](net)
        if self._dropout is not None and is_training:
          net = self._dropout_layers[i](net, training=True)
        if self._batch_norm:
          net = self._batch_norm_layers[i](net, training=is_training)
        _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])
      logits = self._logits_layer(net)
      _add_hidden_layer_summary(logits, self._logits_scope_name)
      return logits

  def _add_layer(self, layer, layer_name):
    # "Magic" required for keras.Model classes to track all the variables in
    # a list of layers.Layer objects.
    # TODO(ashankar): Figure out API so user code doesn't have to do this.
    setattr(self, layer_name, layer)


def _name_from_scope_name(name):
  """Returns the name of an op given the name of its scope.

  Args:
    name: the name of the scope.

  Returns:
    the name of the op (equal to scope name minus any trailing slash).
  """
  return name[:-1] if (name and name[-1] == '/') else name


class _DNNModelV2(tf_keras.Model):
  """A DNN Model."""

  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModelV2, self).__init__(name=name, **kwargs)
    with ops.name_scope(
        'input_from_feature_columns') as input_feature_column_scope:
      layer_name = input_feature_column_scope + 'input_layer'
      if feature_column_lib.is_feature_column_v2(feature_columns):
        self._input_layer = tf_keras.layers.DenseFeatures(
            feature_columns=feature_columns, name=layer_name)
      else:
        raise ValueError(
            'Received a feature column from TensorFlow v1, but this is a '
            'TensorFlow v2 Estimator. Please either use v2 feature columns '
            '(accessible via tf.feature_column.* in TF 2.x) with this '
            'Estimator, or switch to a v1 Estimator for use with v1 feature '
            'columns (accessible via tf.compat.v1.estimator.* and '
            'tf.compat.v1.feature_column.*, respectively.')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        # Get scope name without the trailing slash.
        hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
        hidden_layer = tf_keras.layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_shared_name)
        self._hidden_layer_scope_names.append(hidden_shared_name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = tf_keras.layers.Dropout(rate=self._dropout)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
          # TODO(scottzhu): Change back to use BatchNormalization when the
          # cleanup is done.
          batch_norm_layer = tf_keras.layers.BatchNormalization(
              # The default momentum 0.99 actually crashes on certain
              # problem, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name=batch_norm_name)
          self._batch_norm_layers.append(batch_norm_layer)

    with ops.name_scope('logits') as logits_scope:
      logits_shared_name = _name_from_scope_name(logits_scope)
      self._logits_layer = tf_keras.layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_shared_name)
      self._logits_scope_name = logits_shared_name

  def call(self, features, mode):
    is_training = mode == ModeKeys.TRAIN
    # Some input layers accept a `training` kwarg, older ones do not.
    try:
      net = self._input_layer(features, training=is_training)
    except TypeError:
      net = self._input_layer(features)
    for i in range(len(self._hidden_layers)):
      net = self._hidden_layers[i](net)
      if self._dropout is not None and is_training:
        net = self._dropout_layers[i](net, training=True)
      if self._batch_norm:
        net = self._batch_norm_layers[i](net, training=is_training)
      _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])
    logits = self._logits_layer(net)
    _add_hidden_layer_summary(logits, self._logits_scope_name)
    return logits


def _validate_features(features):
  # `features` must be a dict; a bare Tensor is rejected up front.
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))


def _get_dnn_estimator_spec(use_tpu, head, features, labels, mode, logits,
                            optimizer):
  """Get EstimatorSpec for DNN Model."""
  if use_tpu:
    return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)
  else:
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)


def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=tf.nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None,
                  use_tpu=False,
                  batch_norm=False):
  """Deep Neural Net model_fn v1.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    use_tpu: Whether to make a DNN model able to run on TPU. Will make function
      return a `_TPUEstimatorSpec` instance and disable variable partitioning.
    batch_norm: Whether to use batch normalization after each hidden layer.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If features has the wrong type.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  _validate_features(features)

  num_ps_replicas = config.num_ps_replicas if config else 0

  # Variable partitioning is disabled on TPU.
  partitioner = (None if use_tpu else
                 tf.compat.v1.min_max_variable_partitioner(
                     max_partitions=num_ps_replicas))
  with tf.compat.v1.variable_scope(
      'dnn', values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        None if use_tpu else tf.compat.v1.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    logit_fn = dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner,
        batch_norm=batch_norm)
    logits = logit_fn(features=features, mode=mode)

    return _get_dnn_estimator_spec(use_tpu, head, features, labels, mode,
                                   logits, optimizer)


def _dnn_model_fn_builder_v2(units, hidden_units, feature_columns,
                             activation_fn, dropout, batch_norm, features,
                             mode):
  """Function builder for dnn logits, trainable variables and update ops.

  Args:
    units: An int indicating the dimension of the logit layer.  In the MultiHead
      case, this should be the sum of all component Heads' logit dimensions.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    batch_norm: Whether to use batch normalization after each hidden layer.
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `Tensor` or
      `dict` of same.
    mode: Optional. Specifies if this training, evaluation or prediction. See
      `ModeKeys`.

  Returns:
    A `Tensor` representing the logits, or a list of `Tensor`'s representing
      multiple logits in the MultiHead case.
    A list of trainable variables.
    A list of update ops.

  Raises:
    ValueError: If units is not an int.
  """
  if not isinstance(units, six.integer_types):
    raise ValueError('units must be an int. Given type: {}'.format(
        type(units)))
  dnn_model = _DNNModelV2(
      units,
      hidden_units,
      feature_columns,
      activation_fn,
      dropout,
      batch_norm,
      name='dnn')
  logits = dnn_model(features, mode)
  trainable_variables = dnn_model.trainable_variables
  update_ops = dnn_model.updates

  return logits, trainable_variables, update_ops


def dnn_model_fn_v2(features,
                    labels,
                    mode,
                    head,
                    hidden_units,
                    feature_columns,
                    optimizer='Adagrad',
                    activation_fn=tf.nn.relu,
                    dropout=None,
                    config=None,
                    use_tpu=False,
                    batch_norm=False):
  """Deep Neural Net model_fn v2.

  This function is different than _dnn_model_fn_v1 in the way it handles the
  optimizer when a String optimizer name is passed.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `base_head.Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf_keras.optimizers.Optimizer` object, or callable that
      creates the optimizer to use for training. If not specified, will use the
      Adagrad optimizer. If it is String, the default learning rate of the
      optimizer will be used. If it is String, and optimizer does not have a
      default learning rate, then, a fixed learning rate of 0.05 is used.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    config: `RunConfig` object to configure the runtime settings.
    use_tpu: Whether to make a DNN model able to run on TPU.
Will make function return a `_TPUEstimatorSpec` instance and disable variable partitioning. batch_norm: Whether to use batch normalization after each hidden layer. Returns: An `EstimatorSpec` instance. Raises: ValueError: If features has the wrong type. """ _validate_features(features) del config logits, trainable_variables, update_ops = _dnn_model_fn_builder_v2( units=head.logits_dimension, hidden_units=hidden_units, feature_columns=feature_columns, activation_fn=activation_fn, dropout=dropout, batch_norm=batch_norm, features=features, mode=mode) # In TRAIN mode, create optimizer and assign global_step variable to # optimizer.iterations to make global_step increased correctly, as Hooks # relies on global step as step counter. if mode == ModeKeys.TRAIN: optimizer = optimizers.get_optimizer_instance_v2(optimizer) optimizer.iterations = tf.compat.v1.train.get_or_create_global_step() # Create EstimatorSpec. if use_tpu: estimator_spec_fn = head._create_tpu_estimator_spec # pylint: disable=protected-access else: estimator_spec_fn = head.create_estimator_spec # pylint: disable=protected-access return estimator_spec_fn( features=features, mode=mode, labels=labels, optimizer=optimizer, logits=logits, trainable_variables=trainable_variables, update_ops=update_ops) @estimator_export('estimator.DNNClassifier', v1=[]) class DNNClassifierV2(estimator.EstimatorV2): """A classifier for TensorFlow DNN models. Example: ```python categorical_feature_a = categorical_column_with_hash_bucket(...) categorical_feature_b = categorical_column_with_hash_bucket(...) categorical_feature_a_emb = embedding_column( categorical_column=categorical_feature_a, ...) categorical_feature_b_emb = embedding_column( categorical_column=categorical_feature_b, ...) estimator = tf.estimator.DNNClassifier( feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb], hidden_units=[1024, 512, 256]) # Or estimator using the ProximalAdagradOptimizer optimizer with # regularization. 
estimator = tf.estimator.DNNClassifier( feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb], hidden_units=[1024, 512, 256], optimizer=tf.compat.v1.train.ProximalAdagradOptimizer( learning_rate=0.1, l1_regularization_strength=0.001 )) # Or estimator using an optimizer with a learning rate decay. estimator = tf.estimator.DNNClassifier( feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb], hidden_units=[1024, 512, 256], optimizer=lambda: tf_keras.optimizers.Adam( learning_rate=tf.compat.v1.train.exponential_decay( learning_rate=0.1, global_step=tf.compat.v1.train.get_global_step(), decay_steps=10000, decay_rate=0.96)) # Or estimator with warm-starting from a previous checkpoint. estimator = tf.estimator.DNNClassifier( feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb], hidden_units=[1024, 512, 256], warm_start_from="/path/to/checkpoint/dir") # Input builders def input_fn_train: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_eval: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_predict: # Returns tf.data.Dataset of (x, None) tuple. pass estimator.train(input_fn=input_fn_train) metrics = estimator.evaluate(input_fn=input_fn_eval) predictions = estimator.predict(input_fn=input_fn_predict) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * if `weight_column` is not `None`, a feature with `key=weight_column` whose value is a `Tensor`. * for each `column` in `feature_columns`: - if `column` is a `CategoricalColumn`, a feature with `key=column.name` whose `value` is a `SparseTensor`. - if `column` is a `WeightedCategoricalColumn`, two features: the first with `key` the id column name, the second with `key` the weight column name. Both features' `value` must be a `SparseTensor`. 
- if `column` is a `DenseColumn`, a feature with `key=column.name` whose `value` is a `Tensor`. Loss is calculated by using softmax cross entropy. @compatibility(eager) Estimators can be used while eager execution is enabled. Note that `input_fn` and all hooks are executed inside a graph context, so they have to be written to be compatible with graph mode. Note that `input_fn` code using `tf.data` generally works in both graph and eager modes. @end_compatibility """ def __init__( self, hidden_units, feature_columns, model_dir=None, n_classes=2, weight_column=None, label_vocabulary=None, optimizer='Adagrad', activation_fn=tf.nn.relu, dropout=None, config=None, warm_start_from=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, batch_norm=False, ): """Initializes a `DNNClassifier` instance. Args: hidden_units: Iterable of number hidden units per layer. All layers are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second one has 32. feature_columns: An iterable containing all the feature columns used by the model. All items in the set should be instances of classes derived from `_FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. n_classes: Number of label classes. Defaults to 2, namely binary classification. Must be > 1. weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. If it is a string, it is used as a key to fetch weight tensor from the `features`. If it is a `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then weight_column.normalizer_fn is applied on it to get weight tensor. label_vocabulary: A list of strings represents possible label values. 
If given, labels must be string type and have any value in `label_vocabulary`. If it is not given, that means labels are already encoded as integer or float within [0, 1] for `n_classes=2` and encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there will be errors if vocabulary is not provided and labels are string. optimizer: An instance of `tf_keras.optimizers.*` used to train the model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', SGD'), or callable. Defaults to Adagrad optimizer. activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. dropout: When not `None`, the probability we will drop out a given coordinate. config: `RunConfig` object to configure the runtime settings. warm_start_from: A string filepath to a checkpoint to warm-start from, or a `WarmStartSettings` object to fully configure warm-starting. If the string filepath is provided instead of a `WarmStartSettings`, then all weights are warm-started, and it is assumed that vocabularies and Tensor names are unchanged. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. batch_norm: Whether to use batch normalization after each hidden layer. 
""" head = head_utils.binary_or_multi_class_head( n_classes, weight_column=weight_column, label_vocabulary=label_vocabulary, loss_reduction=loss_reduction) estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN') def _model_fn(features, labels, mode, config): """Call the defined shared dnn_model_fn_v2.""" return dnn_model_fn_v2( features=features, labels=labels, mode=mode, head=head, hidden_units=hidden_units, feature_columns=tuple(feature_columns or []), optimizer=optimizer, activation_fn=activation_fn, dropout=dropout, config=config, batch_norm=batch_norm) super(DNNClassifierV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) @estimator_export(v1=['estimator.DNNClassifier']) # pylint: disable=missing-docstring class DNNClassifier(estimator.Estimator): __doc__ = DNNClassifierV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM') def __init__( self, hidden_units, feature_columns, model_dir=None, n_classes=2, weight_column=None, label_vocabulary=None, optimizer='Adagrad', activation_fn=tf.nn.relu, dropout=None, input_layer_partitioner=None, config=None, warm_start_from=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM, batch_norm=False, ): head = head_lib._binary_logistic_or_multi_class_head( # pylint: disable=protected-access n_classes, weight_column, label_vocabulary, loss_reduction) estimator._canned_estimator_api_gauge.get_cell('Classifier').set('DNN') def _model_fn(features, labels, mode, config): """Call the defined shared dnn_model_fn.""" return _dnn_model_fn( features=features, labels=labels, mode=mode, head=head, hidden_units=hidden_units, feature_columns=tuple(feature_columns or []), optimizer=optimizer, activation_fn=activation_fn, dropout=dropout, input_layer_partitioner=input_layer_partitioner, config=config, batch_norm=batch_norm) super(DNNClassifier, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) 
@estimator_export('estimator.DNNEstimator', v1=[])
class DNNEstimatorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow DNN models with user-specified head.

  Example:

  ```python
  sparse_feature_a = sparse_column_with_hash_bucket(...)
  sparse_feature_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a,
                                          ...)
  sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b,
                                          ...)

  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256])

  # Or estimator using the ProximalAdagradOptimizer optimizer with
  # regularization.
  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=tf.compat.v1.train.ProximalAdagradOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=lambda: tf_keras.optimizers.Adam(
          learning_rate=tf.compat.v1.train.exponential_decay(
              learning_rate=0.1,
              global_step=tf.compat.v1.train.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = tf.estimator.DNNEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
      hidden_units=[1024, 512, 256],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict:
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train)
  metrics = estimator.evaluate(input_fn=input_fn_eval)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss and predicted output are determined by the specified head.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               head,
               hidden_units,
               feature_columns,
               model_dir=None,
               optimizer='Adagrad',
               activation_fn=tf.nn.relu,
               dropout=None,
               config=None,
               warm_start_from=None,
               batch_norm=False):
    """Initializes a `DNNEstimator` instance.

    Args:
      head: A `_Head` instance constructed with a method such as
        `tf.contrib.estimator.multi_label_head`.
      hidden_units: Iterable of number hidden units per layer. All layers are
        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator to
        continue training a previously saved model.
      optimizer: An instance of `tf_keras.optimizers.*` used to train the
        model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl',
        'RMSProp', SGD'), or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn_v2."""
      return dnn_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          config=config,
          batch_norm=batch_norm)

    estimator._canned_estimator_api_gauge.get_cell('Estimator').set('DNN')  # pylint: disable=protected-access
    super(DNNEstimatorV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNEstimator'])  # pylint: disable=missing-docstring
class DNNEstimator(estimator.Estimator):
  __doc__ = DNNEstimatorV2.__doc__

  def __init__(self,
               head,
               hidden_units,
               feature_columns,
               model_dir=None,
               optimizer='Adagrad',
               activation_fn=tf.nn.relu,
               dropout=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               batch_norm=False):

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _dnn_model_fn."""
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm)

    estimator._canned_estimator_api_gauge.get_cell('Estimator').set('DNN')  # pylint: disable=protected-access
    super(DNNEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export('estimator.DNNRegressor', v1=[])
class DNNRegressorV2(estimator.EstimatorV2):
  """A regressor for TensorFlow DNN models.

  Example:

  ```python
  categorical_feature_a = categorical_column_with_hash_bucket(...)
  categorical_feature_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256])

  # Or estimator using the ProximalAdagradOptimizer optimizer with
  # regularization.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=tf.compat.v1.train.ProximalAdagradOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      optimizer=lambda: tf_keras.optimizers.Adam(
          learning_rate=tf.compat.v1.train.exponential_decay(
              learning_rate=0.1,
              global_step=tf.compat.v1.train.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = tf.estimator.DNNRegressor(
      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
      hidden_units=[1024, 512, 256],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict:
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train)
  metrics = estimator.evaluate(input_fn=input_fn_eval)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using mean squared error.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      label_dimension=1,
      weight_column=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      config=None,
      warm_start_from=None,
      loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
      batch_norm=False,
  ):
    """Initializes a `DNNRegressor` instance.

    Args:
      hidden_units: Iterable of number hidden units per layer. All layers are
        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator to
        continue training a previously saved model.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`. If it is a
        `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
        weight_column.normalizer_fn is applied on it to get weight tensor.
      optimizer: An instance of `tf_keras.optimizers.*` used to train the
        model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl',
        'RMSProp', SGD'), or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """
    head = regression_head.RegressionHead(
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Regressor').set('DNN')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the defined shared dnn_model_fn_v2."""
      return dnn_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          config=config,
          batch_norm=batch_norm)

    super(DNNRegressorV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNRegressor'])  # pylint: disable=missing-docstring
class DNNRegressor(estimator.Estimator):
  __doc__ = DNNRegressorV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(
      self,
      hidden_units,
      feature_columns,
      model_dir=None,
      label_dimension=1,
      weight_column=None,
      optimizer='Adagrad',
      activation_fn=tf.nn.relu,
      dropout=None,
      input_layer_partitioner=None,
      config=None,
      warm_start_from=None,
      loss_reduction=tf.compat.v1.losses.Reduction.SUM,
      batch_norm=False,
  ):
    head = head_lib._regression_head(  # pylint: disable=protected-access
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Regressor').set('DNN')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _dnn_model_fn."""
      return _dnn_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          activation_fn=activation_fn,
          dropout=dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm)

    super(DNNRegressor, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)



================================================
FILE: tensorflow_estimator/python/estimator/canned/dnn_estimator_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DNNEstimatorV2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import dnn_testing_utils
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.head import multi_class_head
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.inputs import numpy_io


def _dnn_estimator_fn(weight_column=None, label_dimension=1, **kwargs):
  """Returns a DNNEstimator that uses regression_head."""
  return dnn.DNNEstimatorV2(
      head=regression_head.RegressionHead(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE),
      **kwargs)


def _dnn_estimator_classifier_fn(n_classes=3, **kwargs):
  """Returns a DNNEstimator that uses MultiClassHead."""
  return dnn.DNNEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)


class DNNLogitFnBuilderTest(tf.test.TestCase):

  def testLongInPy2(self):
    # `long` only exists in Python 2; the guard keeps Python 3 runs green.
    if six.PY2:
      ret = dnn.dnn_logit_fn_builder(
          long(1), None, None, None, None, None, None)
      self.assertTrue(callable(ret))


class DNNEstimatorEvaluateTest(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
                               tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_estimator_fn)


class DNNEstimatorPredictTest(dnn_testing_utils.BaseDNNRegressorPredictTest,
                              tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_estimator_fn)


class DNNEstimatorTrainTest(dnn_testing_utils.BaseDNNRegressorTrainTest,
                            tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_estimator_fn)


class DNNEstimatorWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
                                   tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
        self, _dnn_estimator_classifier_fn, _dnn_estimator_fn)


class DNNEstimatorIntegrationTest(tf.test.TestCase):

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self,
                          train_input_fn,
                          eval_input_fn,
                          predict_input_fn,
                          input_dimension,
                          label_dimension,
                          batch_size,
                          optimizer='Adagrad'):
    """Runs train -> evaluate -> predict -> export with one estimator."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn.DNNEstimatorV2(
        head=regression_head.RegressionHead(label_dimension=label_dimension),
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        optimizer=optimizer,
        model_dir=self._model_dir)

    # Train
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _create_input_fn(self, label_dimension, batch_size):
    """Creates input_fn for integration test."""
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)
    return train_input_fn, eval_input_fn, predict_input_fn

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
        label_dimension, batch_size)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_numpy_input_fn_with_optimizer_instance(self):
    """Tests complete flow with optimizer_v2 instance."""
    label_dimension = 2
    batch_size = 10
    train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
        label_dimension, batch_size)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        optimizer=tf_keras.optimizers.legacy.Adagrad(
            0.01))  # Test with optimizer_v2 instance


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/canned/dnn_linear_combined.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow estimators for Linear and DNN joined training models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import six
import tensorflow as tf

from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.head import head_utils
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

# The default learning rates are a historical artifact of the initial
# implementation.
_DNN_LEARNING_RATE = 0.001
_LINEAR_LEARNING_RATE = 0.005


def _check_no_sync_replicas_optimizer(optimizer):
  """Raises if `optimizer` is a SyncReplicasOptimizer.

  SyncReplicasOptimizer cannot coordinate the two separate optimizers (linear
  and DNN) used by this model, so it is rejected up front.
  """
  if isinstance(optimizer, tf.compat.v1.train.SyncReplicasOptimizer):
    raise ValueError(
        'SyncReplicasOptimizer does not support multi optimizers case. '
        'Therefore, it is not supported in DNNLinearCombined model. '
        'If you want to use this optimizer, please use either DNN or Linear '
        'model.')


def _linear_learning_rate(num_linear_feature_columns):
  """Returns the default learning rate of the linear model.

  The calculation is a historical artifact of this initial implementation, but
  has proven a reasonable choice.

  Args:
    num_linear_feature_columns: The number of feature columns of the linear
      model.

  Returns:
    A float.
  """
  # 1/sqrt(num columns), capped at _LINEAR_LEARNING_RATE.
  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
  return min(_LINEAR_LEARNING_RATE, default_learning_rate)


def _add_layer_summary(value, tag):
  """Writes zero-fraction scalar and activation histogram summaries for a layer."""
  tf.compat.v1.summary.scalar('%s/fraction_of_zero_values' % tag,
                              tf.math.zero_fraction(value))
  tf.compat.v1.summary.histogram('%s/activation' % tag, value)


def _validate_feature_columns(linear_feature_columns, dnn_feature_columns):
  """Validates that at least one of the two feature-column lists is non-empty.

  Args:
    linear_feature_columns: Feature columns for the linear part, or None.
    dnn_feature_columns: Feature columns for the DNN part, or None.

  Returns:
    The combined list of feature columns.

  Raises:
    ValueError: If both lists are empty/None.
  """
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')
  return feature_columns


def _dnn_linear_combined_model_fn_v2(
    features,
    labels,
    mode,
    head,
    linear_feature_columns=None,
    linear_optimizer='Ftrl',
    dnn_feature_columns=None,
    dnn_optimizer='Adagrad',
    dnn_hidden_units=None,
    dnn_activation_fn=tf.nn.relu,
    dnn_dropout=None,
    config=None,
    batch_norm=False,
    linear_sparse_combiner='sum',
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    config: `RunConfig` object to configure the runtime settings.
    batch_norm: Whether to use batch normalization after each hidden layer.
    linear_sparse_combiner: A string specifying how to reduce the linear model
      if a categorical column is multivalent. One of "mean", "sqrtn", and
      "sum".
    loss_reduction: One of `tf_keras.losses.Reduction` except `NONE`. Describes
      how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or `input_layer_partitioner` is missing, or
      features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  del config

  # Build DNN Logits.
  if not dnn_feature_columns:
    dnn_logits = None
  else:
    # Optimizers are only instantiated for TRAIN; in EVAL/PREDICT the
    # string/callable is left untouched since no train op is built.
    if mode == ModeKeys.TRAIN:
      dnn_optimizer = optimizers.get_optimizer_instance_v2(
          dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
      _check_no_sync_replicas_optimizer(dnn_optimizer)

    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')

    dnn_logits, dnn_trainable_variables, dnn_update_ops = (
        dnn._dnn_model_fn_builder_v2(  # pylint: disable=protected-access
            units=head.logits_dimension,
            hidden_units=dnn_hidden_units,
            feature_columns=dnn_feature_columns,
            activation_fn=dnn_activation_fn,
            dropout=dnn_dropout,
            batch_norm=batch_norm,
            features=features,
            mode=mode))

  if not linear_feature_columns:
    linear_logits = None
  else:
    if mode == ModeKeys.TRAIN:
      linear_optimizer = optimizers.get_optimizer_instance_v2(
          linear_optimizer,
          learning_rate=_linear_learning_rate(len(linear_feature_columns)))
      _check_no_sync_replicas_optimizer(linear_optimizer)

    linear_logits, linear_trainable_variables = (
        linear._linear_model_fn_builder_v2(  # pylint: disable=protected-access
            units=head.logits_dimension,
            feature_columns=linear_feature_columns,
            sparse_combiner=linear_sparse_combiner,
            features=features))
    _add_layer_summary(linear_logits, 'linear')

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    # Scale loss by number of replicas.
    if loss_reduction == tf.losses.Reduction.SUM_OVER_BATCH_SIZE:
      num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
      if num_replicas > 1:
        loss *= (1. / num_replicas)

    if dnn_logits is not None:
      train_ops.extend(dnn_optimizer.get_updates(loss, dnn_trainable_variables))
      # dnn_update_ops holds e.g. batch-norm moving-average updates.
      if dnn_update_ops is not None:
        train_ops.extend(dnn_update_ops)
    if linear_logits is not None:
      train_ops.extend(
          linear_optimizer.get_updates(loss, linear_trainable_variables))
    train_op = tf.group(*train_ops)
    return train_op

  # In TRAIN mode, assign global_step variable to optimizer.iterations to
  # make global_step increased correctly, as Hooks relies on global step as
  # step counter. Note that, only one model's optimizer needs this assignment.
  if mode == ModeKeys.TRAIN:
    if dnn_logits is not None:
      dnn_optimizer.iterations = tf.compat.v1.train.get_or_create_global_step()
    else:
      linear_optimizer.iterations = \
          tf.compat.v1.train.get_or_create_global_step()

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)


def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=tf.nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None,
                                  batch_norm=False,
                                  linear_sparse_combiner='sum'):
  """Deep Neural Net and Linear combined model_fn (v1, variable-scope based).

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.
    batch_norm: Whether to use batch normalization after each hidden layer.
    linear_sparse_combiner: A string specifying how to reduce the linear model
      if a categorical column is multivalent. One of "mean", "sqrtn", and
      "sum".

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or `input_layer_partitioner` is missing, or
      features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      tf.compat.v1.min_max_variable_partitioner(
          max_partitions=num_ps_replicas, min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        tf.compat.v1.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    # Variables are created under 'dnn' scope; the scope name is kept so the
    # train op can collect exactly the DNN trainable variables.
    with tf.compat.v1.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as scope:
      dnn_absolute_scope = scope.name
      dnn_logit_fn = dnn.dnn_logit_fn_builder(
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          batch_norm=batch_norm,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with tf.compat.v1.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_absolute_scope = scope.name
      logit_fn = linear.linear_logit_fn_builder(
          units=head.logits_dimension,
          feature_columns=linear_feature_columns,
          sparse_combiner=linear_sparse_combiner)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = tf.compat.v1.train.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=tf.compat.v1.get_collection(
                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_absolute_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=tf.compat.v1.get_collection(
                  tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_absolute_scope)))

    # Increment global_step only after both optimizers have applied updates.
    train_op = tf.group(*train_ops)
    with tf.control_dependencies([train_op]):
      return tf.compat.v1.assign_add(global_step, 1).op

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)


@estimator_export('estimator.DNNLinearCombinedClassifier', v1=[])
class DNNLinearCombinedClassifierV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined classification models.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_id_column=categorical_feature_b, ...)
  estimator = tf.estimator.DNNLinearCombinedClassifier(
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf_keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf_keras.optimizers.Adagrad(...),
      # warm-start settings
      warm_start_from="/path/to/checkpoint/dir")

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf_keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96)
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict:
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using softmax cross entropy.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               config=None,
               warm_start_from=None,
               loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator to
        continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf_keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf_keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings represents possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as integer or float within [0, 1] for `n_classes=2` and encoded
        as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also
        there will be errors if vocabulary is not provided and labels are
        string.
      config: RunConfig object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
      batch_norm: Whether to use batch normalization after each hidden layer.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_features_columns are
        empty at the same time.
    """
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_utils.binary_or_multi_class_head(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(  # pylint: disable=protected-access
        'DNNLinearCombined')

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner,
          loss_reduction=loss_reduction)

    super(DNNLinearCombinedClassifierV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.DNNLinearCombinedClassifier'])  # pylint: disable=missing-docstring
class DNNLinearCombinedClassifier(estimator.Estimator):
  # Reuse the V2 docstring; the v1 class defaults to SUM loss reduction.
  __doc__ = DNNLinearCombinedClassifierV2.__doc__.replace(
      'SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(self,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               input_layer_partitioner=None,
               config=None,
               warm_start_from=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set(
        'DNNLinearCombined')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner)

    super(DNNLinearCombinedClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


def _init_dnn_linear_combined_estimator(head, linear_feature_columns,
                                        linear_optimizer, dnn_feature_columns,
                                        dnn_optimizer, dnn_hidden_units,
                                        dnn_activation_fn, dnn_dropout,
                                        input_layer_partitioner,
                                        linear_sparse_combiner):
  """Helper function for the initialization of DNNLinearCombinedEstimator.

  Returns:
    A (feature_columns, model_fn) tuple: the combined feature columns and a
    model_fn closure over the v1 `_dnn_linear_combined_model_fn`.

  Raises:
    ValueError: If both feature-column lists are empty.
  """
  linear_feature_columns = linear_feature_columns or []
  dnn_feature_columns = dnn_feature_columns or []
  feature_columns = (list(linear_feature_columns) + list(dnn_feature_columns))
  if not feature_columns:
    raise ValueError('Either linear_feature_columns or dnn_feature_columns '
                     'must be defined.')

  def _model_fn(features, labels, mode, config):
    """Call the _dnn_linear_combined_model_fn."""
    return _dnn_linear_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=linear_feature_columns,
        linear_optimizer=linear_optimizer,
        dnn_feature_columns=dnn_feature_columns,
        dnn_optimizer=dnn_optimizer,
        dnn_hidden_units=dnn_hidden_units,
        dnn_activation_fn=dnn_activation_fn,
        dnn_dropout=dnn_dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config,
        linear_sparse_combiner=linear_sparse_combiner)

  return feature_columns, _model_fn


@estimator_export('estimator.DNNLinearCombinedEstimator', v1=[])
class DNNLinearCombinedEstimatorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined models with custom head.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)

  estimator = tf.estimator.DNNLinearCombinedEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      # wide settings
      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
      linear_optimizer=tf_keras.optimizers.Ftrl(...),
      # deep settings
      dnn_feature_columns=[
          categorical_feature_a_emb, categorical_feature_b_emb,
          numeric_feature],
      dnn_hidden_units=[1000, 500, 100],
      dnn_optimizer=tf_keras.optimizers.Adagrad(...))

  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
  tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  # To apply learning rate decay, you can set dnn_optimizer to a callable:
  lambda: tf_keras.optimizers.Adam(
      learning_rate=tf.compat.v1.train.exponential_decay(
          learning_rate=0.1,
          global_step=tf.compat.v1.train.get_global_step(),
          decay_steps=10000,
          decay_rate=0.96)
  # It is the same for linear_optimizer.

  # Input builders
  def input_fn_train:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict:
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have following features, otherwise
  there will be a `KeyError`:

  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.
  Loss is calculated by using mean squared error.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               head,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               config=None,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    """Initializes a DNNLinearCombinedEstimator instance.

    Args:
      head: A `Head` instance constructed with a method such as
        `tf.estimator.MultiLabelHead`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf_keras.optimizers.*` used to apply
        gradients to the linear part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        FTRL optimizer.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf_keras.optimizers.*` used to apply
        gradients to the deep part of the model. Can also be a string (one of
        'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to
        Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If None,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out a given
        coordinate.
      config: RunConfig object to configure the runtime settings.
      batch_norm: Whether to use batch normalization after each hidden layer.
      linear_sparse_combiner: A string specifying how to reduce the linear
        model if a categorical column is multivalent. One of "mean", "sqrtn",
        and "sum" -- these are effectively different ways to do example-level
        normalization, which can be useful for bag-of-words features. For more
        details, see `tf.feature_column.linear_model`.

    Raises:
      ValueError: If both linear_feature_columns and dnn_features_columns are
        empty at the same time.
    """
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)
    estimator._canned_estimator_api_gauge.get_cell('Estimator').set(
        'DNNLinearCombined')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner)

    super(DNNLinearCombinedEstimatorV2, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)


@estimator_export(v1=['estimator.DNNLinearCombinedEstimator'])  # pylint: disable=missing-docstring
class DNNLinearCombinedEstimator(estimator.Estimator):
  # The v1 class shares the V2 docstring verbatim.
  __doc__ = DNNLinearCombinedEstimatorV2.__doc__

  def __init__(self,
               head,
               model_dir=None,
               linear_feature_columns=None,
               linear_optimizer='Ftrl',
               dnn_feature_columns=None,
               dnn_optimizer='Adagrad',
               dnn_hidden_units=None,
               dnn_activation_fn=tf.nn.relu,
               dnn_dropout=None,
               input_layer_partitioner=None,
               config=None,
               batch_norm=False,
               linear_sparse_combiner='sum'):
    self._feature_columns = _validate_feature_columns(
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns)
    estimator._canned_estimator_api_gauge.get_cell('Estimator').set(
        'DNNLinearCombined')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the _dnn_linear_combined_model_fn."""
      return _dnn_linear_combined_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          linear_feature_columns=linear_feature_columns,
          linear_optimizer=linear_optimizer,
          dnn_feature_columns=dnn_feature_columns,
          dnn_optimizer=dnn_optimizer,
          dnn_hidden_units=dnn_hidden_units,
          dnn_activation_fn=dnn_activation_fn,
          dnn_dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner,
          config=config,
          batch_norm=batch_norm,
          linear_sparse_combiner=linear_sparse_combiner)

    super(DNNLinearCombinedEstimator, self).__init__(
        model_fn=_model_fn, model_dir=model_dir, config=config)


@estimator_export('estimator.DNNLinearCombinedRegressor', v1=[])
class DNNLinearCombinedRegressorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear and DNN joined models for regression.

  Note: This estimator is also known as wide-n-deep.

  Example:

  ```python
  numeric_feature = numeric_column(...)
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
  categorical_feature_a_emb = embedding_column(
      categorical_column=categorical_feature_a, ...)
  categorical_feature_b_emb = embedding_column(
      categorical_column=categorical_feature_b, ...)
estimator = tf.estimator.DNNLinearCombinedRegressor( # wide settings linear_feature_columns=[categorical_feature_a_x_categorical_feature_b], linear_optimizer=tf_keras.optimizers.Ftrl(...), # deep settings dnn_feature_columns=[ categorical_feature_a_emb, categorical_feature_b_emb, numeric_feature], dnn_hidden_units=[1000, 500, 100], dnn_optimizer=tf_keras.optimizers.Adagrad(...), # warm-start settings warm_start_from="/path/to/checkpoint/dir") # To apply L1 and L2 regularization, you can set dnn_optimizer to: tf.compat.v1.train.ProximalAdagradOptimizer( learning_rate=0.1, l1_regularization_strength=0.001, l2_regularization_strength=0.001) # To apply learning rate decay, you can set dnn_optimizer to a callable: lambda: tf_keras.optimizers.Adam( learning_rate=tf.compat.v1.train.exponential_decay( learning_rate=0.1, global_step=tf.compat.v1.train.get_global_step(), decay_steps=10000, decay_rate=0.96) # It is the same for linear_optimizer. # Input builders def input_fn_train: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_eval: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_predict: # Returns tf.data.Dataset of (x, None) tuple. pass estimator.train(input_fn=input_fn_train, steps=100) metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) predictions = estimator.predict(input_fn=input_fn_predict) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * for each `column` in `dnn_feature_columns` + `linear_feature_columns`: - if `column` is a `CategoricalColumn`, a feature with `key=column.name` whose `value` is a `SparseTensor`. - if `column` is a `WeightedCategoricalColumn`, two features: the first with `key` the id column name, the second with `key` the weight column name. Both features' `value` must be a `SparseTensor`. 
- if `column` is a `DenseColumn`, a feature with `key=column.name` whose `value` is a `Tensor`. Loss is calculated by using mean squared error. @compatibility(eager) Estimators can be used while eager execution is enabled. Note that `input_fn` and all hooks are executed inside a graph context, so they have to be written to be compatible with graph mode. Note that `input_fn` code using `tf.data` generally works in both graph and eager modes. @end_compatibility """ def __init__(self, model_dir=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=tf.nn.relu, dnn_dropout=None, label_dimension=1, weight_column=None, config=None, warm_start_from=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, batch_norm=False, linear_sparse_combiner='sum'): """Initializes a DNNLinearCombinedRegressor instance. Args: model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. linear_feature_columns: An iterable containing all the feature columns used by linear part of the model. All items in the set must be instances of classes derived from `FeatureColumn`. linear_optimizer: An instance of `tf_keras.optimizers.*` used to apply gradients to the linear part of the model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL optimizer. dnn_feature_columns: An iterable containing all the feature columns used by deep part of the model. All items in the set must be instances of classes derived from `FeatureColumn`. dnn_optimizer: An instance of `tf_keras.optimizers.*` used to apply gradients to the deep part of the model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad optimizer. dnn_hidden_units: List of hidden units per layer. 
All layers are fully connected. dnn_activation_fn: Activation function applied to each layer. If None, will use `tf.nn.relu`. dnn_dropout: When not None, the probability we will drop out a given coordinate. label_dimension: Number of regression targets per example. This is the size of the last dimension of the labels and logits `Tensor` objects (typically, these have shape `[batch_size, label_dimension]`). weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. If it is a string, it is used as a key to fetch weight tensor from the `features`. If it is a `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then weight_column.normalizer_fn is applied on it to get weight tensor. config: RunConfig object to configure the runtime settings. warm_start_from: A string filepath to a checkpoint to warm-start from, or a `WarmStartSettings` object to fully configure warm-starting. If the string filepath is provided instead of a `WarmStartSettings`, then all weights are warm-started, and it is assumed that vocabularies and Tensor names are unchanged. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`. batch_norm: Whether to use batch normalization after each hidden layer. linear_sparse_combiner: A string specifying how to reduce the linear model if a categorical column is multivalent. One of "mean", "sqrtn", and "sum" -- these are effectively different ways to do example-level normalization, which can be useful for bag-of-words features. For more details, see `tf.feature_column.linear_model`. Raises: ValueError: If both linear_feature_columns and dnn_features_columns are empty at the same time. 
""" self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) head = regression_head.RegressionHead( label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) estimator._canned_estimator_api_gauge.get_cell('Regressor').set( 'DNNLinearCombined') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn_v2( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, config=config, batch_norm=batch_norm, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedRegressorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) @estimator_export(v1=['estimator.DNNLinearCombinedRegressor']) # pylint: disable=missing-docstring class DNNLinearCombinedRegressor(estimator.Estimator): __doc__ = DNNLinearCombinedRegressorV2.__doc__.replace( 'SUM_OVER_BATCH_SIZE', 'SUM') def __init__(self, model_dir=None, linear_feature_columns=None, linear_optimizer='Ftrl', dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None, dnn_activation_fn=tf.nn.relu, dnn_dropout=None, label_dimension=1, weight_column=None, input_layer_partitioner=None, config=None, warm_start_from=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM, batch_norm=False, linear_sparse_combiner='sum'): self._feature_columns = _validate_feature_columns( linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns) estimator._canned_estimator_api_gauge.get_cell('Regressor').set( 'DNNLinearCombined') # pylint: disable=protected-access head = 
head_lib._regression_head( # pylint: disable=protected-access label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) def _model_fn(features, labels, mode, config): """Call the _dnn_linear_combined_model_fn.""" return _dnn_linear_combined_model_fn( features=features, labels=labels, mode=mode, head=head, linear_feature_columns=linear_feature_columns, linear_optimizer=linear_optimizer, dnn_feature_columns=dnn_feature_columns, dnn_optimizer=dnn_optimizer, dnn_hidden_units=dnn_hidden_units, dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, input_layer_partitioner=input_layer_partitioner, config=config, batch_norm=batch_norm, linear_sparse_combiner=linear_sparse_combiner) super(DNNLinearCombinedRegressor, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) ================================================ FILE: tensorflow_estimator/python/estimator/canned/dnn_linear_combined_estimator_test.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for DNNLinearCombinedEstimatorV2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
from tensorflow_estimator.python.estimator.canned import dnn_testing_utils
from tensorflow_estimator.python.estimator.canned import linear_testing_utils
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.inputs import numpy_io


# Factory that configures only the DNN side of the combined estimator so the
# shared DNN regressor test suites can be reused against it.
def _dnn_only_estimator_fn(hidden_units,
                           feature_columns,
                           model_dir=None,
                           label_dimension=1,
                           weight_column=None,
                           optimizer='Adagrad',
                           activation_fn=tf.nn.relu,
                           dropout=None,
                           config=None):
  return dnn_linear_combined.DNNLinearCombinedEstimatorV2(
      head=regression_head.RegressionHead(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE),
      model_dir=model_dir,
      dnn_feature_columns=feature_columns,
      dnn_optimizer=optimizer,
      dnn_hidden_units=hidden_units,
      dnn_activation_fn=activation_fn,
      dnn_dropout=dropout,
      config=config)


class DNNOnlyEstimatorEvaluateTest(
    dnn_testing_utils.BaseDNNRegressorEvaluateTest, tf.test.TestCase):

  # Explicit (non-super) base __init__ calls: TestCase must be initialized
  # first, then the shared suite is bound to the DNN-only factory.
  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_only_estimator_fn)


class DNNOnlyEstimatorPredictTest(dnn_testing_utils.BaseDNNRegressorPredictTest,
                                  tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_only_estimator_fn)


class DNNOnlyEstimatorTrainTest(dnn_testing_utils.BaseDNNRegressorTrainTest,
                                tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_only_estimator_fn)


# Factory that configures only the linear side of the combined estimator so
# the shared linear regressor test suites can be reused against it.
def _linear_only_estimator_fn(feature_columns,
                              model_dir=None,
                              label_dimension=1,
                              weight_column=None,
                              optimizer='Ftrl',
                              config=None,
                              sparse_combiner='sum'):
  return dnn_linear_combined.DNNLinearCombinedEstimatorV2(
      head=regression_head.RegressionHead(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE),
      model_dir=model_dir,
      linear_feature_columns=feature_columns,
      linear_optimizer=optimizer,
      config=config,
      linear_sparse_combiner=sparse_combiner)


class LinearOnlyEstimatorEvaluateTest(
    linear_testing_utils.BaseLinearRegressorEvaluationTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_only_estimator_fn)


class LinearOnlyEstimatorPredictTest(
    linear_testing_utils.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
        self, _linear_only_estimator_fn)


class LinearOnlyEstimatorTrainTest(
    linear_testing_utils.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_only_estimator_fn)


class DNNLinearCombinedEstimatorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for the combined estimator."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self,
                          train_input_fn,
                          eval_input_fn,
                          predict_input_fn,
                          input_dimension,
                          label_dimension,
                          batch_size,
                          dnn_optimizer='Adagrad',
                          linear_optimizer='Ftrl'):
    """Trains, evaluates, predicts with, and exports the estimator."""
    linear_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    est = dnn_linear_combined.DNNLinearCombinedEstimatorV2(
        head=regression_head.RegressionHead(label_dimension=label_dimension),
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns,
        dnn_hidden_units=(2, 2),
        model_dir=self._model_dir,
        dnn_optimizer=dnn_optimizer,
        linear_optimizer=linear_optimizer)

    # Train
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _create_input_fn(self, label_dimension, batch_size):
    """Creates input_fn for integration test."""
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)
    return train_input_fn, eval_input_fn, predict_input_fn

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
        label_dimension, batch_size)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_numpy_input_fn_with_optimizer_instance(self):
    """Tests complete flow with optimizer_v2 instance."""
    label_dimension = 2
    batch_size = 10
    train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
        label_dimension, batch_size)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        dnn_optimizer=tf_keras.optimizers.legacy.Adagrad(0.01),
        linear_optimizer=tf_keras.optimizers.legacy.Ftrl(0.01))


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/canned/dnn_linear_combined_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for v2 version of dnn_linear_combined.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

from absl.testing import parameterized
import numpy as np
import six
import tensorflow as tf
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_v2
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
from tensorflow_estimator.python.estimator.canned import dnn_testing_utils
from tensorflow_estimator.python.estimator.canned import linear_testing_utils
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.inputs import pandas_io

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


class DNNOnlyModelFnTest(dnn_testing_utils.BaseDNNModelFnTest,
                         tf.test.TestCase):
  """Runs the shared DNN model_fn suite against the combined model_fn."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNModelFnTest.__init__(self, self._dnn_only_model_fn)

  def _dnn_only_model_fn(self,
                         features,
                         labels,
                         mode,
                         head,
                         hidden_units,
                         feature_columns,
                         optimizer='Adagrad',
                         activation_fn=tf.nn.relu,
                         dropout=None,
                         config=None):
    # Empty linear_feature_columns reduces the combined model_fn to DNN-only.
    return dnn_linear_combined._dnn_linear_combined_model_fn_v2(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=[],
        dnn_hidden_units=hidden_units,
        dnn_feature_columns=feature_columns,
        dnn_optimizer=optimizer,
        dnn_activation_fn=activation_fn,
        dnn_dropout=dropout,
        config=config)


# A function to mimic linear-regressor init reuse same tests.
def _linear_regressor_fn(feature_columns,
                         model_dir=None,
                         label_dimension=1,
                         weight_column=None,
                         optimizer='Ftrl',
                         config=None,
                         sparse_combiner='sum'):
  return dnn_linear_combined.DNNLinearCombinedRegressorV2(
      model_dir=model_dir,
      linear_feature_columns=feature_columns,
      linear_optimizer=optimizer,
      label_dimension=label_dimension,
      weight_column=weight_column,
      config=config,
      linear_sparse_combiner=sparse_combiner)


class LinearOnlyRegressorEvaluationV2Test(
    linear_testing_utils.BaseLinearRegressorEvaluationTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


class LinearOnlyRegressorPredictV2Test(
    linear_testing_utils.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


class LinearOnlyRegressorIntegrationV2Test(
    linear_testing_utils.BaseLinearRegressorIntegrationTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


class LinearOnlyRegressorTrainingV2Test(
    linear_testing_utils.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


# A function to mimic linear-classifier init so the shared suites can be
# reused against the combined classifier (DNN side left unset).
def _linear_classifier_fn(feature_columns,
                          model_dir=None,
                          n_classes=2,
                          weight_column=None,
                          label_vocabulary=None,
                          optimizer='Ftrl',
                          config=None,
                          sparse_combiner='sum'):
  return dnn_linear_combined.DNNLinearCombinedClassifierV2(
      model_dir=model_dir,
      linear_feature_columns=feature_columns,
      linear_optimizer=optimizer,
      n_classes=n_classes,
      weight_column=weight_column,
      label_vocabulary=label_vocabulary,
      config=config,
      linear_sparse_combiner=sparse_combiner)


class LinearOnlyClassifierTrainingV2Test(
    linear_testing_utils.BaseLinearClassifierTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


class LinearOnlyClassifierClassesEvaluationV2Test(
    linear_testing_utils.BaseLinearClassifierEvaluationTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


class LinearOnlyClassifierPredictV2Test(
    linear_testing_utils.BaseLinearClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


class LinearOnlyClassifierIntegrationV2Test(
    linear_testing_utils.BaseLinearClassifierIntegrationTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


# Each test method receives the feature-column implementation as `fc_impl`.
@parameterized.parameters((feature_column_v2,))
class DNNLinearCombinedRegressorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for the v2 regressor."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow_helper(self, linear_feature_columns,
                                 dnn_feature_columns, feature_spec,
                                 train_input_fn, eval_input_fn,
                                 predict_input_fn, input_dimension,
                                 label_dimension, batch_size):
    """Trains, evaluates, predicts with, and exports the regressor."""
    est = dnn_linear_combined.DNNLinearCombinedRegressorV2(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUTE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, batch_size,
                          fc_impl):
    linear_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                    feature_spec, train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size)

  def _test_complete_flow_dnn_fc_v1(self, train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size, fc_impl):
    # Deliberately mixes a v1 column into the DNN side; callers assert that
    # the estimator rejects it.
    del fc_impl
    linear_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                    feature_spec, train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size)

  def _test_complete_flow_linear_fc_v1(self, train_input_fn, eval_input_fn,
                                       predict_input_fn, input_dimension,
                                       label_dimension, batch_size, fc_impl):
    # Deliberately mixes a v1 column into the linear side; callers assert
    # that the estimator rejects it.
    del fc_impl
    linear_feature_columns = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                    feature_spec, train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size)

  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    fn_to_run(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_numpy_input_fn_basic(self, fc_impl):
    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)

  def test_numpy_input_fn_dnn_fc_v1(self, fc_impl):
    with self.assertRaisesRegexp(
        ValueError, r'Received a feature column from TensorFlow v1'):
      self._test_numpy_input_fn_helper(fc_impl,
                                       self._test_complete_flow_dnn_fc_v1)

  def test_numpy_input_fn_linear_fc_v1(self, fc_impl):
    with self.assertRaisesRegexp(
        ValueError, r'Received a feature column from TensorFlow v1'):
      self._test_numpy_input_fn_helper(fc_impl,
                                       self._test_complete_flow_linear_fc_v1)

  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
    """Tests complete flow with pandas_input_fn."""
    # Skipped silently when pandas failed to import (see HAS_PANDAS above).
    if not HAS_PANDAS:
      return
    label_dimension = 1
    batch_size = 10
    data = np.linspace(0., 2., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    fn_to_run(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_pandas_input_fn_basic(self, fc_impl):
    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)

  def test_pandas_input_fn_dnn_fc_v1(self, fc_impl):
    with self.assertRaisesRegexp(
        ValueError, r'Received a feature column from TensorFlow v1'):
      self._test_pandas_input_fn_helper(fc_impl,
                                        self._test_complete_flow_dnn_fc_v1)

  def test_pandas_input_fn_linear_fc_v1(self, fc_impl):
    with self.assertRaisesRegexp(
        ValueError, r'Received a feature column from TensorFlow v1'):
      self._test_pandas_input_fn_helper(fc_impl,
                                        self._test_complete_flow_linear_fc_v1)

  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = linear_testing_utils.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils.queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    fn_to_run(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_input_fn_from_parse_example_basic(self, fc_impl):
    self._test_input_fn_from_parse_example_helper(fc_impl,
                                                  self._test_complete_flow)

  def test_input_fn_from_parse_example_dnn_fc_v1(self, fc_impl):
    with self.assertRaisesRegexp(
        ValueError, r'Received a feature column from TensorFlow v1'):
      self._test_input_fn_from_parse_example_helper(
          fc_impl, self._test_complete_flow_dnn_fc_v1)

  def test_input_fn_from_parse_example_linear_fc_v1(self, fc_impl):
    with self.assertRaisesRegexp(
        ValueError, r'Received a feature column from TensorFlow v1'):
      self._test_input_fn_from_parse_example_helper(
          fc_impl, self._test_complete_flow_linear_fc_v1)


# A function to mimic dnn-classifier init reuse same tests.
def _dnn_classifier_fn(hidden_units,
                       feature_columns,
                       model_dir=None,
                       n_classes=2,
                       weight_column=None,
                       label_vocabulary=None,
                       optimizer='Adagrad',
                       config=None):
  """Builds a DNN-only DNNLinearCombinedClassifierV2 (linear side unset)."""
  classifier_kwargs = {
      'model_dir': model_dir,
      'dnn_hidden_units': hidden_units,
      'dnn_feature_columns': feature_columns,
      'dnn_optimizer': optimizer,
      'n_classes': n_classes,
      'weight_column': weight_column,
      'label_vocabulary': label_vocabulary,
      'config': config,
  }
  return dnn_linear_combined.DNNLinearCombinedClassifierV2(**classifier_kwargs)


class DNNOnlyClassifierEvaluateV2Test(
    dnn_testing_utils.BaseDNNClassifierEvaluateTest, tf.test.TestCase):
  """Shared classifier-evaluate suite, bound to the DNN-only factory."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    # TestCase must be initialized before the shared suite base.
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


class DNNOnlyClassifierPredictV2Test(
    dnn_testing_utils.BaseDNNClassifierPredictTest, tf.test.TestCase):
  """Shared classifier-predict suite, bound to the DNN-only factory."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


class DNNOnlyClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
                                   tf.test.TestCase):
  """Shared classifier-train suite, bound to the DNN-only factory."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


# A function to mimic dnn-regressor init reuse same tests.
def _dnn_regressor_fn(hidden_units,
                      feature_columns,
                      model_dir=None,
                      label_dimension=1,
                      weight_column=None,
                      optimizer='Adagrad',
                      config=None):
  """Builds a DNN-only `DNNLinearCombinedRegressorV2`.

  Maps plain DNNRegressor-style constructor arguments onto the combined
  estimator's `dnn_*` keyword arguments (no linear columns), so the shared
  DNN regressor test suites in `dnn_testing_utils` can be reused against
  the combined implementation.
  """
  return dnn_linear_combined.DNNLinearCombinedRegressorV2(
      model_dir=model_dir,
      dnn_hidden_units=hidden_units,
      dnn_feature_columns=feature_columns,
      dnn_optimizer=optimizer,
      label_dimension=label_dimension,
      weight_column=weight_column,
      config=config)


class DNNOnlyRegressorEvaluateV2Test(
    dnn_testing_utils.BaseDNNRegressorEvaluateTest, tf.test.TestCase):
  """Runs the shared DNN regressor evaluate tests against the DNN-only combined model."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


class DNNOnlyRegressorPredictV2Test(
    dnn_testing_utils.BaseDNNRegressorPredictTest, tf.test.TestCase):
  """Runs the shared DNN regressor predict tests against the DNN-only combined model."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


class DNNOnlyRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
                                  tf.test.TestCase):
  """Runs the shared DNN regressor train tests against the DNN-only combined model."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


@parameterized.parameters((feature_column_v2,))
class DNNLinearCombinedClassifierIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for the combined classifier."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _as_label(self, data_in_float):
    # Round float data to the nearest integer class id.
    return np.rint(data_in_float).astype(np.int64)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, n_classes, batch_size, fc_impl):
    """Trains, evaluates, predicts with, and exports a combined classifier."""
    # The same numeric column feeds both the linear and the DNN tower.
    linear_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    est = dnn_linear_combined.DNNLinearCombinedClassifierV2(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self, fc_impl):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    input_dimension = 2
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = self._as_label(np.reshape(data[:batch_size], (batch_size, 1)))
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, batch_size=batch_size, shuffle=False)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_pandas_input_fn(self, fc_impl):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    input_dimension = 1
    n_classes = 2
    batch_size = 10
    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(self._as_label(data))
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_input_fn_from_parse_example(self, fc_impl):
    """Tests complete flow with input_fn constructed from parse_example."""
    input_dimension = 2
    n_classes = 3
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    data = data.reshape(batch_size, input_dimension)

    # Serialize each row as a tf.Example with float features 'x' and an
    # integer label 'y' derived from the first element.
    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          int64_list=feature_pb2.Int64List(
                              value=self._as_label(datum[:1]))),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([1], tf.dtypes.int64),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = linear_testing_utils.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      # limit_epochs caps evaluation at a single pass over the data.
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils.queue_parsed_features(feature_map)
      # Drop the label for prediction.
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)


@parameterized.parameters((feature_column_v2,))
class DNNLinearCombinedTests(tf.test.TestCase):
  """Unit tests for per-tower optimizer wiring and logits composition."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def test_train_op_calls_both_dnn_and_linear(self, fc_impl):
    """Verifies one train step advances both the linear and DNN optimizers."""
    dnn_opt = tf_keras.optimizers.legacy.SGD(1.)
    linear_opt = tf_keras.optimizers.legacy.SGD(1.)
    x_column = fc_impl.numeric_column('x')
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[0.], [1.]])},
        y=np.array([[0.], [1.]]),
        batch_size=1,
        shuffle=False)
    est = dnn_linear_combined.DNNLinearCombinedClassifierV2(
        linear_feature_columns=[x_column],
        # verifies linear_optimizer is used only for linear part.
        linear_optimizer=linear_opt,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=[x_column],
        # verifies dnn_optimizer is used only for dnn part.
        dnn_optimizer=dnn_opt,
        model_dir=self._model_dir)
    num_steps = 1
    est.train(input_fn, steps=num_steps)
    # verifies train_op fires linear minimize op
    self.assertEqual(num_steps,
                     est.get_variable_value(linear_opt.iterations.name))
    # verifies train_op fires dnn optimizer
    self.assertEqual(num_steps,
                     est.get_variable_value(dnn_opt.iterations.name))

  def test_dnn_and_linear_logits_are_added(self, fc_impl):
    """Verifies predictions equal the sum of linear and DNN tower logits."""
    # Write a checkpoint with hand-picked weights so the prediction can be
    # computed by hand below.
    with tf.Graph().as_default():
      tf.Variable([[1.0]], name='linear/linear_model/x/weights')
      tf.Variable([2.0], name='linear/linear_model/bias_weights')
      tf.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
      tf.Variable([4.0], name='dnn/hiddenlayer_0/bias')
      tf.Variable([[5.0]], name='dnn/logits/kernel')
      tf.Variable([6.0], name='dnn/logits/bias')
      tf.Variable(1, name='global_step', dtype=tf.dtypes.int64)
      linear_testing_utils.save_variables_to_ckpt(self._model_dir)

    x_column = fc_impl.numeric_column('x')
    est = dnn_linear_combined.DNNLinearCombinedRegressorV2(
        linear_feature_columns=[x_column],
        dnn_hidden_units=[1],
        dnn_feature_columns=[x_column],
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
    # linear logits = 10*1 + 2 = 12
    # dnn logits = (10*3 + 4)*5 + 6 = 176
    # logits = dnn + linear = 176 + 12 = 188
    self.assertAllClose({
        prediction_keys.PredictionKeys.PREDICTIONS: [188.],
    }, next(est.predict(input_fn=input_fn)))


@parameterized.parameters((feature_column_v2,))
class DNNLinearCombinedWarmStartingTest(tf.test.TestCase):
  """Tests warm-starting behavior of the combined estimators."""

  def setUp(self):
    # Create a directory to save our old checkpoint and vocabularies to.
    self._ckpt_and_vocab_dir = tempfile.mkdtemp()

    # Make a dummy input_fn.
    def _input_fn():
      features = {
          'age': [[23.], [31.]],
          'city': [['Palo Alto'], ['Mountain View']],
      }
      return features, [0, 1]

    self._input_fn = _input_fn

  def tearDown(self):
    # Clean up checkpoint / vocab dir.
    tf.compat.v1.summary.FileWriterCache.clear()
    shutil.rmtree(self._ckpt_and_vocab_dir)

  def test_classifier_basic_warm_starting(self, fc_impl):
    """Tests correctness of DNNLinearCombinedClassifier default warm-start."""
    age = fc_impl.numeric_column('age')
    city = fc_impl.embedding_column(
        fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifierV2(
        linear_feature_columns=[age],
        dnn_feature_columns=[city],
        dnn_hidden_units=[256, 128],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        linear_optimizer='SGD',
        dnn_optimizer='SGD')
    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
    # have accumulator values that change).
    # To avoid optimizer naming issue during warm start, when to create the
    # optimizer instance, the dnn_optimizer needs to be created first
    # before the linear_optimizer, since this is the order pre-defined
    # in the model function.
    # Create a default graph context to make sure the optimizer instance is
    # created within Graph v1 to make it consistent with estimator Graph.
    with tf.Graph().as_default():
      warm_started_dnn_lc_classifier = (
          dnn_linear_combined.DNNLinearCombinedClassifierV2(
              linear_feature_columns=[age],
              dnn_feature_columns=[city],
              dnn_hidden_units=[256, 128],
              n_classes=4,
              dnn_optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
              linear_optimizer=tf_keras.optimizers.legacy.SGD(
                  learning_rate=0.0),
              warm_start_from=dnn_lc_classifier.model_dir))

    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
      if 'learning_rate' in variable_name:
        # The zero learning rate proves training did not move the values.
        self.assertAllClose(
            0.0,
            warm_started_dnn_lc_classifier.get_variable_value(variable_name))
      else:
        # Every other variable should match the source checkpoint exactly.
        self.assertAllClose(
            dnn_lc_classifier.get_variable_value(variable_name),
            warm_started_dnn_lc_classifier.get_variable_value(variable_name))

  def test_regressor_basic_warm_starting(self, fc_impl):
    """Tests correctness of DNNLinearCombinedRegressor default warm-start."""
    age = fc_impl.numeric_column('age')
    city = fc_impl.embedding_column(
        fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNLinearCombinedRegressor and train to save a checkpoint.
    dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressorV2(
        linear_feature_columns=[age],
        dnn_feature_columns=[city],
        dnn_hidden_units=[256, 128],
        model_dir=self._ckpt_and_vocab_dir,
        linear_optimizer='SGD',
        dnn_optimizer='SGD')
    dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNLinearCombinedRegressor, warm-started from the first.
    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
    # have accumulator values that change).
    # To avoid optimizer naming issue during warm start, when to create the
    # optimizer instance, the dnn_optimizer needs to be created first
    # before the linear_optimizer, since this is the order pre-defined
    # in the model function.
    # Create a default graph context to make sure the optimizer instance is
    # created within Graph v1 to make it consistent with estimator Graph.
    with tf.Graph().as_default():
      warm_started_dnn_lc_regressor = (
          dnn_linear_combined.DNNLinearCombinedRegressorV2(
              linear_feature_columns=[age],
              dnn_feature_columns=[city],
              dnn_hidden_units=[256, 128],
              dnn_optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
              linear_optimizer=tf_keras.optimizers.legacy.SGD(
                  learning_rate=0.0),
              warm_start_from=dnn_lc_regressor.model_dir))

    warm_started_dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
    for variable_name in warm_started_dnn_lc_regressor.get_variable_names():
      if 'learning_rate' in variable_name:
        self.assertAllClose(
            0.0,
            warm_started_dnn_lc_regressor.get_variable_value(variable_name))
      else:
        self.assertAllClose(
            dnn_lc_regressor.get_variable_value(variable_name),
            warm_started_dnn_lc_regressor.get_variable_value(variable_name))

  def test_warm_starting_selective_variables(self, fc_impl):
    """Tests selecting variables to warm-start."""
    age = fc_impl.numeric_column('age')
    city = fc_impl.embedding_column(
        fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifierV2(
        linear_feature_columns=[age],
        dnn_feature_columns=[city],
        dnn_hidden_units=[256, 128],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        linear_optimizer='SGD',
        dnn_optimizer='SGD')
    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
    # have accumulator values that change).
    warm_started_dnn_lc_classifier = (
        dnn_linear_combined.DNNLinearCombinedClassifierV2(
            linear_feature_columns=[age],
            dnn_feature_columns=[city],
            dnn_hidden_units=[256, 128],
            n_classes=4,
            linear_optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
            dnn_optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
            # The provided regular expression will only warm-start the deep
            # portion of the model.
            warm_start_from=estimator.WarmStartSettings(
                ckpt_to_initialize_from=dnn_lc_classifier.model_dir,
                vars_to_warm_start='.*(dnn).*')))

    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
      if 'dnn' in variable_name:
        if 'learning_rate' in variable_name:
          self.assertAllClose(
              0.0,
              warm_started_dnn_lc_classifier.get_variable_value(variable_name))
        else:
          self.assertAllClose(
              dnn_lc_classifier.get_variable_value(variable_name),
              warm_started_dnn_lc_classifier.get_variable_value(variable_name))
      elif 'linear' in variable_name:
        linear_values = warm_started_dnn_lc_classifier.get_variable_value(
            variable_name)
        # Since they're not warm-started, the linear weights will be
        # zero-initialized.
        self.assertAllClose(np.zeros_like(linear_values), linear_values)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/dnn_test_fc_v2.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dnn.py with feature_column_v2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile
from unittest.mock import patch

from absl.testing import parameterized
import numpy as np
import six
import tensorflow as tf
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.feature_column import feature_column_v2
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import dnn_testing_utils
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.inputs import pandas_io

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


def _dnn_classifier_fn(*args, **kwargs):
  """Constructs a `DNNClassifierV2`; factory for the shared test suites."""
  return dnn.DNNClassifierV2(*args, **kwargs)


class DNNModelFnV2Test(dnn_testing_utils.BaseDNNModelFnTest, tf.test.TestCase):
  """Runs the shared DNN model-fn tests against `dnn_model_fn_v2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNModelFnTest.__init__(
        self, dnn.dnn_model_fn_v2, fc_impl=feature_column_v2)


class DNNLogitFnV2Test(dnn_testing_utils.BaseDNNLogitFnTest, tf.test.TestCase):
  """Runs the shared DNN logit-fn tests against `dnn_logit_fn_builder_v2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
        self, dnn.dnn_logit_fn_builder_v2, fc_impl=feature_column_v2)


class DNNWarmStartingV2Test(dnn_testing_utils.BaseDNNWarmStartingTest,
                            tf.test.TestCase):
  """Runs the shared DNN warm-starting tests against the V2 estimators."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column_v2)


class DNNClassifierEvaluateV2Test(
    dnn_testing_utils.BaseDNNClassifierEvaluateTest, tf.test.TestCase):
  """Runs the shared classifier evaluate tests against `DNNClassifierV2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


class DNNClassifierPredictV2Test(dnn_testing_utils.BaseDNNClassifierPredictTest,
                                 tf.test.TestCase):
  """Runs the shared classifier predict tests against `DNNClassifierV2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


class DNNClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
                               tf.test.TestCase):
  """Runs the shared classifier train tests against `DNNClassifierV2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


def _dnn_regressor_fn(*args, **kwargs):
  """Constructs a `DNNRegressorV2`; factory for the shared test suites."""
  return dnn.DNNRegressorV2(*args, **kwargs)


class DNNRegressorEvaluateV2Test(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
                                 tf.test.TestCase):
  """Runs the shared regressor evaluate tests against `DNNRegressorV2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


class DNNRegressorPredictV2Test(dnn_testing_utils.BaseDNNRegressorPredictTest,
                                tf.test.TestCase):
  """Runs the shared regressor predict tests against `DNNRegressorV2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


class DNNRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
                              tf.test.TestCase):
  """Runs the shared regressor train tests against `DNNRegressorV2`."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


def _queue_parsed_features(feature_map):
  """Re-batches parsed features through a FIFO queue.

  Pushes the tensors of `feature_map` through a queue runner so the input
  pipeline behaves like a standard queued reader, and returns a dict with
  the same keys mapped to the dequeued tensors.
  """
  tensors_to_enqueue = []
  keys = []
  for key, tensor in six.iteritems(feature_map):
    keys.append(key)
    tensors_to_enqueue.append(tensor)
  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
  input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes)
  tf.compat.v1.train.queue_runner.add_queue_runner(
      tf.compat.v1.train.queue_runner.QueueRunner(
          input_queue, [input_queue.enqueue(tensors_to_enqueue)]))
  dequeued_tensors = input_queue.dequeue()
  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}


class DNNRegressorIntegrationTest(tf.test.TestCase, parameterized.TestCase):
  """End-to-end train/evaluate/predict/export flow for `DNNRegressorV2`."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, batch_size):
    """Trains, evaluates, predicts with, and exports the regressor."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn.DNNRegressorV2(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    label_dimension = 1
    batch_size = 10
    data = np.linspace(0., 2., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # Serialize each row as a tf.Example; 'y' mirrors 'x' (learn y = x).
    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      # limit_epochs caps evaluation at a single pass over the data.
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      # Drop the label for prediction.
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)


class DNNClassifierIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for `DNNClassifierV2`."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _as_label(self, data_in_float):
    # Round float data to the nearest integer class id.
    return np.rint(data_in_float).astype(np.int64)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, n_classes, batch_size):
    """Trains, evaluates, predicts with, and exports the classifier."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn.DNNClassifierV2(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    input_dimension = 2
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, batch_size=batch_size, shuffle=False)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    input_dimension = 1
    n_classes = 3
    batch_size = 10
    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(self._as_label(data))
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    input_dimension = 2
    n_classes = 3
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    data = data.reshape(batch_size, input_dimension)

    # Serialize each row as a tf.Example with float features 'x' and an
    # integer label 'y' derived from the first element.
    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          int64_list=feature_pb2.Int64List(
                              value=self._as_label(datum[:1]))),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([1], tf.dtypes.int64),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      # Drop the label for prediction.
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)


class DNNTrainingMode(tf.test.TestCase):
  """Tests that training mode propagates to feature columns correctly."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()
    self._label_dimension = 1
    self._batch_size = 10

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _create_data(self):
    # Evenly spaced values reshaped to (batch_size, label_dimension).
    data = np.linspace(
        0., 2., self._batch_size * self._label_dimension, dtype=np.float32)
    return data.reshape(self._batch_size, self._label_dimension)

  def _get_estimator(self):
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(self._label_dimension,))
    ]
    return dnn.DNNRegressorV2(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=self._label_dimension,
        model_dir=self._model_dir)

  def test_train_vs_eval_mode(self):
    data = self._create_data()
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=self._batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=self._batch_size, shuffle=False)
    est = self._get_estimator()
    # Patch the feature layer so we can inspect the `training` kwarg it
    # received in each mode.
    with patch.object(
        tf_keras.layers.DenseFeatures, 'call',
        return_value=data) as mock_dense_features_call:
      est.train(train_input_fn, steps=10)
      est.evaluate(eval_input_fn)
    train_args, eval_args = mock_dense_features_call.call_args_list
    # DenseFeature should have been called with training = True in train.
    _, train_training_kwarg = train_args
    self.assertTrue(train_training_kwarg['training'])
    # DenseFeature should have been called with training = False in eval.
    _, eval_training_kwarg = eval_args
    self.assertFalse(eval_training_kwarg['training'])


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/dnn_testing_utils.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Utils to be used in testing DNN estimators.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import shutil import tempfile import numpy as np import six import tensorflow as tf from tensorflow.python.feature_column import feature_column_v2 from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.head import base_head from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.mode_keys import ModeKeys # pylint rules which are disabled by default for test files. # pylint: disable=invalid-name,protected-access,missing-docstring # Names of variables created by model. 
LEARNING_RATE_NAME = 'dnn/regression_head/dnn/learning_rate'
HIDDEN_WEIGHTS_NAME_PATTERN = 'dnn/hiddenlayer_%d/kernel'
HIDDEN_BIASES_NAME_PATTERN = 'dnn/hiddenlayer_%d/bias'
BATCH_NORM_BETA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/beta'
BATCH_NORM_GAMMA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/gamma'
BATCH_NORM_MEAN_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/moving_mean'
BATCH_NORM_VARIANCE_NAME_PATTERN = (
    'dnn/hiddenlayer_%d/batchnorm_%d/moving_variance')
LOGITS_WEIGHTS_NAME = 'dnn/logits/kernel'
LOGITS_BIASES_NAME = 'dnn/logits/bias'
OCCUPATION_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
                             'occupation_embedding/embedding_weights')
CITY_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
                       'city_embedding/embedding_weights')


def assert_close(expected, actual, rtol=1e-04, message='', name='assert_close'):
  """Returns an op asserting `actual` is elementwise close to `expected`.

  The check is relative: |expected - actual| / expected < rtol must hold for
  every element.

  Args:
    expected: Expected values; convertible to a Tensor.
    actual: Actual values; convertible to a Tensor.
    rtol: Relative tolerance.
    message: Currently unused (see NOTE below).
    name: Name scope for the returned assert op.

  Returns:
    An assert op that fails when any relative difference is >= `rtol`.
  """
  # NOTE(review): `message` is accepted but never used below; the assert op
  # builds its own error data.
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    # Relative difference.  The division by `expected` is unguarded, so this
    # presumably assumes callers never pass zeros -- TODO confirm.
    rdiff = tf.math.abs((expected - actual) / expected, 'diff')
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return tf.compat.v1.debugging.assert_less(
        rdiff,
        rtol,
        data=('Condition expected =~ actual did not hold element-wise:'
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        summarize=expected.get_shape().num_elements(),
        name=scope)


def create_checkpoint(weights_and_biases,
                      global_step,
                      model_dir,
                      batch_norm_vars=None):
  """Create checkpoint file with provided model weights.

  Args:
    weights_and_biases: Iterable of tuples of weight and bias values, one
      tuple per layer (hidden layers first, logits layer last).
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
    batch_norm_vars: Variables used for batch normalization: one
      (beta, gamma, moving_mean, moving_variance) tuple per hidden layer.
  """
  weights, biases = zip(*weights_and_biases)
  if batch_norm_vars:
    # Batch norm applies to hidden layers only, not the final logits layer.
    assert len(batch_norm_vars) == len(weights_and_biases) - 1
    (bn_betas, bn_gammas, bn_means, bn_variances) = zip(*batch_norm_vars)
  model_weights = {}

  # Hidden layer weights.
  for i in range(0, len(weights) - 1):
    model_weights[HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
    model_weights[HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]
    if batch_norm_vars:
      model_weights[BATCH_NORM_BETA_NAME_PATTERN % (i, i)] = bn_betas[i]
      model_weights[BATCH_NORM_GAMMA_NAME_PATTERN % (i, i)] = bn_gammas[i]
      model_weights[BATCH_NORM_MEAN_NAME_PATTERN % (i, i)] = bn_means[i]
      model_weights[BATCH_NORM_VARIANCE_NAME_PATTERN % (i, i)] = bn_variances[i]

  # Output layer weights.
  model_weights[LOGITS_WEIGHTS_NAME] = weights[-1]
  model_weights[LOGITS_BIASES_NAME] = biases[-1]

  with tf.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      tf.Variable(v, name=k, dtype=tf.dtypes.float32)

    # Create non-model variables.
    global_step_var = tf.compat.v1.train.create_global_step()

    # Initialize vars and save checkpoint.
    with tf.compat.v1.Session() as sess:
      tf.compat.v1.initializers.global_variables().run()
      global_step_var.assign(global_step).eval()
      tf.compat.v1.train.Saver().save(sess,
                                      os.path.join(model_dir, 'model.ckpt'))


def mock_head(testcase, hidden_units, logits_dimension, expected_logits):
  """Returns a mock head that validates logits values and variable names."""
  # Expected trainable-variable names, derived from the layer-name patterns
  # above ("':0'" is the tensor-output suffix on a variable's name).
  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + ':0') % i
                          for i in range(len(hidden_units))]
  hidden_biases_names = [
      (HIDDEN_BIASES_NAME_PATTERN + ':0') % i for i in range(len(hidden_units))
  ]
  expected_var_names = (
      hidden_weights_names + hidden_biases_names +
      [LOGITS_WEIGHTS_NAME + ':0', LOGITS_BIASES_NAME + ':0'])

  def _create_tpu_estimator_spec(features,
                                 mode,
                                 logits,
                                 labels,
                                 trainable_variables=None,
                                 train_op_fn=None,
                                 optimizer=None,
                                 update_ops=None):
    del features, labels  # Not used.
    trainable_vars = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
    # The model must have created exactly the expected hidden/logits
    # variables -- nothing more, nothing less.
    testcase.assertItemsEqual(expected_var_names,
                              [var.name for var in trainable_vars])
    loss = tf.constant(1.)
    # Logits values can't be checked eagerly in graph mode, so the check is an
    # assert op that every returned spec depends on.
    assert_logits = assert_close(
        expected_logits, logits, message='Failed for mode={}. '.format(mode))
    with tf.control_dependencies([assert_logits]):
      if mode == ModeKeys.TRAIN:
        if train_op_fn is not None:
          train_op = train_op_fn(loss)
        elif optimizer is not None:
          train_op = optimizer.get_updates(loss, trainable_variables)
        # NOTE(review): if both `train_op_fn` and `optimizer` are None,
        # `train_op` is never bound and the code below raises NameError;
        # presumably callers always provide one of them -- TODO confirm.
        if update_ops is not None:
          train_op = tf.group(train_op, *update_ops)
        return model_fn._TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)
      elif mode == ModeKeys.EVAL:
        return model_fn._TPUEstimatorSpec(mode=mode, loss=tf.identity(loss))
      elif mode == ModeKeys.PREDICT:
        return model_fn._TPUEstimatorSpec(
            mode=mode, predictions={'logits': tf.identity(logits)})
      else:
        testcase.fail('Invalid mode: {}'.format(mode))

  def _create_estimator_spec(features,
                             mode,
                             logits,
                             labels,
                             trainable_variables=None,
                             train_op_fn=None,
                             optimizer=None,
                             update_ops=None):
    # Same checks as above, converted to a plain EstimatorSpec.
    tpu_spec = _create_tpu_estimator_spec(features, mode, logits, labels,
                                          trainable_variables, train_op_fn,
                                          optimizer, update_ops)
    return tpu_spec.as_estimator_spec()

  head = tf.compat.v1.test.mock.NonCallableMagicMock(spec=base_head.Head)
  head.logits_dimension = logits_dimension
  # Wrap (rather than replace) the spec builders so mock still records call
  # counts and arguments while running the real validation above.
  head._create_tpu_estimator_spec = tf.compat.v1.test.mock.MagicMock(
      wraps=_create_tpu_estimator_spec)
  head.create_estimator_spec = tf.compat.v1.test.mock.MagicMock(
      wraps=_create_estimator_spec)

  return head


def mock_optimizer(testcase, hidden_units, expected_loss=None):
  """Creates a mock optimizer to test the train method.

  Args:
    testcase: A TestCase instance.
    hidden_units: Iterable of integer sizes for the hidden layers.
    expected_loss: If given, will assert the loss value.

  Returns:
    A mock Optimizer.
  """
  # Same expected trainable-variable names as in mock_head above.
  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + ':0') % i
                          for i in range(len(hidden_units))]
  hidden_biases_names = [
      (HIDDEN_BIASES_NAME_PATTERN + ':0') % i for i in range(len(hidden_units))
  ]
  expected_var_names = (
      hidden_weights_names + hidden_biases_names +
      [LOGITS_WEIGHTS_NAME + ':0', LOGITS_BIASES_NAME + ':0'])

  class _Optimizer(tf_keras.optimizers.legacy.Optimizer):

    def get_updates(self, loss, params):
      trainable_vars = params
      testcase.assertItemsEqual(expected_var_names,
                                [var.name for var in trainable_vars])

      # Verify loss. We can't check the value directly, so we add an assert op.
      testcase.assertEquals(0, loss.shape.ndims)
      if expected_loss is None:
        if self.iterations is not None:
          # Increment the step counter so training makes progress.
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]
      assert_loss = assert_close(
          tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32),
          loss,
          name='assert_loss')
      with tf.control_dependencies((assert_loss,)):
        if self.iterations is not None:
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]

    def get_config(self):
      config = super(_Optimizer, self).get_config()
      return config

  optimizer = _Optimizer(name='my_optimizer')

  return optimizer


class BaseDNNModelFnTest(object):
  """Tests that _dnn_model_fn passes expected logits to mock head."""

  def __init__(self, dnn_model_fn, fc_impl=feature_column_v2):
    self._dnn_model_fn = dnn_model_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
                   expected_logits):
    """Tests that the expected logits are passed to mock head."""
    with tf.Graph().as_default():
      # Global step is needed for MonitoredTrainingSession below.
      tf.compat.v1.train.create_global_step()
      head = mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      estimator_spec = self._dnn_model_fn(
          features={'age': tf.constant(inputs)},
          labels=tf.constant([[1]]),
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=[
              self._fc_impl.numeric_column(
                  'age', shape=np.array(inputs).shape[1:])
          ],
          optimizer=mock_optimizer(self, hidden_units))
      with tf.compat.v1.train.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as sess:
        # Run the op/tensor appropriate for the mode; the real check is the
        # assert op wired into the spec by mock_head.
        if mode == ModeKeys.TRAIN:
          sess.run(estimator_spec.train_op)
        elif mode == ModeKeys.EVAL:
          sess.run(estimator_spec.loss)
        elif mode == ModeKeys.PREDICT:
          sess.run(estimator_spec.predictions)
        else:
          self.fail('Invalid mode: {}'.format(mode))

  def test_one_dim_logits(self):
    """Tests one-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
    """
    base_global_step = 100
    # Layer weights: (hidden_0, hidden_1, logits) as (kernel, bias) pairs.
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=1,
          inputs=[[10.]],
          expected_logits=[[-2.08]])

  def test_multi_dim_logits(self):
    """Tests multi-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]] = [[-2.08, 2.08, 1.19]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10.]],
          expected_logits=[[-2.08, 2.08, 1.19]])

  def test_multi_example_multi_dim_logits(self):
    """Tests multiple examples and multi-dimensional logits.

    input_layer = [[10], [5]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
                   = [[6.1, 4.9], [3.1, 2.4]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
                   = [[2.38, 0], [1.38, 0]]
    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10.], [5.]],
          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])

  def test_multi_dim_input_one_dim_logits(self):
    """Tests multi-dimensional inputs and one-dimensional logits.
input_layer = [[10, 8]] hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]] = [[1.3, 0.9]] hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]] = [[0.78, relu(-0.06)]] = [[0.78, 0]] logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=1, inputs=[[10., 8.]], expected_logits=[[-0.48]]) def test_multi_dim_input_multi_dim_logits(self): """Tests multi-dimensional inputs and multi-dimensional logits. input_layer = [[10, 8]] hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]] = [[1.3, 0.9]] hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]] = [[0.78, relu(-0.06)]] = [[0.78, 0]] logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=3, inputs=[[10., 8.]], expected_logits=[[-0.48, 0.48, 0.39]]) def test_multi_feature_column_multi_dim_logits(self): """Tests multiple feature columns and multi-dimensional logits. All numbers are the same as test_multi_dim_input_multi_dim_logits. The only difference is that the input consists of two 1D feature columns, instead of one 2D feature column. 
""" base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) hidden_units = (2, 2) logits_dimension = 3 inputs = ([[10.]], [[8.]]) expected_logits = [[-0.48, 0.48, 0.39]] for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: with tf.Graph().as_default(): tf.compat.v1.train.create_global_step() head = mock_head( self, hidden_units=hidden_units, logits_dimension=logits_dimension, expected_logits=expected_logits) estimator_spec = self._dnn_model_fn( features={ 'age': tf.constant(inputs[0]), 'height': tf.constant(inputs[1]) }, labels=tf.constant([[1]]), mode=mode, head=head, hidden_units=hidden_units, feature_columns=[ self._fc_impl.numeric_column('age'), self._fc_impl.numeric_column('height') ], optimizer=mock_optimizer(self, hidden_units)) with tf.compat.v1.train.MonitoredTrainingSession( checkpoint_dir=self._model_dir) as sess: if mode == ModeKeys.TRAIN: sess.run(estimator_spec.train_op) elif mode == ModeKeys.EVAL: sess.run(estimator_spec.loss) elif mode == ModeKeys.PREDICT: sess.run(estimator_spec.predictions) else: self.fail('Invalid mode: {}'.format(mode)) def test_multi_feature_column_mix_multi_dim_logits(self): """Tests multiple feature columns and multi-dimensional logits. All numbers are the same as test_multi_dim_input_multi_dim_logits. The only difference is that the input consists of two 1D feature columns, instead of one 2D feature column. 
""" base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) hidden_units = (2, 2) logits_dimension = 3 inputs = ([[10.]], [[8.]]) expected_logits = [[-0.48, 0.48, 0.39]] for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: with tf.Graph().as_default(): tf.compat.v1.train.create_global_step() head = mock_head( self, hidden_units=hidden_units, logits_dimension=logits_dimension, expected_logits=expected_logits) estimator_spec = self._dnn_model_fn( features={ 'age': tf.constant(inputs[0]), 'height': tf.constant(inputs[1]) }, labels=tf.constant([[1]]), mode=mode, head=head, hidden_units=hidden_units, feature_columns=[ tf.feature_column.numeric_column('age'), tf.feature_column.numeric_column('height') ], optimizer=mock_optimizer(self, hidden_units)) with tf.compat.v1.train.MonitoredTrainingSession( checkpoint_dir=self._model_dir) as sess: if mode == ModeKeys.TRAIN: sess.run(estimator_spec.train_op) elif mode == ModeKeys.EVAL: sess.run(estimator_spec.loss) elif mode == ModeKeys.PREDICT: sess.run(estimator_spec.predictions) else: self.fail('Invalid mode: {}'.format(mode)) def test_features_tensor_raises_value_error(self): """Tests that passing a Tensor for features raises a ValueError.""" hidden_units = (2, 2) logits_dimension = 3 inputs = ([[10.]], [[8.]]) expected_logits = [[0, 0, 0]] with tf.Graph().as_default(): tf.compat.v1.train.create_global_step() head = mock_head( self, hidden_units=hidden_units, logits_dimension=logits_dimension, expected_logits=expected_logits) with self.assertRaisesRegexp(ValueError, 'features should be a dict'): self._dnn_model_fn( features=tf.constant(inputs), labels=tf.constant([[1]]), mode=ModeKeys.TRAIN, head=head, hidden_units=hidden_units, feature_columns=[ self._fc_impl.numeric_column( 'age', shape=np.array(inputs).shape[1:]) ], optimizer=mock_optimizer(self, hidden_units)) class 
BaseDNNLogitFnTest(object): """Tests correctness of logits calculated from _dnn_logit_fn_builder.""" def __init__(self, dnn_logit_fn_builder, fc_impl=feature_column_v2): self._dnn_logit_fn_builder = dnn_logit_fn_builder self._fc_impl = fc_impl def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _test_logits(self, mode, hidden_units, logits_dimension, inputs, expected_logits, batch_norm=False): """Tests that the expected logits are calculated.""" with tf.Graph().as_default(): # Global step needed for MonitoredSession, which is in turn used to # explicitly set variable weights through a checkpoint. tf.compat.v1.train.create_global_step() logit_fn = self._dnn_logit_fn_builder( units=logits_dimension, hidden_units=hidden_units, feature_columns=[ self._fc_impl.numeric_column( 'age', shape=np.array(inputs).shape[1:]) ], activation_fn=tf.nn.relu, dropout=None, batch_norm=batch_norm) logits = logit_fn(features={'age': tf.constant(inputs)}, mode=mode) with tf.compat.v1.train.MonitoredTrainingSession( checkpoint_dir=self._model_dir) as sess: self.assertAllClose(expected_logits, sess.run(logits)) def test_one_dim_logits(self): """Tests one-dimensional logits. input_layer = [[10]] hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]] hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]] = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]] logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=1, inputs=[[10.]], expected_logits=[[-2.08]]) def test_one_dim_logits_with_batch_norm(self): """Tests one-dimensional logits. 
input_layer = [[10]] hidden_layer_0 = [[relu(0.6*10 +1), relu(0.5*10 -1)]] = [[7, 4]] hidden_layer_0 = [[relu(0.6*20 +1), relu(0.5*20 -1)]] = [[13, 9]] batch_norm_0, training (epsilon = 0.001): mean1 = 1/2*(7+13) = 10, variance1 = 1/2*(3^2+3^2) = 9 x11 = (7-10)/sqrt(9+0.001) = -0.999944449, x21 = (13-10)/sqrt(9+0.001) = 0.999944449, mean2 = 1/2*(4+9) = 6.5, variance2 = 1/2*(2.5^2+.2.5^2) = 6.25 x12 = (4-6.5)/sqrt(6.25+0.001) = -0.99992001, x22 = (9-6.5)/sqrt(6.25+0.001) = 0.99992001, logits = [[-1*(-0.999944449) + 2*(-0.99992001) + 0.3], [-1*0.999944449 + 2*0.99992001 + 0.3]] = [[-0.699895571],[1.299895571]] batch_norm_0, not training (epsilon = 0.001): moving_mean1 = 0, moving_variance1 = 1 x11 = (7-0)/sqrt(1+0.001) = 6.996502623, x21 = (13-0)/sqrt(1+0.001) = 12.993504871, moving_mean2 = 0, moving_variance2 = 1 x12 = (4-0)/sqrt(1+0.001) = 3.998001499, x22 = (9-0)/sqrt(1+0.001) = 8.995503372, logits = [[-1*6.996502623 + 2*3.998001499 + 0.3], [-1*12.993504871 + 2*8.995503372 + 0.3]] = [[1.299500375],[5.297501873]] """ base_global_step = 100 create_checkpoint( ( ([[.6, .5]], [1., -1.]), ([[-1.], [2.]], [.3]), ), base_global_step, self._model_dir, batch_norm_vars=( [ [0, 0], # beta. [1, 1], # gamma. [0, 0], # moving mean. [1, 1], # moving variance. ],)) self._test_logits( ModeKeys.TRAIN, hidden_units=[2], logits_dimension=1, inputs=[[10.], [20.]], expected_logits=[[-0.699895571], [1.299895571]], batch_norm=True) for mode in [ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=[2], logits_dimension=1, inputs=[[10.], [20.]], expected_logits=[[1.299500375], [5.297501873]], batch_norm=True) def test_multi_dim_logits(self): """Tests multi-dimensional logits. 
input_layer = [[10]] hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]] hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]] = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]] logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]] = [[-2.08, 2.08, 1.19]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=3, inputs=[[10.]], expected_logits=[[-2.08, 2.08, 1.19]]) def test_multi_example_multi_dim_logits(self): """Tests multiple examples and multi-dimensional logits. input_layer = [[10], [5]] hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)], [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]] = [[6.1, 4.9], [3.1, 2.4]] hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)], [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]] = [[2.38, 0], [1.38, 0]] logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38], [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]] = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=3, inputs=[[10.], [5.]], expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]]) def test_multi_dim_input_one_dim_logits(self): """Tests multi-dimensional inputs and one-dimensional logits. 
input_layer = [[10, 8]] hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]] = [[1.3, 0.9]] hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]] = [[0.78, relu(-0.06)]] = [[0.78, 0]] logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=1, inputs=[[10., 8.]], expected_logits=[[-0.48]]) def test_multi_dim_input_multi_dim_logits(self): """Tests multi-dimensional inputs and multi-dimensional logits. input_layer = [[10, 8]] hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]] = [[1.3, 0.9]] hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]] = [[0.78, relu(-0.06)]] = [[0.78, 0]] logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]] """ base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: self._test_logits( mode, hidden_units=(2, 2), logits_dimension=3, inputs=[[10., 8.]], expected_logits=[[-0.48, 0.48, 0.39]]) def test_multi_feature_column_multi_dim_logits(self): """Tests multiple feature columns and multi-dimensional logits. All numbers are the same as test_multi_dim_input_multi_dim_logits. The only difference is that the input consists of two 1D feature columns, instead of one 2D feature column. 
""" base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) hidden_units = (2, 2) logits_dimension = 3 inputs = ([[10.]], [[8.]]) expected_logits = [[-0.48, 0.48, 0.39]] for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: with tf.Graph().as_default(): # Global step needed for MonitoredSession, which is in turn used to # explicitly set variable weights through a checkpoint. tf.compat.v1.train.create_global_step() logit_fn = self._dnn_logit_fn_builder( units=logits_dimension, hidden_units=hidden_units, feature_columns=[ self._fc_impl.numeric_column('age'), self._fc_impl.numeric_column('height') ], activation_fn=tf.nn.relu, dropout=None, batch_norm=False) logits = logit_fn( features={ 'age': tf.constant(inputs[0]), 'height': tf.constant(inputs[1]) }, mode=mode) with tf.compat.v1.train.MonitoredTrainingSession( checkpoint_dir=self._model_dir) as sess: self.assertAllClose(expected_logits, sess.run(logits)) def test_multi_feature_column_mix_multi_dim_logits(self): """Tests multiple feature columns and multi-dimensional logits. All numbers are the same as test_multi_dim_input_multi_dim_logits. The only difference is that the input consists of two 1D feature columns, instead of one 2D feature column. """ base_global_step = 100 create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), base_global_step, self._model_dir) hidden_units = (2, 2) logits_dimension = 3 inputs = ([[10.]], [[8.]]) expected_logits = [[-0.48, 0.48, 0.39]] for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: with tf.Graph().as_default(): # Global step needed for MonitoredSession, which is in turn used to # explicitly set variable weights through a checkpoint. 
tf.compat.v1.train.create_global_step() logit_fn = self._dnn_logit_fn_builder( units=logits_dimension, hidden_units=hidden_units, feature_columns=[ tf.feature_column.numeric_column('age'), tf.feature_column.numeric_column('height') ], activation_fn=tf.nn.relu, dropout=None, batch_norm=False) logits = logit_fn( features={ 'age': tf.constant(inputs[0]), 'height': tf.constant(inputs[1]) }, mode=mode) with tf.compat.v1.train.MonitoredTrainingSession( checkpoint_dir=self._model_dir) as sess: self.assertAllClose(expected_logits, sess.run(logits)) class BaseDNNWarmStartingTest(object): def __init__(self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column_v2): self._dnn_classifier_fn = _dnn_classifier_fn self._dnn_regressor_fn = _dnn_regressor_fn self._fc_impl = fc_impl def setUp(self): # Create a directory to save our old checkpoint and vocabularies to. self._ckpt_and_vocab_dir = tempfile.mkdtemp() # Reset the default graph in each test method to avoid the Keras optimizer # naming issue during warm starting. tf.compat.v1.reset_default_graph() # Make a dummy input_fn. def _input_fn(): features = { 'city': [['Palo Alto'], ['Mountain View']], 'locality': [['Palo Alto'], ['Mountain View']], 'occupation': [['doctor'], ['consultant']] } return features, [0, 1] self._input_fn = _input_fn def tearDown(self): # Clean up checkpoint / vocab dir. 
tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._ckpt_and_vocab_dir) def assertAllNotClose(self, t1, t2): """Helper assert for arrays.""" sum_of_abs_diff = 0.0 for x, y in zip(t1, t2): try: for a, b in zip(x, y): sum_of_abs_diff += abs(b - a) except TypeError: sum_of_abs_diff += abs(y - x) self.assertGreater(sum_of_abs_diff, 0) def test_classifier_basic_warm_starting(self): """Tests correctness of DNNClassifier default warm-start.""" city = self._fc_impl.embedding_column( self._fc_impl.categorical_column_with_vocabulary_list( 'city', vocabulary_list=['Mountain View', 'Palo Alto']), dimension=5) # Create a DNNClassifier and train to save a checkpoint. dnn_classifier = self._dnn_classifier_fn( hidden_units=[256, 128], feature_columns=[city], model_dir=self._ckpt_and_vocab_dir, n_classes=4, optimizer='SGD') dnn_classifier.train(input_fn=self._input_fn, max_steps=1) # Create a second DNNClassifier, warm-started from the first. Use a # learning_rate = 0.0 optimizer to check values (use SGD so we don't have # accumulator values that change). warm_started_dnn_classifier = self._dnn_classifier_fn( hidden_units=[256, 128], feature_columns=[city], n_classes=4, optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0), warm_start_from=dnn_classifier.model_dir) warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_dnn_classifier.get_variable_names(): # Learning rate is also checkpointed in V2 optimizer. So we need to make # sure it uses the new value after warm started. 
      if 'learning_rate' in variable_name:
        # The V2 optimizer checkpoints its learning rate; the warm-started
        # model must use the new (0.0) value, not the checkpointed one.
        self.assertAllClose(
            0.0,
            warm_started_dnn_classifier.get_variable_value(variable_name))
      else:
        self.assertAllClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))

  def test_regressor_basic_warm_starting(self):
    """Tests correctness of DNNRegressor default warm-start."""
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNRegressor and train to save a checkpoint.
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        model_dir=self._ckpt_and_vocab_dir,
        optimizer='SGD')
    dnn_regressor.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNRegressor, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_dnn_regressor = self._dnn_regressor_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
        warm_start_from=dnn_regressor.model_dir)
    warm_started_dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
    for variable_name in warm_started_dnn_regressor.get_variable_names():
      # Learning rate is also checkpointed in V2 optimizer. So we need to make
      # sure it uses the new value after warm started.
      if 'learning_rate' in variable_name:
        self.assertAllClose(
            0.0,
            warm_started_dnn_regressor.get_variable_value(variable_name))
      else:
        self.assertAllClose(
            dnn_regressor.get_variable_value(variable_name),
            warm_started_dnn_regressor.get_variable_value(variable_name))

  def test_warm_starting_selective_variables(self):
    """Tests selecting variables to warm-start."""
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNClassifier and train to save a checkpoint.
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        n_classes=4,
        optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
        # The provided regular expression will only warm-start the city
        # embedding, not the kernels and biases of the hidden weights.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=dnn_classifier.model_dir,
            vars_to_warm_start='.*(city).*'))
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    for variable_name in warm_started_dnn_classifier.get_variable_names():
      if 'city' in variable_name:
        # Warm-started from the checkpoint, so values must match.
        self.assertAllClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))
      elif 'bias' in variable_name:
        # Hidden layer biases are zero-initialized.
        bias_values = warm_started_dnn_classifier.get_variable_value(
            variable_name)
        self.assertAllClose(np.zeros_like(bias_values), bias_values)
      elif 'kernel' in variable_name:
        # We can't override the glorot uniform initializer used for the kernels
        # in the dense layers, so just make sure we're not getting the same
        # values from the old checkpoint.
        self.assertAllNotClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))

  def test_warm_starting_with_vocab_remapping(self):
    """Tests warm-starting with vocab remapping."""
    vocab_list = ['doctor', 'lawyer', 'consultant']
    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
    with open(vocab_file, 'w') as f:
      f.write('\n'.join(vocab_list))
    occupation = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_file(
            'occupation',
            vocabulary_file=vocab_file,
            vocabulary_size=len(vocab_list)),
        dimension=2)

    # Create a DNNClassifier and train to save a checkpoint.
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[occupation],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change). Use a new FeatureColumn with a
    # different vocabulary for occupation.
    new_vocab_list = ['doctor', 'consultant', 'engineer']
    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
                                  'new_occupation_vocab')
    with open(new_vocab_file, 'w') as f:
      f.write('\n'.join(new_vocab_list))
    new_occupation = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_file(
            'occupation',
            vocabulary_file=new_vocab_file,
            vocabulary_size=len(new_vocab_list)),
        dimension=2)
    # We can create our VocabInfo object from the new and old occupation
    # FeatureColumn's.
    occupation_vocab_info = estimator.VocabInfo(
        new_vocab=new_occupation.categorical_column.vocabulary_file,
        new_vocab_size=new_occupation.categorical_column.vocabulary_size,
        num_oov_buckets=new_occupation.categorical_column.num_oov_buckets,
        old_vocab=occupation.categorical_column.vocabulary_file,
        old_vocab_size=occupation.categorical_column.vocabulary_size,
        # Can't use constant_initializer with load_and_remap. In practice,
        # use a truncated normal initializer.
        backup_initializer=tf.compat.v1.initializers.random_uniform(
            minval=0.39, maxval=0.39))
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[occupation],
        n_classes=4,
        optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=dnn_classifier.model_dir,
            var_name_to_vocab_info={
                OCCUPATION_EMBEDDING_NAME: occupation_vocab_info
            },
            # Explicitly providing None here will only warm-start variables
            # referenced in var_name_to_vocab_info (no hidden weights will be
            # warmstarted).
            vars_to_warm_start=None))
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # 'doctor' was ID-0 and still ID-0.
    self.assertAllClose(
        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[0, :],
        warm_started_dnn_classifier.get_variable_value(
            OCCUPATION_EMBEDDING_NAME)[0, :])
    # 'consultant' was ID-2 and now ID-1.
    self.assertAllClose(
        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[2, :],
        warm_started_dnn_classifier.get_variable_value(
            OCCUPATION_EMBEDDING_NAME)[1, :])
    # 'engineer' is a new entry and should be initialized with the
    # backup_initializer in VocabInfo.
    self.assertAllClose([0.39] * 2,
                        warm_started_dnn_classifier.get_variable_value(
                            OCCUPATION_EMBEDDING_NAME)[2, :])
    for variable_name in warm_started_dnn_classifier.get_variable_names():
      if 'bias' in variable_name:
        # Hidden layer biases are zero-initialized.
        bias_values = warm_started_dnn_classifier.get_variable_value(
            variable_name)
        self.assertAllClose(np.zeros_like(bias_values), bias_values)
      elif 'kernel' in variable_name:
        # We can't override the glorot uniform initializer used for the kernels
        # in the dense layers, so just make sure we're not getting the same
        # values from the old checkpoint.
        self.assertAllNotClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))

  def test_warm_starting_with_naming_change(self):
    """Tests warm-starting with a Tensor name remapping."""
    locality = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNClassifier and train to save a checkpoint.
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[locality],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        n_classes=4,
        optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
        # The 'city' variable corresponds to the 'locality' variable in the
        # previous model.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=dnn_classifier.model_dir,
            var_name_to_prev_var_name={
                CITY_EMBEDDING_NAME:
                    CITY_EMBEDDING_NAME.replace('city', 'locality')
            }))
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    for variable_name in warm_started_dnn_classifier.get_variable_names():
      if 'city' in variable_name:
        # The remapped 'city' embedding must equal the old 'locality' one.
        self.assertAllClose(
            dnn_classifier.get_variable_value(
                CITY_EMBEDDING_NAME.replace('city', 'locality')),
            warm_started_dnn_classifier.get_variable_value(CITY_EMBEDDING_NAME))
      # Learning rate is also checkpointed in V2 optimizer. So we need to make
      # sure it uses the new value after warm started.
      elif 'learning_rate' in variable_name:
        self.assertAllClose(
            0.0, warm_started_dnn_classifier.get_variable_value(variable_name))
      else:
        self.assertAllClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))


class BaseDNNClassifierEvaluateTest(object):
  """Shared evaluate() tests, parameterized by a DNNClassifier-like factory."""

  def __init__(self, dnn_classifier_fn, fc_impl=feature_column_v2):
    # dnn_classifier_fn: callable constructing the estimator under test.
    # fc_impl: feature-column module (v1 or v2) used to build columns.
    self._dnn_classifier_fn = dnn_classifier_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_one_dim(self):
    """Asserts evaluation metrics for one-dimensional input and logits."""
    global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), global_step, self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10.], [10.]]}, [[1], [0]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08], [-2.08]] =>
    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
    # loss = (-1. * log(0.111) -1. * log(0.889) = 2.31544200) / 2
    expected_loss = 1.157721
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
            metric_keys.MetricKeys.ACCURACY: 0.5,
            metric_keys.MetricKeys.PRECISION: 0.0,
            metric_keys.MetricKeys.RECALL: 0.0,
            metric_keys.MetricKeys.PREDICTION_MEAN: 0.11105597,
            metric_keys.MetricKeys.LABEL_MEAN: 0.5,
            metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
            # There is no good way to calculate AUC for only two data points.
            # But that is what the algorithm returns.
            metric_keys.MetricKeys.AUC: 0.5,
            metric_keys.MetricKeys.AUC_PR: 0.5,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))

  def test_multi_dim(self):
    """Asserts evaluation metrics for multi-dimensional input and logits."""
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    n_classes = 3

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        n_classes=n_classes,
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10., 8.], [10., 8.]]}, [[1], [0]]

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39], [-0.48, 0.48, 0.39]]
    # probabilities = exp(logits)/sum(exp(logits))
    #               = [[0.16670536, 0.43538380, 0.39791084],
    #                  [0.16670536, 0.43538380, 0.39791084]]
    # loss = -log(0.43538380) - log(0.16670536)
    expected_loss = 2.62305466 / 2  # batch size
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
            metric_keys.MetricKeys.ACCURACY: 0.5,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))

  def test_float_labels(self):
    """Asserts evaluation metrics for float labels in binary classification."""
    global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), global_step, self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10.], [10.]]}, [[0.8], [0.4]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08], [-2.08]] =>
    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
    # loss = (-0.8 * log(0.111) -0.2 * log(0.889)
    #         -0.4 * log(0.111) -0.6 * log(0.889)) / 2 = 2.7314420 / 2
    expected_loss = 1.365721
    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
    self.assertAlmostEqual(expected_loss, metrics[metric_keys.MetricKeys.LOSS])

  def test_multi_dim_weights(self):
    """Tests evaluation with weights."""
    # Uses the same checkpoint as test_multi_dim.
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    n_classes = 3

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        n_classes=n_classes,
        weight_column='w',
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10., 8.], [10., 8.]], 'w': [[10.], [100.]]}, [[1], [0]]

    # Uses identical numbers as test_multi_dim.
    # See that test for calculation of logits.
    # loss = (-log(0.43538380)*10 - log(0.16670536)*100) / 2
    expected_loss = 93.734
    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
    self.assertAlmostEqual(
        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)


class BaseDNNRegressorEvaluateTest(object):
  """Shared evaluate() tests, parameterized by a DNNRegressor-like factory."""

  def __init__(self, dnn_regressor_fn, fc_impl=feature_column_v2):
    # dnn_regressor_fn: callable constructing the estimator under test.
    # fc_impl: feature-column module (v1 or v2) used to build columns.
    self._dnn_regressor_fn = dnn_regressor_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_one_dim(self):
    """Asserts evaluation metrics for one-dimensional input and logits."""
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), global_step, self._model_dir)

    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      return {'age': [[10.]]}, [[1.]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08]] => predictions = [-2.08].
    # loss = (1+2.08)^2 = 9.4864
    expected_loss = 9.4864
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
            metric_keys.MetricKeys.PREDICTION_MEAN: -2.08,
            metric_keys.MetricKeys.LABEL_MEAN: 1.0,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))

  def test_multi_dim(self):
    """Asserts evaluation metrics for multi-dimensional input and logits."""
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    label_dimension = 3

    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    def _input_fn():
      return {'age': [[10., 8.]]}, [[1., -1., 0.5]]

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]]
    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
    # expected_loss = loss / 3
    expected_loss = 1.4643
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
            metric_keys.MetricKeys.PREDICTION_MEAN: 0.39 / 3.0,
            metric_keys.MetricKeys.LABEL_MEAN: 0.5 / 3.0,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))

  def test_multi_dim_weights(self):
    """Asserts evaluation metrics for weighted multi-dimensional inputs."""
    # Uses the same checkpoint as test_multi_dim.
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    label_dimension = 3

    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        label_dimension=label_dimension,
        weight_column='w',
        model_dir=self._model_dir)

    def _input_fn():
      return {'age': [[10., 8.]], 'w': [10.]}, [[1., -1., 0.5]]

    # Uses identical numbers as test_multi_dim.
    # See that test for calculation of logits.
    # loss = 4.3929*10/3
    expected_loss = 14.643
    metrics = dnn_regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertAlmostEqual(
        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)


class BaseDNNClassifierPredictTest(object):
  """Shared predict() tests, parameterized by a DNNClassifier-like factory."""

  def __init__(self, dnn_classifier_fn, fc_impl=feature_column_v2):
    # dnn_classifier_fn: callable constructing the estimator under test.
    # fc_impl: feature-column module (v1 or v2) used to build columns.
    self._dnn_classifier_fn = dnn_classifier_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_one_dim(self, label_vocabulary, label_output_fn):
    """Asserts predictions for one-dimensional input and logits."""
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ),
                      global_step=0,
                      model_dir=self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        label_vocabulary=label_vocabulary,
        feature_columns=(self._fc_impl.numeric_column('x'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] =>
    # logistic = exp(-2.08)/(1 + exp(-2.08)) = 0.11105597
    # probabilities = [1-logistic, logistic] = [0.88894403, 0.11105597]
    # class_ids = argmax(probabilities) = [0]
    predictions = next(dnn_classifier.predict(input_fn=input_fn))
    self.assertAllClose([-2.08],
                        predictions[prediction_keys.PredictionKeys.LOGITS])
    self.assertAllClose([0.11105597],
                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
    self.assertAllClose(
        [0.88894403, 0.11105597],
        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
    self.assertAllClose([0],
                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
    self.assertAllEqual([label_output_fn(0)],
                        predictions[prediction_keys.PredictionKeys.CLASSES])
    self.assertAllClose(
        [0, 1], predictions[prediction_keys.PredictionKeys.ALL_CLASS_IDS])
    self.assertAllEqual(
        [label_output_fn(0), label_output_fn(1)],
        predictions[prediction_keys.PredictionKeys.ALL_CLASSES])

  def test_one_dim_without_label_vocabulary(self):
    # Without a vocabulary, classes are the stringified class ids.
    self._test_one_dim(
        label_vocabulary=None, label_output_fn=lambda x: ('%s' % x).encode())

  def test_one_dim_with_label_vocabulary(self):
    # With a vocabulary, classes are looked up from the provided list.
    n_classes = 2
    self._test_one_dim(
        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())

  def _test_multi_dim_with_3_classes(self, label_vocabulary, label_output_fn):
    """Asserts predictions for multi-dimensional input and logits."""
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ),
                      global_step=0,
                      model_dir=self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
        label_vocabulary=label_vocabulary,
        n_classes=3,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        # Inputs shape is (batch_size, num_inputs).
        x={'x': np.array([[10., 8.]])},
        batch_size=1,
        shuffle=False)

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [-0.48, 0.48, 0.39] =>
    # probabilities[i] = exp(logits[i]) / sum_j exp(logits[j]) =>
    # probabilities = [0.16670536, 0.43538380, 0.39791084]
    # class_ids = argmax(probabilities) = [1]
    predictions = next(dnn_classifier.predict(input_fn=input_fn))
    self.assertItemsEqual([
        prediction_keys.PredictionKeys.LOGITS,
        prediction_keys.PredictionKeys.PROBABILITIES,
        prediction_keys.PredictionKeys.CLASS_IDS,
        prediction_keys.PredictionKeys.CLASSES,
        prediction_keys.PredictionKeys.ALL_CLASS_IDS,
        prediction_keys.PredictionKeys.ALL_CLASSES
    ], six.iterkeys(predictions))
    self.assertAllClose([-0.48, 0.48, 0.39],
                        predictions[prediction_keys.PredictionKeys.LOGITS])
    self.assertAllClose(
        [0.16670536, 0.43538380, 0.39791084],
        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
    self.assertAllEqual([1],
                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
    self.assertAllEqual([label_output_fn(1)],
                        predictions[prediction_keys.PredictionKeys.CLASSES])
    self.assertAllEqual(
        [0, 1, 2], predictions[prediction_keys.PredictionKeys.ALL_CLASS_IDS])
    self.assertAllEqual(
        [label_output_fn(0),
         label_output_fn(1),
         label_output_fn(2)],
        predictions[prediction_keys.PredictionKeys.ALL_CLASSES])

  def test_multi_dim_with_3_classes_but_no_label_vocab(self):
    self._test_multi_dim_with_3_classes(
        label_vocabulary=None, label_output_fn=lambda x: ('%s' % x).encode())

  def test_multi_dim_with_3_classes_and_label_vocab(self):
    n_classes = 3
    self._test_multi_dim_with_3_classes(
        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())


class BaseDNNRegressorPredictTest(object):
  """Shared predict() tests, parameterized by a DNNRegressor-like factory."""

  def __init__(self, dnn_regressor_fn, fc_impl=feature_column_v2):
    # dnn_regressor_fn: callable constructing the estimator under test.
    # fc_impl: feature-column module (v1 or v2) used to build columns.
    self._dnn_regressor_fn = dnn_regressor_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_one_dim(self):
    """Asserts predictions for one-dimensional input and logits."""
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ),
                      global_step=0,
                      model_dir=self._model_dir)

    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=(self._fc_impl.numeric_column('x'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08]] => predictions = [-2.08].
    self.assertAllClose({
        prediction_keys.PredictionKeys.PREDICTIONS: [-2.08],
    }, next(dnn_regressor.predict(input_fn=input_fn)))

  def test_multi_dim(self):
    """Asserts predictions for multi-dimensional input and logits."""
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), 100, self._model_dir)

    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
        label_dimension=3,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        # Inputs shape is (batch_size, num_inputs).
        x={'x': np.array([[10., 8.]])},
        batch_size=1,
        shuffle=False)

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39]
    self.assertAllClose(
        {
            prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39],
        }, next(dnn_regressor.predict(input_fn=input_fn)))


class _SummaryHook(tf.compat.v1.train.SessionRunHook):
  """Saves summaries every N steps."""

  def __init__(self):
    # Accumulates one parsed Summary proto per train step.
    self._summaries = []

  def begin(self):
    # Merge all summary ops in the graph so each run fetches them at once.
    self._summary_op = tf.compat.v1.summary.merge_all()

  def before_run(self, run_context):
    # Request the serialized merged summary alongside the training fetch.
    return tf.compat.v1.train.SessionRunArgs({'summary': self._summary_op})

  def after_run(self, run_context, run_values):
    # Parse the serialized summary bytes back into a Summary proto.
    s = tf.compat.v1.summary.Summary()
    s.ParseFromString(run_values.results['summary'])
    self._summaries.append(s)

  def summaries(self):
    # Immutable snapshot of all summaries collected so far.
    return tuple(self._summaries)


def _assert_checkpoint(testcase, global_step, input_units, hidden_units,
                       output_units, model_dir):
  """Asserts checkpoint contains expected variables with proper shapes.

  Args:
    testcase: A TestCase instance.
    global_step: Expected global step value.
    input_units: The dimension of input layer.
    hidden_units: Iterable of integer sizes for the hidden layers.
    output_units: The dimension of output layer (logits).
    model_dir: The model directory.
  """
  shapes = {name: shape for (name, shape) in tf.train.list_variables(model_dir)}

  # Global step: scalar (empty shape) with the expected value.
  testcase.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP])
  testcase.assertEqual(
      global_step,
      tf.train.load_variable(model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP))

  # Hidden layer weights: each kernel is (fan_in, fan_out), bias is (fan_out,).
  prev_layer_units = input_units
  for i in range(len(hidden_units)):
    layer_units = hidden_units[i]
    testcase.assertAllEqual((prev_layer_units, layer_units),
                            shapes[HIDDEN_WEIGHTS_NAME_PATTERN % i])
    testcase.assertAllEqual((layer_units,),
                            shapes[HIDDEN_BIASES_NAME_PATTERN % i])
    prev_layer_units = layer_units

  # Output layer weights.
  testcase.assertAllEqual((prev_layer_units, output_units),
                          shapes[LOGITS_WEIGHTS_NAME])
  testcase.assertAllEqual((output_units,), shapes[LOGITS_BIASES_NAME])


def _assert_simple_summary(testcase, expected_values, actual_summary):
  """Asserts that the summary contains the specified simple values.

  Args:
    testcase: A TestCase instance.
    expected_values: Dict of expected tags and simple values.
    actual_summary: `summary_pb2.Summary`.
  """
  # Only compare tags present in expected_values; other summary values
  # (if any) are ignored.
  testcase.assertAllClose(
      expected_values, {
          v.tag: v.simple_value
          for v in actual_summary.value
          if (v.tag in expected_values)
      })


class BaseDNNClassifierTrainTest(object):
  """Shared train() tests, parameterized by a DNNClassifier-like factory."""

  def __init__(self, dnn_classifier_fn, fc_impl=feature_column_v2):
    # dnn_classifier_fn: callable constructing the estimator under test.
    # fc_impl: feature-column module (v1 or v2) used to build columns.
    self._dnn_classifier_fn = dnn_classifier_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_from_scratch_with_default_optimizer_binary(self):
    """Trains a fresh binary classifier and validates the checkpoint."""
    hidden_units = (2, 2)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]), steps=num_steps)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_from_scratch_with_default_optimizer_multi_class(self):
    """Trains a fresh multi-class classifier and validates the checkpoint."""
    hidden_units = (2, 2)
    n_classes = 3
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        n_classes=n_classes,
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[2]]), steps=num_steps)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=n_classes,
        model_dir=self._model_dir)

  def test_from_scratch_validate_summary(self):
    """Trains from scratch and validates summaries and checkpoint."""
    hidden_units = (2, 2)
    opt = mock_optimizer(self, hidden_units=hidden_units)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]),
        steps=num_steps,
        hooks=(summary_hook,))
    # The optimizer's iteration counter must equal the number of train steps.
    self.assertEqual(num_steps,
                     dnn_classifier.get_variable_value(opt.iterations.name))
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      summary_keys = [v.tag for v in summary.value]
      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)

  def test_binary_classification(self):
    """Validates train loss and summaries for binary classification."""
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] => probabilities = [0.889, 0.111]
    # loss = -1. * log(0.111) = 2.19772100
    expected_loss = 2.19772100
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(base_global_step + num_steps,
                     dnn_classifier.get_variable_value(opt.iterations.name))
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              'dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/hiddenlayer_1/fraction_of_zero_values': .5,
              'dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_binary_classification_float_labels(self):
    """Validates train loss for float labels in binary classification."""
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] => probabilities = [0.889, 0.111]
    # loss = -0.8 * log(0.111) -0.2 * log(0.889) = 1.7817210
    expected_loss = 1.7817210
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[0.8]]), steps=num_steps)
    self.assertEqual(base_global_step + num_steps,
                     dnn_classifier.get_variable_value(opt.iterations.name))

  def test_multi_class(self):
    """Validates train loss and summaries for multi-class classification."""
    n_classes = 3
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08, 2.08, 1.19] => probabilities = [0.0109, 0.7011, 0.2879]
    # loss = -1. * log(0.7011) = 0.35505795
    expected_loss = 0.35505795
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_classifier = self._dnn_classifier_fn(
        n_classes=n_classes,
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(base_global_step + num_steps,
                     dnn_classifier.get_variable_value(opt.iterations.name))
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              'dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/hiddenlayer_1/fraction_of_zero_values': .5,
              'dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=n_classes,
        model_dir=self._model_dir)


class BaseDNNRegressorTrainTest(object):
  """Shared train() tests, parameterized by a DNNRegressor-like factory."""

  def __init__(self, dnn_regressor_fn, fc_impl=feature_column_v2):
    # dnn_regressor_fn: callable constructing the estimator under test.
    # fc_impl: feature-column module (v1 or v2) used to build columns.
    self._dnn_regressor_fn = dnn_regressor_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_from_scratch_with_default_optimizer(self):
    """Trains a fresh regressor and validates the checkpoint."""
    hidden_units = (2, 2)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': ((1,),)
        }, ((10,),)), steps=num_steps)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_from_scratch(self):
    """Trains from scratch and validates summaries and checkpoint."""
    hidden_units = (2, 2)
    opt = mock_optimizer(self, hidden_units=hidden_units)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': ((1,),)
        }, ((5.,),)),
        steps=num_steps,
        hooks=(summary_hook,))
    # The optimizer's iteration counter must equal the number of train steps.
    self.assertEqual(num_steps,
                     dnn_regressor.get_variable_value(opt.iterations.name))
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      summary_keys = [v.tag for v in summary.value]
      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)

  def test_one_dim(self):
    """Asserts train loss for one-dimensional input and logits."""
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] => predictions = [-2.08]
    # loss = (1 + 2.08)^2 = 9.4864
    expected_loss = 9.4864
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1.]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(base_global_step + num_steps,
                     dnn_regressor.get_variable_value(opt.iterations.name))
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              'dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
              'dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_multi_dim(self):
    """Asserts train loss for multi-dimensional input and logits."""
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    input_dimension = 2
    label_dimension = 3

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]]
    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
    # expected_loss = loss / 3 (batch size)
    expected_loss = 1.4643
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=[
            self._fc_impl.numeric_column('age', shape=[input_dimension])
        ],
        label_dimension=label_dimension,
        optimizer=opt,
        model_dir=self._model_dir)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': [[10., 8.]]
        }, [[1., -1., 0.5]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(base_global_step + num_steps,
                     dnn_regressor.get_variable_value(opt.iterations.name))
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              'dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
              'dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=input_dimension,
        hidden_units=hidden_units,
        output_units=label_dimension,
        model_dir=self._model_dir)



================================================
FILE: tensorflow_estimator/python/estimator/canned/head.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstractions for the head(s) of a model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import collections

import six
import tensorflow as tf
from tensorflow.python.feature_column import feature_column
from tensorflow.python.framework import ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.util import function_utils
from tensorflow_estimator.python.estimator import model_fn
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export_output
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

_DEFAULT_SERVING_KEY = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
# The above default is defined by TF Serving, but these next three are just
# a local convention without any special meaning.
_CLASSIFY_SERVING_KEY = 'classification'
_REGRESS_SERVING_KEY = 'regression'
_PREDICT_SERVING_KEY = 'predict'

# A LossSpec contains
# * a scalar `Tensor` representing reduced weighted training loss
# * a `Tensor` representing the unreduced unweighted loss
# * a `Tensor` representing the example weights
# * possibly processed labels (e.g. vocabulary lookup, shape manipulation, etc)
LossSpec = collections.namedtuple(
    'LossSpec',
    ['training_loss', 'unreduced_loss', 'weights', 'processed_labels'])


def _summary_key(head_name, val):
  # Suffix summary/metric keys with the head name so that multi-head models
  # do not produce colliding keys.
  return '%s/%s' % (val, head_name) if head_name else val


def _create_eval_metrics_tuple(fn, kwargs):
  """Creates TPU eval metrics tuple.

  Helper function to make eval_metric tuple (eval_metric_fn, fn_kwargs) used
  by `TPUEstimator`. TPUEstimator requires that `eval_metric_fn` take
  exclusively Tensor arguments. This helper can help create such a function
  from a more generic function that can take both Tensor and non-Tensor
  arguments.

  Args:
    fn: A eval_metric_fn that takes both Tensor and non-Tensor arguments. This
      function must return a dict of form
      {'metric name': (metric_tensor, eval_op)}
    kwargs: Dict of arguments for `fn`.

  Returns:
    `eval_metric` tuple that can be passed to a `model_fn._TPUEstimatorSpec`.
  """
  tensor_kwargs = {}
  nontensor_kwargs = {}
  # Split kwargs: Tensor-valued ones flow through TPUEstimator's metric
  # machinery; the rest are captured in the closure below.
  for k, v in six.iteritems(kwargs):
    if tf.is_tensor(v):
      tensor_kwargs[k] = v
    else:
      nontensor_kwargs[k] = v

  def _fn(**tensors):
    return fn(**dict(nontensor_kwargs, **tensors))

  return (_fn, tensor_kwargs)


class _Head(object):
  """Interface for the head/top of a model.

  Given logits (or output of a hidden layer), a Head knows how to compute
  predictions, loss, train_op, metrics and export outputs. It is meant to:

  1. Simplify writing model_fn and to make model_fn more configurable
  2. Support wide range of machine learning models. Since most heads can work
     with logits, they can support DNN, RNN, Wide, Wide&Deep,
     Global objectives, Gradient boosted trees and many other types
     of machine learning models.

  Common usage:
  Here is simplified model_fn to build a DNN regression model.
    ```python
    def _my_dnn_model_fn(features, labels, mode, params, config=None):
      # Optionally your callers can pass head to model_fn as a param.
      head = tf.contrib.estimator.regression_head(...)
      inputs = tf.feature_column.input_layer(features, ...)
      hidden_layer0 = tf.layers.dense(
          inputs, units=1000, activation=tf.nn.relu)
      hidden_layer1 = tf.layers.dense(
          hidden_layer0, units=500, activation=tf.nn.relu)
      logits = tf.layers.dense(
          hidden_layer1, units=head.logits_dimension, activation=None)

      return head.create_estimator_spec(
          features=features,
          labels=labels,
          mode=mode,
          logits=logits,
          optimizer=optimizer)
    ```

  There are cases where computing and applying gradients can not be
  meaningfully captured with optimizer or train_op_fn we support (for example,
  with sync optimizer). In such case, you can take the responsibility on your
  own. Here is a common use case,
    ```python
    estimator_spec = head.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        train_op_fn=lambda _: tf.no_op())
    if mode == ModeKeys.TRAIN:
      optimizer = ...
      sync = tf.train.SyncReplicasOptimizer(opt=optimizer, ...)
      update_op = sync.minimize(
          estimator_spec.loss, global_step=tf.get_global_step())
      hooks = [sync.make_session_run_hook(is_chief)]
      ... update train_op and hooks in EstimatorSpec and return
    ```
  """

  __metaclass__ = abc.ABCMeta

  @abc.abstractproperty
  def name(self):
    """The name of this head.

    Returns:
      A string.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractproperty
  def logits_dimension(self):
    """Size of the last dimension of the logits `Tensor`.

    Typically, logits is of shape `[batch_size, logits_dimension]`.

    Returns:
      The expected size of the `logits` tensor.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractmethod
  def create_loss(self, features, mode, logits, labels):
    """Returns a loss Tensor from provided logits.

    This function is designed to be used by framework developers. Almost all
    users should use create_estimator_spec(), which calls this internally.
    `mode` and `features` are most likely not used, but some Head
    implementations may require them.

    Args:
      features: Input `dict` of `Tensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` to be used for loss construction.
      labels: Labels `Tensor`, or `dict` of same.

    Returns:
      A LossSpec that contains
      * the scalar `Tensor` representing reduced weighted training loss
      * the `Tensor` representing the unreduced unweighted loss
      * the `Tensor` representing the example weights
      * possibly processed labels (e.g. vocabulary lookup, shape manipulation,
        etc.)

      To be extendable in the future.
    """
    raise NotImplementedError('Calling an abstract method.')

  # TODO(b/65403806): By default, collect regularization_losses from
  # GraphKeys.REGULARIZATION_LOSSES collection.
  def create_estimator_spec(self,
                            features,
                            mode,
                            logits,
                            labels=None,
                            optimizer=None,
                            train_op_fn=None,
                            regularization_losses=None):
    """Returns `EstimatorSpec` that a model_fn can return.

    Please note that,
    + All args must be passed via name.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` to be used by the head.
      labels: Labels `Tensor`, or `dict` of same.
      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
        updates variables and increments `global_step`.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns an
        op to optimize the model with the loss in TRAIN mode. Used if
        `optimizer` is `None`. Exactly one of `train_op_fn` and `optimizer`
        must be set in TRAIN mode. None is allowed in other modes. If you want
        to optimize loss yourself you can pass `lambda _: tf.no_op()` and then
        use EstimatorSpec.loss to compute and apply gradients.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      `EstimatorSpec`.
    """
    try:
      # Prefer the TPU-compatible spec when the subclass provides one; it can
      # be converted losslessly to a regular `EstimatorSpec`.
      tpu_estimator_spec = (
          self._create_tpu_estimator_spec(features, mode, logits, labels,
                                          optimizer, train_op_fn,
                                          regularization_losses))
      return tpu_estimator_spec.as_estimator_spec()
    except NotImplementedError:
      # Not all subclasses of _Head will have implemented
      # _create_tpu_estimator_spec. If it is implemented, we can use it to
      # create our `EstimatorSpec` here.
      raise NotImplementedError(
          'Subclasses of _Head must implement `create_estimator_spec()` or '
          '_create_tpu_estimator_spec().')

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 train_op_fn=None,
                                 regularization_losses=None):
    """Returns `model_fn._TPUEstimatorSpec` that a model_fn can return.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` to be used by the head.
      labels: Labels `Tensor`, or `dict` of same.
      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
        updates variables and increments `global_step`.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns an
        op to optimize the model with the loss in TRAIN mode. Used if
        `optimizer` is `None`. Exactly one of `train_op_fn` and `optimizer`
        must be set in TRAIN mode. None is allowed in other modes. If you want
        to optimize loss yourself you can pass `lambda _: tf.no_op()` and then
        use EstimatorSpec.loss to compute and apply gradients.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      A `model_fn._TPUEstimatorSpec' instance.
    """
    raise NotImplementedError(
        'TPUEstimatorSpec not available for this model head.')


def _check_dense_labels_match_logits_and_reshape(labels, logits,
                                                 expected_labels_dimension):
  """Checks that labels shape matches logits and reshapes if needed.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Then labels
  shape must be [D0, D1, ... DN, expected_labels_dimension].
  If expected_labels_dimension=1, labels could be [D0, D1, ... DN] and this
  method reshapes them to [D0, D1, ... DN, 1].

  Args:
    labels: labels Tensor.
    logits: logits Tensor.
    expected_labels_dimension: Integer.

  Returns:
    Validated and reshaped labels Tensor.

  Raises:
    ValueError: If labels is a SparseTensor.
    ValueError: If labels shape is statically defined and fails validation.
    OpError: If labels shape is not statically defined and fails validation.
  """
  if labels is None:
    raise ValueError(
        'You must provide a labels Tensor. Given: None. '
        'Suggested troubleshooting steps: Check that your data contain '
        'your label feature. Check that your input_fn properly parses and '
        'returns labels.')
  with ops.name_scope(None, 'labels', (labels, logits)) as scope:
    labels = tf.compat.v1.convert_to_tensor_or_sparse_tensor(labels)
    if isinstance(labels, tf.sparse.SparseTensor):
      raise ValueError(
          'SparseTensor labels are not supported. '
          'labels must be a Tensor of shape [D0, D1, ..., DN, %s], '
          'e.g. [batch_size, %s]. '
          'Suggested Fix (1): Check the label feature in your data. '
          'Each example must contain %s value(s). If not, your choice of label '
          'was probably incorrect. '
          'Suggested Fix (2): In your input_fn, use '
          'tf.sparse_tensor_to_dense() to turn labels into a Tensor.'
          '' % (expected_labels_dimension, expected_labels_dimension,
                expected_labels_dimension))
    # If labels is rank (logits rank - 1), restore the trailing unit
    # dimension so labels and logits have the same rank.
    if (labels.shape.ndims is not None and logits.shape.ndims is not None and
        labels.shape.ndims == logits.shape.ndims - 1):
      labels = tf.compat.v1.expand_dims(labels, -1)
    labels_shape = tf.compat.v1.shape(labels)
    logits_shape = tf.compat.v1.shape(logits)
    err_msg = (
        'labels shape must be [D0, D1, ... DN, {}]. '
        'Suggested Fix: check your n_classes argument to the estimator '
        'and/or the shape of your label.'.format(expected_labels_dimension))
    assert_rank = tf.compat.v1.debugging.assert_rank_at_least(
        labels, 2, message=err_msg)
    with tf.control_dependencies([assert_rank]):
      static_shape = labels.shape
      if static_shape.ndims is not None:
        # When the final dimension is statically known, validate it eagerly
        # at graph-construction time for a clearer error.
        dim1 = static_shape[-1]
        if (dim1 is not None) and (dim1 != expected_labels_dimension):
          raise ValueError('Mismatched label shape. '
                           'Expected labels dimension=%s. Received %s. '
                           'Suggested Fix:'
                           'If your classifier expects one-hot encoding label,'
                           'check your n_classes argument to the estimator '
                           'and/or the shape of your label. '
                           'Otherwise, check the shape of your label.' %
                           (expected_labels_dimension, dim1))
      # Dynamic-shape fallback: assert shape equality at run time.
      expected_labels_shape = tf.concat(
          [logits_shape[:-1], [expected_labels_dimension]], axis=0)
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          expected_labels_shape,
          labels_shape,
          message=err_msg,
          data=[
              'expected_labels_shape: ', expected_labels_shape,
              'labels_shape: ', labels_shape
          ])
      with tf.control_dependencies([assert_dimension]):
        return tf.identity(labels, name=scope)


def _get_weights_and_check_match_logits(features,
                                        weight_column,
                                        logits,
                                        allow_per_logit_weights=False):
  """Fetches weights from features and checks that the shape matches logits.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
  can be either:
  * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
  * [D0, D1, ... DN, 1]
  * [D0, D1, ... DN]: In this case, weights is reshaped into
    [D0, D1, ... DN, 1] to work with weight broadcasting rules.

  Args:
    features: The features dict that contains weights.
    weight_column: The weight column. If not given, this method returns 1.
    logits: logits Tensor.
    allow_per_logit_weights: Boolean. Whether we allow weights along the
      logits dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.

  Returns:
    Validated and reshaped weights Tensor.

  Raises:
    ValueError: If the weights `Tensor` cannot be cast into float.
  """
  if allow_per_logit_weights:
    err_msg = ('weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
               '[D0, D1, ... DN, logits_dimension]')
  else:
    err_msg = ('weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
  with ops.name_scope(
      None, 'weights',
      values=tuple(six.itervalues(features)) + (logits,)) as scope:
    # Fetch the weights.
    if weight_column is None:
      return 1.
    if isinstance(weight_column, six.string_types):
      weight_column = tf.feature_column.numeric_column(
          key=weight_column, shape=(1,))
    if not isinstance(
        weight_column,
        (tf.compat.v2.__internal__.feature_column.DenseColumn,
         feature_column._DenseColumn)):  # pylint: disable=protected-access
      raise TypeError('Weight column must be either a string or _DenseColumn.'
                      ' Given type: {}.'.format(type(weight_column)))
    weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
        feature_column._LazyBuilder(features))  # pylint: disable=protected-access
    if not (weights.dtype.is_floating or weights.dtype.is_integer):
      raise ValueError('Weight column should be castable to float. '
                       'Given dtype: {}'.format(weights.dtype))
    weights = tf.cast(weights, name='weights', dtype=tf.dtypes.float32)
    # Validate the weights shape.
    weights_shape = tf.compat.v1.shape(weights, name='weights_shape')
    logits_shape = tf.compat.v1.shape(logits, name='logits_shape')
    # Weights of rank (logits rank - 1): check against logits_shape[:-1] and
    # append a unit dimension so broadcasting applies.
    if (weights.shape.ndims is not None and logits.shape.ndims is not None and
        weights.shape.ndims == logits.shape.ndims - 1):
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          logits_shape[:-1],
          weights_shape,
          message=err_msg,
          data=[
              'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape
          ])
      with tf.control_dependencies([assert_dimension]):
        return tf.compat.v1.expand_dims(weights, -1, name=scope)
    supported_weights_shape = tf.concat([logits_shape[:-1], [1]], axis=0)
    if allow_per_logit_weights:
      condition = tf.math.reduce_any([
          tf.reduce_all(tf.math.equal(logits_shape, weights_shape)),
          tf.reduce_all(tf.math.equal(supported_weights_shape, weights_shape))
      ])
      assert_dimension = tf.debugging.Assert(
          condition=condition,
          data=[
              err_msg, 'logits_shape: ', logits_shape, 'weights_shape: ',
              weights_shape
          ])
    else:
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          supported_weights_shape,
          weights_shape,
          message=err_msg,
          data=[
              'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape
          ])
    with tf.control_dependencies([assert_dimension]):
      return tf.identity(weights, name=scope)


def _check_logits_final_dim(logits, expected_logits_dimension):
  """Checks that logits shape is [D0, D1, ... DN, logits_dimension]."""
  with ops.name_scope(None, 'logits', (logits,)) as scope:
    logits = tf.cast(logits, dtype=tf.dtypes.float32)
    logits_shape = tf.compat.v1.shape(logits)
    assert_rank = tf.compat.v1.debugging.assert_rank_at_least(
        logits,
        2,
        data=[logits_shape],
        message='logits shape must be [D0, D1, ... DN, logits_dimension]')
    with tf.control_dependencies([assert_rank]):
      static_shape = logits.shape
      if static_shape.ndims is not None and static_shape[-1] is not None:
        # Statically known final dimension: validate eagerly at graph build.
        if (isinstance(expected_logits_dimension, int) and
            static_shape[-1] != expected_logits_dimension):
          raise ValueError(
              'logits shape must be [D0, D1, ... DN, logits_dimension=%s], '
              'got %s.' % (expected_logits_dimension, static_shape))
        return logits
      # Dynamic final dimension: assert at run time instead.
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          expected_logits_dimension,
          logits_shape[-1],
          data=[logits_shape],
          message=('logits shape must be [D0, D1, ... DN, '
                   'logits_dimension=%s]' % (expected_logits_dimension,)))
      with tf.control_dependencies([assert_dimension]):
        return tf.identity(logits, name=scope)


def _validate_loss_fn_args(loss_fn):
  """Validates loss_fn arguments.

  Required arguments: labels, logits.
  Optional arguments: features.

  Args:
    loss_fn: The loss function.

  Raises:
    ValueError: If the signature is unexpected.
  """
  loss_fn_args = function_utils.fn_args(loss_fn)
  for required_arg in ['labels', 'logits']:
    if required_arg not in loss_fn_args:
      raise ValueError('loss_fn must contain argument: {}. '
                       'Given arguments: {}'.format(required_arg,
                                                    loss_fn_args))
  invalid_args = list(set(loss_fn_args) - set(['labels', 'logits', 'features']))
  if invalid_args:
    raise ValueError('loss_fn has unexpected args: {}'.format(invalid_args))


def _validate_n_classes(n_classes):
  """Validates n_classes argument.

  Required arguments: n_classes.
Args: n_classes: The number of classes. Raises: ValueError: If n_classes is <= 2 and n_classes is a Python integer. Returns: n_classes in its original type. """ if isinstance(n_classes, int) and (n_classes <= 2): raise ValueError('n_classes must be > 2: %s.' % n_classes) n_classes_as_tensor = ops.convert_to_tensor(n_classes) assert_n_classes = tf.compat.v1.debugging.assert_greater( n_classes_as_tensor, 2, message='n_classes must be greater than 2') with tf.control_dependencies([assert_n_classes]): tf.no_op() # Return n_classes in its original type, so that any code # using the accessor logits_dimension() has the original type. return n_classes def _call_loss_fn(loss_fn, labels, logits, features, expected_loss_dim=1): """Calls loss_fn and checks the returned shape. Args: loss_fn: The loss function. labels: Processed labels Tensor. logits: Logits Tensor of shape [D0, D1, ... DN, logits_dimension]. features: Features dict. expected_loss_dim: The expected last dimension of loss Tensor. Returns: Loss Tensor with shape [D0, D1, ... DN, expected_loss_dim]. """ loss_fn_args = function_utils.fn_args(loss_fn) kwargs = {} if 'features' in loss_fn_args: kwargs['features'] = features with ops.name_scope( None, 'call_loss_fn', values=[labels, logits] + list(six.itervalues(features))): unweighted_loss = loss_fn(labels=labels, logits=logits, **kwargs) logits_shape = tf.compat.v1.shape(logits, name='logits_shape') expected_loss_shape = tf.concat([logits_shape[:-1], [expected_loss_dim]], axis=0, name='expected_loss_shape') loss_shape = tf.compat.v1.shape(unweighted_loss, name='loss_shape') check_loss_shape_op = tf.debugging.Assert( tf.reduce_all(tf.math.equal(loss_shape, expected_loss_shape)), data=[ 'loss_fn must return Tensor of shape ' '[D0, D1, ... DN, {}]. 
'.format(expected_loss_dim), 'logits_shape: ', logits_shape, 'loss_shape: ', loss_shape ], name='check_loss_shape') with tf.control_dependencies([check_loss_shape_op]): return tf.identity(unweighted_loss) def _indicator_labels_mean(labels, weights=None, name=None): with ops.name_scope(name, 'labels_mean', (labels, weights)) as scope: labels = tf.cast(labels, name='labels', dtype=tf.dtypes.float32) if weights is not None: weights = tf.compat.v2.__internal__.ops.broadcast_weights(weights, labels) return tf.compat.v1.metrics.mean(labels, weights=weights, name=scope) def _all_class_ids(logits, n_classes): batch_size = tf.compat.v1.shape(logits)[0] class_id_list = tf.range(n_classes) return tf.tile( input=tf.compat.v1.expand_dims(input=class_id_list, axis=0), multiples=[batch_size, 1]) def _all_classes(logits, n_classes, label_vocabulary=None): batch_size = tf.compat.v1.shape(logits)[0] if label_vocabulary: classes_list = label_vocabulary else: classes_list = string_ops.as_string(tf.range(n_classes)) return tf.tile( input=tf.compat.v1.expand_dims(input=classes_list, axis=0), multiples=[batch_size, 1]) def _classification_output(scores, n_classes, label_vocabulary=None): batch_size = tf.compat.v1.shape(scores)[0] if label_vocabulary: export_class_list = label_vocabulary else: export_class_list = string_ops.as_string(tf.range(n_classes)) export_output_classes = tf.tile( input=tf.compat.v1.expand_dims(input=export_class_list, axis=0), multiples=[batch_size, 1]) return export_output.ClassificationOutput( scores=scores, # `ClassificationOutput` requires string classes. classes=export_output_classes) def _accuracy_baseline(labels_mean): """Return accuracy baseline based on labels mean. This is the best the model could do by always predicting one class. Args: labels_mean: Tuple of value and update op. Returns: Tuple of value and update op. """ with ops.name_scope(None, 'accuracy_baseline', labels_mean): value, update_op = labels_mean return (tf.math.maximum(value, 1. 
- value, name='value'), tf.math.maximum(update_op, 1 - update_op, name='update_op')) def _predictions_mean(predictions, weights=None, name=None): with ops.name_scope(name, 'predictions_mean', (predictions, weights)) as scope: predictions = tf.cast( predictions, name='predictions', dtype=tf.dtypes.float32) if weights is not None: weights = tf.compat.v2.__internal__.ops.broadcast_weights(weights, predictions) return tf.compat.v1.metrics.mean(predictions, weights=weights, name=scope) def _auc(labels, predictions, weights=None, curve='ROC', name=None): with ops.name_scope(name, 'auc', (predictions, labels, weights)) as scope: predictions = tf.cast( predictions, name='predictions', dtype=tf.dtypes.float32) if weights is not None: weights = tf.compat.v2.__internal__.ops.broadcast_weights(weights, predictions) return tf.compat.v1.metrics.auc( labels=labels, predictions=predictions, weights=weights, curve=curve, name=scope) def _accuracy_at_threshold(labels, predictions, weights, threshold, name=None): with ops.name_scope(name, 'accuracy_at_%s' % threshold, (predictions, labels, weights, threshold)) as scope: threshold_predictions = tf.compat.v1.to_float( tf.math.greater_equal(predictions, threshold)) return tf.compat.v1.metrics.accuracy( labels=labels, predictions=threshold_predictions, weights=weights, name=scope) def _precision_at_threshold(labels, predictions, weights, threshold, name=None): with ops.name_scope(name, 'precision_at_%s' % threshold, (predictions, labels, weights, threshold)) as scope: precision_tensor, update_op = tf.compat.v1.metrics.precision_at_thresholds( labels=labels, predictions=predictions, thresholds=(threshold,), weights=weights, name=scope) return tf.compat.v1.squeeze(precision_tensor), tf.compat.v1.squeeze( update_op) def _recall_at_threshold(labels, predictions, weights, threshold, name=None): with ops.name_scope(name, 'recall_at_%s' % threshold, (predictions, labels, weights, threshold)) as scope: precision_tensor, update_op = 
tf.compat.v1.metrics.recall_at_thresholds( labels=labels, predictions=predictions, thresholds=(threshold,), weights=weights, name=scope) return tf.compat.v1.squeeze(precision_tensor), tf.compat.v1.squeeze( update_op) def _multi_class_head_with_softmax_cross_entropy_loss( n_classes, weight_column=None, label_vocabulary=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM, loss_fn=None, name=None): """Creates a '_Head' for multi class classification. The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many applications, the shape is `[batch_size, n_classes]`. `labels` must be a dense `Tensor` with shape matching `logits`, namely `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, `labels` must be an integer `Tensor` with values specifying the class index. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. The loss is the weighted sum over the input dimensions. Namely, if the input labels have shape `[batch_size, 1]`, the loss is the weighted sum over `batch_size`. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features)` as arguments and returns unreduced loss with shape `[D0, D1, ... DN, 1]`. `loss_fn` must support integer `labels` with shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to the input labels before passing them to `loss_fn`. Args: n_classes: Number of classes, must be greater than 2 (for 2 classes, use `_BinaryLogisticHeadWithSigmoidCrossEntropyLoss`). weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. label_vocabulary: A list or tuple of strings representing possible label values. 
    If it is not given, that means labels are already encoded as an integer
    within [0, n_classes). If given, labels must be of string type and have
    any value in `label_vocabulary`. Note that errors will be raised if
    `label_vocabulary` is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM`.
    loss_fn: Optional loss function.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for multi class classification.

  Raises:
    ValueError: If `n_classes`, `label_vocabulary` or `loss_reduction` is
      invalid.
  """
  if label_vocabulary is not None and not isinstance(label_vocabulary,
                                                     (list, tuple)):
    raise ValueError(
        'label_vocabulary should be a list or a tuple. Given type: {}'.format(
            type(label_vocabulary)))
  if (loss_reduction not in tf.compat.v1.losses.Reduction.all() or
      loss_reduction == tf.compat.v1.losses.Reduction.NONE):
    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
  if loss_fn:
    _validate_loss_fn_args(loss_fn)
  return _MultiClassHeadWithSoftmaxCrossEntropyLoss(
      n_classes=n_classes,
      weight_column=weight_column,
      label_vocabulary=label_vocabulary,
      loss_reduction=loss_reduction,
      loss_fn=loss_fn,
      name=name)


class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
  """See `_multi_class_head_with_softmax_cross_entropy_loss`."""

  def __init__(self,
               n_classes,
               weight_column=None,
               label_vocabulary=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               loss_fn=None,
               name=None):
    if n_classes is None:
      raise ValueError('n_classes cannot be None')
    self._n_classes = _validate_n_classes(n_classes)
    self._weight_column = weight_column
    self._label_vocabulary = label_vocabulary
    self._loss_reduction = loss_reduction
    self._loss_fn = loss_fn
    self._name = name

  @property
  def name(self):
    return self._name

  @property
  def logits_dimension(self):
    # One logit per class for softmax cross-entropy.
    return self._n_classes

  def _eval_metric_ops(self, labels, class_ids, weights, unreduced_loss,
                       regularization_loss):
    """Returns the Eval metric ops."""
    with ops.name_scope(None, 'metrics',
                        (labels, class_ids, weights, unreduced_loss,
                         regularization_loss)):
      keys = metric_keys.MetricKeys
      metric_ops = {
          # Estimator already adds a metric for loss.
          # TODO(xiejw): Any other metrics?
          _summary_key(self._name, keys.LOSS_MEAN):
              tf.compat.v1.metrics.mean(
                  values=unreduced_loss, weights=weights, name=keys.LOSS_MEAN),
          _summary_key(self._name, keys.ACCURACY):
              tf.compat.v1.metrics.accuracy(
                  labels=labels,
                  predictions=class_ids,
                  weights=weights,
                  name=keys.ACCURACY),
      }
      if regularization_loss is not None:
        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
            tf.compat.v1.metrics.mean(
                values=regularization_loss, name=keys.LOSS_REGULARIZATION))
    return metric_ops

  def _label_ids(self, labels):
    """Converts labels to integer id space."""
    if self._label_vocabulary is None:
      if not labels.dtype.is_integer:
        raise ValueError(
            'Labels dtype should be integer. Instead got {}.'.format(
                labels.dtype))
      label_ids = labels
    else:
      if labels.dtype != tf.dtypes.string:
        raise ValueError('Labels dtype should be string if there is a '
                         'vocabulary. Instead got {}'.format(labels.dtype))
      # Map string labels to integer ids via the vocabulary lookup table.
      label_ids = lookup_ops.index_table_from_tensor(
          vocabulary_list=tuple(self._label_vocabulary),
          name='class_id_lookup').lookup(labels)
    return _assert_range(label_ids, self._n_classes)

  def create_loss(self, features, mode, logits, labels):
    """See `Head`."""
    del mode  # Unused for this head.
    logits = ops.convert_to_tensor(logits)
    labels = _check_dense_labels_match_logits_and_reshape(
        labels=labels, logits=logits, expected_labels_dimension=1)
    label_ids = self._label_ids(labels)
    if self._loss_fn:
      unweighted_loss = _call_loss_fn(
          loss_fn=self._loss_fn,
          labels=label_ids,
          logits=logits,
          features=features,
          expected_loss_dim=1)
    else:
      unweighted_loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(
          labels=label_ids,
          logits=logits,
          reduction=tf.compat.v1.losses.Reduction.NONE)
      # Restore the squeezed dim, so unweighted_loss matches the weights
      # shape.
      unweighted_loss = tf.compat.v1.expand_dims(unweighted_loss, axis=-1)
    weights = _get_weights_and_check_match_logits(
        features=features, weight_column=self._weight_column, logits=logits)
    training_loss = tf.compat.v1.losses.compute_weighted_loss(
        unweighted_loss, weights=weights, reduction=self._loss_reduction)
    return LossSpec(
        training_loss=training_loss,
        unreduced_loss=unweighted_loss,
        weights=weights,
        processed_labels=label_ids)

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 train_op_fn=None,
                                 regularization_losses=None):
    """Returns a `model_fn._TPUEstimatorSpec`.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      labels: Labels integer or string `Tensor` with shape matching `logits`,
        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is
        required argument when `mode` equals `TRAIN` or `EVAL`.
      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
        updates variables and increments `global_step`.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. These losses are usually expressed as a batch average, so for best results users need to set `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to avoid scaling errors. Returns: A `model_fn._TPUEstimatorSpec` instance. Raises: ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN mode, or if both are set. """ with ops.name_scope(self._name, 'head'): logits = _check_logits_final_dim(logits, self.logits_dimension) # Predict. pred_keys = prediction_keys.PredictionKeys with ops.name_scope(None, 'predictions', (logits,)): all_class_ids = _all_class_ids(logits, self._n_classes) all_classes = _all_classes( logits, self._n_classes, label_vocabulary=self._label_vocabulary) # class_ids's shape is [D0, D1, ... DN]. class_ids = tf.compat.v1.math.argmax( logits, axis=-1, name=pred_keys.CLASS_IDS) class_ids = tf.compat.v1.expand_dims(class_ids, axis=-1) if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = tf.strings.as_string(class_ids, name='str_classes') probabilities = tf.compat.v1.nn.softmax( logits, name=pred_keys.PROBABILITIES) predictions = { pred_keys.LOGITS: logits, pred_keys.PROBABILITIES: probabilities, # Expand to [batch_size, 1] pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, pred_keys.ALL_CLASS_IDS: all_class_ids, pred_keys.ALL_CLASSES: all_classes, } if mode == ModeKeys.PREDICT: classifier_output = _classification_output( scores=probabilities, n_classes=self._n_classes, label_vocabulary=self._label_vocabulary) return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.PREDICT, predictions=predictions, export_outputs={ _DEFAULT_SERVING_KEY: classifier_output, _CLASSIFY_SERVING_KEY: classifier_output, _PREDICT_SERVING_KEY: 
export_output.PredictOutput(predictions) }) training_loss, unreduced_loss, weights, label_ids = self.create_loss( features=features, mode=mode, logits=logits, labels=labels) if regularization_losses: regularization_loss = tf.math.add_n(regularization_losses) regularized_training_loss = tf.math.add_n( [training_loss, regularization_loss]) else: regularization_loss = None regularized_training_loss = training_loss # Eval. if mode == ModeKeys.EVAL: return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.EVAL, predictions=predictions, loss=regularized_training_loss, eval_metrics=_create_eval_metrics_tuple( self._eval_metric_ops, { 'labels': label_ids, 'class_ids': class_ids, 'weights': weights, 'unreduced_loss': unreduced_loss, 'regularization_loss': regularization_loss })) # Train. if optimizer is not None: if train_op_fn is not None: raise ValueError('train_op_fn and optimizer cannot both be set.') train_op = optimizer.minimize( regularized_training_loss, global_step=tf.compat.v1.train.get_global_step()) elif train_op_fn is not None: train_op = train_op_fn(regularized_training_loss) else: raise ValueError('train_op_fn and optimizer cannot both be None.') train_op = _append_update_ops(train_op) # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. 
if self._loss_reduction == tf.compat.v1.losses.Reduction.SUM: example_weight_sum = tf.math.reduce_sum( weights * tf.compat.v1.ones_like(unreduced_loss)) mean_loss = training_loss / example_weight_sum else: mean_loss = None with ops.name_scope(''): keys = metric_keys.MetricKeys tf.compat.v1.summary.scalar( _summary_key(self._name, keys.LOSS), regularized_training_loss) if mean_loss is not None: tf.compat.v1.summary.scalar( _summary_key(self._name, keys.LOSS_MEAN), mean_loss) if regularization_loss is not None: tf.compat.v1.summary.scalar( _summary_key(self._name, keys.LOSS_REGULARIZATION), regularization_loss) return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, train_op=train_op) def _binary_logistic_head_with_sigmoid_cross_entropy_loss( weight_column=None, thresholds=None, label_vocabulary=None, loss_reduction=tf.compat.v1.losses.Reduction.SUM, loss_fn=None, name=None): """Creates a `_Head` for single label binary classification. This head uses `sigmoid_cross_entropy_with_logits` loss. The head expects `logits` with shape `[D0, D1, ... DN, 1]`. In many applications, the shape is `[batch_size, 1]`. `labels` must be a dense `Tensor` with shape matching `logits`, namely `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, `labels` must be float `Tensor` with values in the interval `[0, 1]`. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. The loss is the weighted sum over the input dimensions. Namely, if the input labels have shape `[batch_size, 1]`, the loss is the weighted sum over `batch_size`. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features)` as arguments and returns unreduced loss with shape `[D0, D1, ... DN, 1]`. 
  `loss_fn` must support float `labels` with shape `[D0, D1, ... DN, 1]`.
  Namely, the head applies `label_vocabulary` to the input labels before
  passing them to `loss_fn`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`.
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, that means labels are already encoded within
      [0, 1]. If given, labels must be string type and have any value in
      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
      is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM`.
    loss_fn: Optional loss function.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for binary classification.

  Raises:
    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
    ValueError: If `loss_reduction` is invalid.
    TypeError: if `label_vocabulary` has invalid type.
  """
  thresholds = tuple(thresholds) if thresholds else tuple()
  if label_vocabulary is not None and not isinstance(label_vocabulary,
                                                     (list, tuple)):
    raise TypeError(
        'label_vocabulary should be a list or tuple. '
        'Given type: {}'.format(type(label_vocabulary)))
  for threshold in thresholds:
    if (threshold <= 0.0) or (threshold >= 1.0):
      raise ValueError('thresholds not in (0, 1): {}.'.format((thresholds,)))
  if (loss_reduction not in tf.compat.v1.losses.Reduction.all() or
      loss_reduction == tf.compat.v1.losses.Reduction.NONE):
    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
  if loss_fn:
    _validate_loss_fn_args(loss_fn)
  return _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(
      weight_column=weight_column,
      thresholds=thresholds,
      label_vocabulary=label_vocabulary,
      loss_reduction=loss_reduction,
      loss_fn=loss_fn,
      name=name)


class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
  """See `_binary_logistic_head_with_sigmoid_cross_entropy_loss`."""

  def __init__(self,
               weight_column=None,
               thresholds=None,
               label_vocabulary=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               loss_fn=None,
               name=None):
    # Argument validation happens in the factory function above.
    self._weight_column = weight_column
    self._thresholds = tuple(thresholds) if thresholds else tuple()
    self._label_vocabulary = label_vocabulary
    self._loss_reduction = loss_reduction
    self._loss_fn = loss_fn
    self._name = name

  @property
  def name(self):
    return self._name

  @property
  def logits_dimension(self):
    # Binary classification uses a single logit.
    return 1

  def _eval_metric_ops(self, labels, logits, logistic, class_ids, weights,
                       unreduced_loss, regularization_loss):
    """Returns a dict of summary key to eval metric op for this head."""
    with ops.name_scope(None, 'metrics',
                        (labels, logits, logistic, class_ids, weights,
                         unreduced_loss, regularization_loss)):
      keys = metric_keys.MetricKeys
      labels_mean = _indicator_labels_mean(
          labels=labels, weights=weights, name=keys.LABEL_MEAN)
      metric_ops = {
          # Estimator already adds a metric for loss.
# (continuation of _BinaryLogisticHead _eval_metric_ops: metric_ops entries)
          _summary_key(self._name, keys.LOSS_MEAN):
              tf.compat.v1.metrics.mean(
                  values=unreduced_loss, weights=weights, name=keys.LOSS_MEAN),
          _summary_key(self._name, keys.ACCURACY):
              tf.compat.v1.metrics.accuracy(
                  labels=labels,
                  predictions=class_ids,
                  weights=weights,
                  name=keys.ACCURACY),
          _summary_key(self._name, keys.PRECISION):
              tf.compat.v1.metrics.precision(
                  labels=labels,
                  predictions=class_ids,
                  weights=weights,
                  name=keys.PRECISION),
          _summary_key(self._name, keys.RECALL):
              tf.compat.v1.metrics.recall(
                  labels=labels,
                  predictions=class_ids,
                  weights=weights,
                  name=keys.RECALL),
          _summary_key(self._name, keys.PREDICTION_MEAN):
              _predictions_mean(
                  predictions=logistic,
                  weights=weights,
                  name=keys.PREDICTION_MEAN),
          _summary_key(self._name, keys.LABEL_MEAN):
              labels_mean,
          _summary_key(self._name, keys.ACCURACY_BASELINE):
              _accuracy_baseline(labels_mean),
          _summary_key(self._name, keys.AUC):
              _auc(labels=labels,
                   predictions=logistic,
                   weights=weights,
                   name=keys.AUC),
          _summary_key(self._name, keys.AUC_PR):
              _auc(labels=labels,
                   predictions=logistic,
                   weights=weights,
                   curve='PR',
                   name=keys.AUC_PR)
      }
      if regularization_loss is not None:
        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
            tf.compat.v1.metrics.mean(
                values=regularization_loss, name=keys.LOSS_REGULARIZATION))
      # Per-threshold metrics: accuracy/precision/recall at each user-supplied
      # decision threshold on the logistic value.
      for threshold in self._thresholds:
        accuracy_key = keys.ACCURACY_AT_THRESHOLD % threshold
        metric_ops[_summary_key(self._name,
                                accuracy_key)] = _accuracy_at_threshold(
                                    labels=labels,
                                    predictions=logistic,
                                    weights=weights,
                                    threshold=threshold,
                                    name=accuracy_key)
        # Precision for positive examples.
        precision_key = keys.PRECISION_AT_THRESHOLD % threshold
        metric_ops[_summary_key(self._name,
                                precision_key)] = _precision_at_threshold(
                                    labels=labels,
                                    predictions=logistic,
                                    weights=weights,
                                    threshold=threshold,
                                    name=precision_key)
        # Recall for positive examples.
        recall_key = keys.RECALL_AT_THRESHOLD % threshold
        metric_ops[_summary_key(self._name,
                                recall_key)] = _recall_at_threshold(
                                    labels=labels,
                                    predictions=logistic,
                                    weights=weights,
                                    threshold=threshold,
                                    name=recall_key)
      return metric_ops

  def create_loss(self, features, mode, logits, labels):
    """See `Head`."""
    del mode  # Unused for this head.
    logits = ops.convert_to_tensor(logits)
    labels = _check_dense_labels_match_logits_and_reshape(
        labels=labels, logits=logits, expected_labels_dimension=1)
    if self._label_vocabulary is not None:
      # Map string labels into {0, 1} ids via the vocabulary.
      labels = lookup_ops.index_table_from_tensor(
          vocabulary_list=tuple(self._label_vocabulary),
          name='class_id_lookup').lookup(labels)
    labels = tf.cast(labels, dtype=tf.dtypes.float32)
    labels = _assert_range(labels, n_classes=2)
    if self._loss_fn:
      unweighted_loss = _call_loss_fn(
          loss_fn=self._loss_fn,
          labels=labels,
          logits=logits,
          features=features,
          expected_loss_dim=1)
    else:
      unweighted_loss = tf.compat.v1.nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits)
    weights = _get_weights_and_check_match_logits(
        features=features, weight_column=self._weight_column, logits=logits)
    training_loss = tf.compat.v1.losses.compute_weighted_loss(
        unweighted_loss, weights=weights, reduction=self._loss_reduction)
    return LossSpec(
        training_loss=training_loss,
        unreduced_loss=unweighted_loss,
        weights=weights,
        processed_labels=labels)

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 train_op_fn=None,
                                 regularization_losses=None):
    """Returns an `EstimatorSpec`.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` with shape `[D0, D1, ... DN, 1]`. For many
        applications, the shape is `[batch_size, 1]`.
      labels: Labels integer or string `Tensor` with shape matching `logits`,
        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is
        required argument when `mode` equals `TRAIN` or `EVAL`.
      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
        updates variables and increments `global_step`.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses. These losses are
        usually expressed as a batch average, so for best results users need to
        set `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to
        avoid scaling errors.

    Returns:
      `EstimatorSpec`.

    Raises:
      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
        mode, or if both are set.
    """
    # Predict.
    with ops.name_scope(self._name, 'head'):
      with ops.name_scope(None, 'predictions', (logits,)):
        pred_keys = prediction_keys.PredictionKeys
        logits = _check_logits_final_dim(logits, self.logits_dimension)
        logistic = tf.math.sigmoid(logits, name=pred_keys.LOGISTIC)
        # Build two-class logits [0, logit] so that softmax/argmax give the
        # class probabilities and predicted class id.
        two_class_logits = tf.concat((tf.compat.v1.zeros_like(logits), logits),
                                     axis=-1,
                                     name='two_class_logits')
        probabilities = tf.compat.v1.nn.softmax(
            two_class_logits, name=pred_keys.PROBABILITIES)
        class_ids = tf.compat.v1.math.argmax(
            two_class_logits, axis=-1, name=pred_keys.CLASS_IDS)
        class_ids = tf.compat.v1.expand_dims(class_ids, axis=-1)
        all_class_ids = _all_class_ids(logits, n_classes=2)
        all_classes = _all_classes(
            logits, n_classes=2, label_vocabulary=self._label_vocabulary)
        if self._label_vocabulary:
          table = lookup_ops.index_to_string_table_from_tensor(
              vocabulary_list=self._label_vocabulary,
              name='class_string_lookup')
          classes = table.lookup(class_ids)
        else:
          classes = string_ops.as_string(class_ids, name='str_classes')
        predictions = {
            pred_keys.LOGITS: logits,
            pred_keys.LOGISTIC: logistic,
            pred_keys.PROBABILITIES: probabilities,
            pred_keys.CLASS_IDS: class_ids,
            pred_keys.CLASSES: classes,
            pred_keys.ALL_CLASS_IDS: all_class_ids,
            pred_keys.ALL_CLASSES: all_classes,
        }
      if mode == ModeKeys.PREDICT:
        classifier_output = _classification_output(
            scores=probabilities,
            n_classes=2,
            label_vocabulary=self._label_vocabulary)
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                _DEFAULT_SERVING_KEY: classifier_output,
                _CLASSIFY_SERVING_KEY: classifier_output,
                _REGRESS_SERVING_KEY: export_output.RegressionOutput(
                    value=logistic),
                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
            })

      (training_loss, unreduced_loss, weights, processed_labels) = (
          self.create_loss(
              features=features, mode=mode, logits=logits, labels=labels))
      if regularization_losses:
        regularization_loss = tf.math.add_n(regularization_losses)
        regularized_training_loss = tf.math.add_n(
            [training_loss, regularization_loss])
      else:
        regularization_loss = None
        regularized_training_loss = training_loss
      # Eval.
      if mode == ModeKeys.EVAL:
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.EVAL,
            predictions=predictions,
            loss=regularized_training_loss,
            eval_metrics=_create_eval_metrics_tuple(
                self._eval_metric_ops, {
                    'labels': processed_labels,
                    'logits': logits,
                    'logistic': logistic,
                    'class_ids': class_ids,
                    'weights': weights,
                    'unreduced_loss': unreduced_loss,
                    'regularization_loss': regularization_loss
                }))

      # Train.
      if optimizer is not None:
        if train_op_fn is not None:
          raise ValueError('train_op_fn and optimizer cannot both be set.')
        train_op = optimizer.minimize(
            regularized_training_loss,
            global_step=tf.compat.v1.train.get_global_step())
      elif train_op_fn is not None:
        train_op = train_op_fn(regularized_training_loss)
      else:
        raise ValueError('train_op_fn and optimizer cannot both be None.')
      train_op = _append_update_ops(train_op)
      # Only summarize mean_loss for SUM reduction to preserve backwards
      # compatibility. Otherwise skip it to avoid unnecessary computation.
      if self._loss_reduction == tf.compat.v1.losses.Reduction.SUM:
        example_weight_sum = tf.math.reduce_sum(
            weights * tf.compat.v1.ones_like(unreduced_loss))
        mean_loss = training_loss / example_weight_sum
      else:
        mean_loss = None
      with ops.name_scope(''):
        keys = metric_keys.MetricKeys
        tf.compat.v1.summary.scalar(
            _summary_key(self._name, keys.LOSS), regularized_training_loss)
        if mean_loss is not None:
          tf.compat.v1.summary.scalar(
              _summary_key(self._name, keys.LOSS_MEAN), mean_loss)
        if regularization_loss is not None:
          tf.compat.v1.summary.scalar(
              _summary_key(self._name, keys.LOSS_REGULARIZATION),
              regularization_loss)
      return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
          mode=ModeKeys.TRAIN,
          predictions=predictions,
          loss=regularized_training_loss,
          train_op=train_op)


def _regression_head(weight_column=None,
                     label_dimension=1,
                     loss_reduction=tf.compat.v1.losses.Reduction.SUM,
                     loss_fn=None,
                     inverse_link_fn=None,
                     name=None):
  """Creates a `_Head` for regression using the `mean_squared_error` loss.

  The loss is the weighted sum over all input dimensions. Namely, if the input
  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
  sum over both `batch_size` and `label_dimension`.

  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
  In many applications, the shape is `[batch_size, label_dimension]`.

  The `labels` shape must match `logits`, namely
  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
  `[D0, D1, ... DN]` is also supported.

  If `weight_column` is specified, weights must be of shape
  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
  `[D0, D1, ... DN, label_dimension]`.

  Supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
  `(labels, logits, features)` as arguments and returns unreduced loss with
  shape `[D0, D1, ... DN, label_dimension]`.

  Also supports custom `inverse_link_fn`, also known as 'mean function'.
  `inverse_link_fn` takes `logits` as argument and returns predicted values.
  This function is the inverse of the link function defined in
  https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function
  Namely, for poisson regression, set `inverse_link_fn=tf.exp`.

  Args:
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    label_dimension: Number of regression labels per example. This is the size
      of the last dimension of the labels `Tensor` (typically, this has shape
      `[batch_size, label_dimension]`).
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM`.
    loss_fn: Optional loss function. Defaults to `mean_squared_error`.
    inverse_link_fn: Optional inverse link function, also known as 'mean
      function'. Defaults to identity.
    name: name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.

  Returns:
    An instance of `_Head` for linear regression.

  Raises:
    ValueError: If `label_dimension` or `loss_reduction` is invalid.
  """
  if (loss_reduction not in tf.compat.v1.losses.Reduction.all() or
      loss_reduction == tf.compat.v1.losses.Reduction.NONE):
    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
  if loss_fn:
    _validate_loss_fn_args(loss_fn)
  return _RegressionHeadWithMeanSquaredErrorLoss(
      weight_column=weight_column,
      label_dimension=label_dimension,
      loss_reduction=loss_reduction,
      loss_fn=loss_fn,
      inverse_link_fn=inverse_link_fn,
      name=name)


class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
  """`Head` for regression using the mean squared loss."""

  def __init__(self,
               label_dimension,
               weight_column=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               loss_fn=None,
               inverse_link_fn=None,
               name=None):
    """`Head` for regression."""
    if label_dimension < 1:
      raise ValueError('Invalid label_dimension %s.' % label_dimension)
    self._logits_dimension = label_dimension
    self._weight_column = weight_column
    self._loss_reduction = loss_reduction
    self._loss_fn = loss_fn
    self._inverse_link_fn = inverse_link_fn
    self._name = name

  @property
  def name(self):
    return self._name

  @property
  def logits_dimension(self):
    # For regression, logits dimension equals label_dimension.
    return self._logits_dimension

  def create_loss(self, features, mode, logits, labels):
    """See `Head`."""
    del mode  # Unused for this head.
    logits = ops.convert_to_tensor(logits)
    labels = _check_dense_labels_match_logits_and_reshape(
        labels=labels,
        logits=logits,
        expected_labels_dimension=self._logits_dimension)
    labels = tf.cast(labels, dtype=tf.dtypes.float32)
    if self._loss_fn:
      unweighted_loss = _call_loss_fn(
          loss_fn=self._loss_fn,
          labels=labels,
          logits=logits,
          features=features,
          expected_loss_dim=self._logits_dimension)
    else:
      unweighted_loss = tf.compat.v1.losses.mean_squared_error(
          labels=labels,
          predictions=logits,
          reduction=tf.compat.v1.losses.Reduction.NONE)
    weights = _get_weights_and_check_match_logits(
        features=features,
        weight_column=self._weight_column,
        logits=logits,
        allow_per_logit_weights=True)
    training_loss = tf.compat.v1.losses.compute_weighted_loss(
        unweighted_loss, weights=weights, reduction=self._loss_reduction)
    return LossSpec(
        training_loss=training_loss,
        unreduced_loss=unweighted_loss,
        weights=weights,
        processed_labels=labels)

  def _eval_metric_ops(self, predicted_value, labels, weights, unreduced_loss,
                       regularization_loss):
    """Returns the Eval metric ops."""
    keys = metric_keys.MetricKeys
    # Estimator already adds a metric for loss.
# (continuation of _RegressionHeadWithMeanSquaredErrorLoss._eval_metric_ops)
    eval_metric_ops = {
        _summary_key(self._name, keys.LOSS_MEAN):
            tf.compat.v1.metrics.mean(values=unreduced_loss, weights=weights),
        _summary_key(self._name, keys.PREDICTION_MEAN):
            _predictions_mean(
                predictions=predicted_value,
                weights=weights,
                name=keys.PREDICTION_MEAN),
        _summary_key(self._name, keys.LABEL_MEAN):
            tf.compat.v1.metrics.mean(values=labels, weights=weights)
    }
    if regularization_loss is not None:
      regularization_loss_key = _summary_key(self._name,
                                             keys.LOSS_REGULARIZATION)
      eval_metric_ops[regularization_loss_key] = tf.compat.v1.metrics.mean(
          values=regularization_loss, name=keys.LOSS_REGULARIZATION)
    return eval_metric_ops

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 train_op_fn=None,
                                 regularization_losses=None):
    """Returns an `EstimatorSpec`.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      labels: Labels `Tensor` with shape matching `logits`, namely
        `[D0, D1, ... DN, logits_dimension]`. When `logits_dimension=1`, shape
        `[D0, D1, ... DN]` is also supported. `labels` is required argument
        when `mode` equals `TRAIN` or `EVAL`.
      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
        updates variables and increments `global_step`.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses. These losses are
        usually expressed as a batch average, so for best results users need to
        set `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to
        avoid scaling errors.

    Returns:
      A `model_fn._TPUEstimatorSpec` instance.

    Raises:
      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
        mode, or if both are set.
    """
    # Predict.
    with ops.name_scope(self._name, 'head'):
      logits = _check_logits_final_dim(logits, self._logits_dimension)
      if self._inverse_link_fn:
        # Apply the 'mean function' to logits to get predicted values; also
        # expose raw logits in that case.
        predicted_value = self._inverse_link_fn(logits)
        predictions = {
            prediction_keys.PredictionKeys.PREDICTIONS: predicted_value,
            prediction_keys.PredictionKeys.LOGITS: logits,
        }
      else:
        predicted_value = logits
        predictions = {
            prediction_keys.PredictionKeys.PREDICTIONS: predicted_value
        }
      if mode == ModeKeys.PREDICT:
        regression_output = export_output.RegressionOutput(
            value=predicted_value)
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                _DEFAULT_SERVING_KEY: regression_output,
                _REGRESS_SERVING_KEY: regression_output,
                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
            })

      training_loss, unreduced_loss, weights, _ = self.create_loss(
          features=features, mode=mode, logits=logits, labels=labels)
      if regularization_losses:
        regularization_loss = tf.math.add_n(regularization_losses)
        regularized_training_loss = tf.math.add_n(
            [training_loss, regularization_loss])
      else:
        regularization_loss = None
        regularized_training_loss = training_loss
      # Eval.
      if mode == ModeKeys.EVAL:
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.EVAL,
            predictions=predictions,
            loss=regularized_training_loss,
            eval_metrics=_create_eval_metrics_tuple(
                self._eval_metric_ops, {
                    'predicted_value': predicted_value,
                    'labels': labels,
                    'weights': weights,
                    'unreduced_loss': unreduced_loss,
                    'regularization_loss': regularization_loss,
                }))

      # Train.
# (continuation of _RegressionHeadWithMeanSquaredErrorLoss
#  _create_tpu_estimator_spec: TRAIN branch)
      if optimizer is not None:
        if train_op_fn is not None:
          raise ValueError('train_op_fn and optimizer cannot both be set.')
        train_op = optimizer.minimize(
            regularized_training_loss,
            global_step=tf.compat.v1.train.get_global_step())
      elif train_op_fn is not None:
        train_op = train_op_fn(regularized_training_loss)
      else:
        raise ValueError('train_op_fn and optimizer cannot both be None.')
      train_op = _append_update_ops(train_op)
      # Only summarize mean_loss for SUM reduction to preserve backwards
      # compatibility. Otherwise skip it to avoid unnecessary computation.
      if self._loss_reduction == tf.compat.v1.losses.Reduction.SUM:
        example_weight_sum = tf.math.reduce_sum(
            weights * tf.compat.v1.ones_like(unreduced_loss))
        mean_loss = training_loss / example_weight_sum
      else:
        mean_loss = None
      with ops.name_scope(''):
        keys = metric_keys.MetricKeys
        tf.compat.v1.summary.scalar(
            _summary_key(self._name, keys.LOSS), regularized_training_loss)
        if mean_loss is not None:
          tf.compat.v1.summary.scalar(
              _summary_key(self._name, keys.LOSS_MEAN), mean_loss)
        if regularization_loss is not None:
          tf.compat.v1.summary.scalar(
              _summary_key(self._name, keys.LOSS_REGULARIZATION),
              regularization_loss)
      return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
          mode=ModeKeys.TRAIN,
          predictions=predictions,
          loss=regularized_training_loss,
          train_op=train_op)


def _append_update_ops(train_op):
  """Returns `train_op` appending `UPDATE_OPS` collection if present."""
  update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
  if update_ops:
    return tf.group(train_op, *update_ops)
  return train_op


def _assert_range(labels, n_classes, message=None):
  """Returns `labels` gated on asserts that 0 <= labels <= n_classes - 1."""
  with ops.name_scope(None, 'assert_range', (labels,)):
    assert_less = tf.compat.v1.debugging.assert_less_equal(
        labels,
        ops.convert_to_tensor(n_classes - 1, dtype=labels.dtype),
        message=message or 'Labels must <= n_classes - 1')
    assert_greater = tf.compat.v1.debugging.assert_non_negative(
        labels, message=message or 'Labels must >= 0')
    # Identity ties the asserts into the graph so they run before any
    # consumer of the returned labels.
    with tf.control_dependencies((assert_less, assert_greater)):
      return tf.identity(labels)


def _binary_logistic_or_multi_class_head(n_classes, weight_column,
                                         label_vocabulary, loss_reduction):
  """Creates either binary or multi-class head.

  Args:
    n_classes: Number of label classes.
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.
    label_vocabulary: A list of strings represents possible label values. If
      given, labels must be string type and have any value in
      `label_vocabulary`. If it is not given, that means labels are already
      encoded as integer or float within [0, 1] for `n_classes=2` and encoded
      as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also
      there will be errors if vocabulary is not provided and labels are string.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over batch. Defaults to `SUM`.

  Returns:
    `head._Head` instance.
  """
  if n_classes == 2:
    head = _binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = _multi_class_head_with_softmax_cross_entropy_loss(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  return head



================================================
FILE: tensorflow_estimator/python/estimator/canned/head_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for head.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import model_fn
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.canned.v1 import dnn_testing_utils_v1
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

_DEFAULT_SERVING_KEY = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY


def _initialize_variables(test_case, scaffold):
  """Finalizes `scaffold` and runs its init/ready ops in the default session."""
  scaffold.finalize()
  test_case.assertIsNone(scaffold.init_feed_dict)
  test_case.assertIsNone(scaffold.init_fn)
  scaffold.init_op.run()
  scaffold.ready_for_local_init_op.eval()
  scaffold.local_init_op.run()
  scaffold.ready_op.eval()
  test_case.assertIsNotNone(scaffold.saver)


def _assert_simple_summaries(test_case,
                             expected_summaries,
                             summary_str,
                             tol=1e-6):
  """Assert summary the specified simple values.

  Args:
    test_case: test case.
    expected_summaries: Dict of expected tags and simple values.
    summary_str: Serialized `summary_pb2.Summary`.
    tol: Tolerance for relative and absolute.
  """
  summary = tf.compat.v1.summary.Summary()
  summary.ParseFromString(summary_str)
  test_case.assertAllClose(
      expected_summaries, {v.tag: v.simple_value for v in summary.value},
      rtol=tol,
      atol=tol)


def _assert_no_hooks(test_case, spec):
  """Asserts the spec carries no training hooks."""
  test_case.assertAllEqual([], spec.training_chief_hooks)
  test_case.assertAllEqual([], spec.training_hooks)


def _sigmoid(logits):
  """Numpy reference sigmoid used to compute expected test values."""
  return 1 / (1 + np.exp(-logits))


@test_util.run_all_in_graph_and_eager_modes
class CreateEstimatorSpecTest(tf.test.TestCase):

  class _HeadWithTPUSupport(head_lib._Head):
    """Head that overrides _create_tpu_estimator_spec."""

    def name(self):
      return 'HeadWithTPUSupport'

    def logits_dimension(self):
      return None

    def create_loss(self, features, mode, logits, labels):
      return None

    def _create_tpu_estimator_spec(self,
                                   features,
                                   mode,
                                   logits,
                                   labels=None,
                                   optimizer=None,
                                   train_op_fn=None,
                                   regularization_losses=None):
      return model_fn._TPUEstimatorSpec(
          mode=ModeKeys.EVAL, loss=tf.constant(0.0, dtype=tf.dtypes.float32))

  class _HeadWithOutTPUSupport(head_lib._Head):
    """Head that overrides create_estimator_spec."""

    def name(self):
      return 'HeadWithOutTPUSupport'

    def logits_dimension(self):
      return None

    def create_loss(self, features, mode, logits, labels):
      return None

    def create_estimator_spec(self,
                              features,
                              mode,
                              logits,
                              labels=None,
                              optimizer=None,
                              train_op_fn=None,
                              regularization_losses=None):
      return model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, loss=tf.constant(0.0, dtype=tf.dtypes.float32))

  class _InvalidHead(head_lib._Head):
    """Head that overrides neither estimator_spec functions."""

    def name(self):
      return 'InvalidHead'

    def logits_dimension(self):
      return None

    def create_loss(self, features, mode, logits, labels):
      return None

  def test_head_override_tpu_estimator_spec(self):
    """Test for `_Head` that overrides _create_tpu_estimator_spec."""
    head = self._HeadWithTPUSupport()
    tpu_spec = head._create_tpu_estimator_spec(
        features=None,
mode=None, logits=None) self.assertTrue(isinstance(tpu_spec, model_fn._TPUEstimatorSpec)) est_spec = head.create_estimator_spec(features=None, mode=None, logits=None) self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec)) def test_head_override_estimator_spec(self): """Test for `_Head` that overrides create_estimator_spec.""" head = self._HeadWithOutTPUSupport() with self.assertRaisesRegexp( NotImplementedError, 'TPUEstimatorSpec not available for this model head.'): _ = head._create_tpu_estimator_spec(features=None, mode=None, logits=None) est_spec = head.create_estimator_spec(features=None, mode=None, logits=None) self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec)) def test_invalid_head_class(self): head = self._InvalidHead() with self.assertRaisesRegexp( NotImplementedError, 'TPUEstimatorSpec not available for this model head.'): _ = head._create_tpu_estimator_spec(features=None, mode=None, logits=None) with self.assertRaisesRegexp( NotImplementedError, r'Subclasses of _Head must implement `create_estimator_spec\(\)` or ' r'_create_tpu_estimator_spec\(\).'): _ = head.create_estimator_spec(features=None, mode=None, logits=None) @test_util.run_v1_only('Tests v1 only symbols') class MultiClassHeadWithSoftmaxCrossEntropyLoss(tf.test.TestCase): def setUp(self): tf.compat.v1.reset_default_graph() def test_n_classes_is_none(self): with self.assertRaisesRegexp(ValueError, 'n_classes cannot be None'): head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes=None) def test_n_classes_is_2(self): with self.assertRaisesRegexp(ValueError, 'n_classes must be > 2'): head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes=2) def test_invalid_loss_reduction(self): with self.assertRaisesRegexp( ValueError, r'Invalid loss_reduction: invalid_loss_reduction'): head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_reduction='invalid_loss_reduction') with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: 
none'): head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_reduction=tf.compat.v1.losses.Reduction.NONE) def test_loss_fn_arg_labels_missing(self): def _loss_fn(logits): del logits # Unused with self.assertRaisesRegexp( ValueError, r'loss_fn must contain argument: labels\. ' r'Given arguments: \(\'logits\',\)'): head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_fn=_loss_fn) def test_loss_fn_arg_logits_missing(self): def _loss_fn(labels): del labels # unused with self.assertRaisesRegexp( ValueError, r'loss_fn must contain argument: logits\. ' r'Given arguments: \(\'labels\',\)'): head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_fn=_loss_fn) def test_loss_fn_arg_features_ok(self): def _loss_fn(labels, logits, features): del labels, logits, features # Unused head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_fn=_loss_fn) def test_loss_fn_arg_invalid(self): def _loss_fn(labels, logits, name=None): del labels, logits, name # Unused with self.assertRaisesRegexp(ValueError, r'loss_fn has unexpected args: \[\'name\'\]'): head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_fn=_loss_fn) def test_invalid_logits_shape(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) # Logits should be shape (batch_size, 3). logits_2x2 = np.array(((45., 44.), (41., 42.),)) # Static shape. with self.assertRaisesRegexp(ValueError, 'logits shape'): head.create_estimator_spec( features={'x': np.array(((30.,), (42.,),))}, mode=ModeKeys.PREDICT, logits=logits_2x2) # Dynamic shape. 
logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) spec = head.create_estimator_spec( features={'x': np.array(((30.,), (42.,),))}, mode=ModeKeys.PREDICT, logits=logits_placeholder) with self.cached_session(): with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'): spec.predictions[prediction_keys.PredictionKeys.PROBABILITIES].eval( {logits_placeholder: logits_2x2}) def test_invalid_labels_shape(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) # Logits should be shape (batch_size, 3). # Labels should be shape (batch_size, 1). labels_2x2 = np.array(((45, 44), (41, 42),), dtype=int) logits_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),)) features = {'x': np.array(((42.,),))} # Static shape. with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits_2x3, labels=labels_2x2) # Dynamic shape. labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.int64) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits_placeholder, labels=labels_placeholder)[0] with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'): training_loss.eval({ logits_placeholder: logits_2x3, labels_placeholder: labels_2x2 }) def test_invalid_labels_type(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) # Logits should be shape (batch_size, 3). # Labels should be shape (batch_size, 1). labels_2x1 = np.array(((1.,), (1.,),)) logits_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),)) features = {'x': np.array(((42.,),))} # Static shape. 
with self.assertRaisesRegexp(ValueError, 'Labels dtype'): head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits_2x3, labels=labels_2x1) # Dynamic shape. labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) with self.assertRaisesRegexp(ValueError, 'Labels dtype'): head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits_placeholder, labels=labels_placeholder) def test_invalid_labels_values(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) labels_2x1_with_large_id = np.array(((45,), (1,),), dtype=int) labels_2x1_with_negative_id = np.array(((-5,), (1,),), dtype=int) logits_2x3 = np.array(((1., 2., 4.), (1., 2., 3.),)) labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.int64) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.create_loss( features={'x': np.array(((42.,),))}, mode=ModeKeys.EVAL, logits=logits_placeholder, labels=labels_placeholder)[0] with self.cached_session(): with self.assertRaisesOpError('Labels must <= n_classes - 1'): training_loss.eval({ labels_placeholder: labels_2x1_with_large_id, logits_placeholder: logits_2x3 }) with self.cached_session(): with self.assertRaisesOpError('Labels must >= 0'): training_loss.eval({ labels_placeholder: labels_2x1_with_negative_id, logits_placeholder: logits_2x3 }) def test_invalid_labels_sparse_tensor(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) labels_2x1 = tf.sparse.SparseTensor( values=['english', 'italian'], indices=[[0, 0], [1, 0]], dense_shape=[2, 1]) logits_2x3 = np.array(((1., 2., 4.), (1., 2., 3.),)) with self.assertRaisesRegexp(ValueError, 'SparseTensor labels are not supported.'): head.create_loss( features={'x': np.array(((42.,),))}, 
mode=ModeKeys.EVAL, logits=logits_2x3, labels=labels_2x1) def test_incompatible_labels_shape(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) # Logits should be shape (batch_size, 3). # Labels should be shape (batch_size, 1). # Here batch sizes are different. values_3x1 = np.array(((1,), (1,), (1,),)) values_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),)) features = {'x': values_2x3} # Static shape. with self.assertRaisesRegexp( ValueError, r'Shape mismatch: The shape of labels \(received \(3,\)\) should equal ' r'the shape of logits except for the last dimension ' r'\(received \(2, 3\)\)\.'): head.create_loss( features=features, mode=ModeKeys.EVAL, logits=values_2x3, labels=values_3x1) # Dynamic shape. labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.int64) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits_placeholder, labels=labels_placeholder)[0] with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'): training_loss.eval({ labels_placeholder: values_3x1, logits_placeholder: values_2x3 }) def test_name(self): head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, name='foo') self.assertEqual('foo', head.name) def test_predict(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) logits = [[1., 0., 0.], [0., 0., 1.]] expected_probabilities = [[0.576117, 0.2119416, 0.2119416], [0.2119416, 0.2119416, 0.576117]] expected_class_ids = [[0], [2]] expected_all_class_ids = [[0, 1, 2]] * 2 expected_classes = [[b'0'], [b'2']] expected_all_classes = [[b'0', b'1', b'2']] * 2 expected_export_classes = [[b'0', b'1', b'2']] * 2 spec = 
head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) self.assertItemsEqual((_DEFAULT_SERVING_KEY, 'predict', 'classification'), spec.export_outputs.keys()) # Assert predictions and export_outputs. with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose( expected_probabilities, predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose(expected_class_ids, predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual(expected_classes, predictions[prediction_keys.PredictionKeys.CLASSES]) self.assertAllClose( expected_all_class_ids, predictions[prediction_keys.PredictionKeys.ALL_CLASS_IDS]) self.assertAllEqual( expected_all_classes, predictions[prediction_keys.PredictionKeys.ALL_CLASSES]) self.assertAllClose( expected_probabilities, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores)) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes)) def test_predict_with_tensor_n_classes(self): n_classes = tf.constant(3, dtype=tf.dtypes.int32) head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) self.assertEqual(n_classes, head.logits_dimension) logits = [[1., 0., 0.], [0., 0., 1.]] expected_probabilities = [[0.576117, 0.2119416, 0.2119416], [0.2119416, 0.2119416, 0.576117]] expected_class_ids = [[0], [2]] expected_all_class_ids = [[0, 1, 2]] * 2 expected_classes = [[b'0'], [b'2']] expected_all_classes = [[b'0', b'1', b'2']] * 2 expected_export_classes = [[b'0', b'1', b'2']] * 2 spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) self.assertItemsEqual((_DEFAULT_SERVING_KEY, 'predict', 'classification'), 
spec.export_outputs.keys()) # Assert predictions and export_outputs. with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose( expected_probabilities, predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose(expected_class_ids, predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual(expected_classes, predictions[prediction_keys.PredictionKeys.CLASSES]) self.assertAllClose( expected_all_class_ids, predictions[prediction_keys.PredictionKeys.ALL_CLASS_IDS]) self.assertAllEqual( expected_all_classes, predictions[prediction_keys.PredictionKeys.ALL_CLASSES]) self.assertAllClose( expected_probabilities, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores)) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes)) def test_predict_with_vocabulary_list(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[1., 0., 0.], [0., 0., 1.]] expected_classes = [[b'aang'], [b'zuko']] expected_export_classes = [[b'aang', b'iroh', b'zuko']] * 2 spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertAllEqual( expected_classes, sess.run(spec.predictions[prediction_keys.PredictionKeys.CLASSES])) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes)) def test_weight_should_not_impact_prediction(self): n_classes = 3 logits = [[1., 0., 0.], [0., 0., 1.]] expected_probabilities = [[0.576117, 0.2119416, 0.2119416], [0.2119416, 0.2119416, 0.576117]] head = 
head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, weight_column='label_weights') weights_2x1 = [[1.], [2.]] spec = head.create_estimator_spec( features={ 'x': np.array(((42,),), dtype=np.int32), 'label_weights': weights_2x1, }, mode=ModeKeys.PREDICT, logits=logits) with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose( expected_probabilities, predictions[prediction_keys.PredictionKeys.PROBABILITIES]) def test_eval_create_loss(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = cross_entropy(labels, logits) = [10, 0]. expected_training_loss = 10. # Create loss. training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2) def test_eval_create_loss_loss_fn(self): """Tests head.create_loss for eval mode and custom loss_fn.""" loss = np.array([[1.], [2.]], dtype=np.float32) logits_input = np.array([[-10., 10., 0.], [-15., 10., 0]], dtype=np.float32) labels_input = np.array([[1], [2]], dtype=np.int64) def _loss_fn(labels, logits): check_labels = tf.debugging.Assert( tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels]) check_logits = tf.debugging.Assert( tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits]) with tf.control_dependencies([check_labels, check_logits]): return tf.constant(loss) head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_fn=_loss_fn) actual_training_loss = head.create_loss( 
features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=logits_input, labels=labels_input)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose(np.sum(loss), actual_training_loss.eval()) def test_eval_create_loss_loss_fn_wrong_shape(self): """Tests custom loss_fn that returns Tensor of unexpected shape.""" loss = np.array([1., 2.], dtype=np.float32) def _loss_fn(labels, logits): del labels, logits # Unused return tf.constant(loss) head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_fn=_loss_fn) logits = np.array([[-10., 10., 0.], [-15., 10., 0.]], dtype=np.float32) labels = np.array([[1], [2]], dtype=np.int64) actual_training_loss = head.create_loss( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] ' r'\[logits_shape: \] \[2 3\] \[loss_shape: \] \[2\]'): actual_training_loss.eval() def test_eval_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3) with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32), labels=None) def test_eval(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. expected_loss = 10. # Create estimator spec. 
spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss / 2, keys.ACCURACY: 0.5, # 1 of 2 labels is correct. } # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) _assert_no_hooks(self, spec) # Assert predictions, loss, and metrics. tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of both update (in `metrics`) and value ops. self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol) self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_eval_metric_ops_with_head_name(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, name='some_multiclass_head') logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # Create estimator spec. 
spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) expected_metric_keys = [ '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS_MEAN), '{}/some_multiclass_head'.format(metric_keys.MetricKeys.ACCURACY) ] self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys()) def test_eval_with_regularization_losses(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, loss_reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} regularization_losses = [1.5, 0.5] expected_regularization_loss = 2. # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. expected_unregularized_loss = 5. expected_regularized_loss = ( expected_unregularized_loss + expected_regularization_loss) # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, regularization_losses=regularization_losses) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_unregularized_loss, keys.LOSS_REGULARIZATION: expected_regularization_loss, keys.ACCURACY: 0.5, # 1 of 2 labels is correct. } # Assert predictions, loss, and metrics. tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_regularized_loss, loss, rtol=tol, atol=tol) # Check results of both update (in `metrics`) and value ops. 
self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol) self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_eval_with_label_vocabulary_create_loss(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[10., 0, 0], [0, 10, 0]] labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} # loss = cross_entropy(labels, logits) = [10, 0]. expected_training_loss = 10. training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2) def test_eval_with_label_vocabulary(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[10., 0, 0], [0, 10, 0]] labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. expected_loss = 10. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss / 2, keys.ACCURACY: 0.5, # 1 of 2 labels is correct. } tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of both update (in `metrics`) and value ops. 
self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol) self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_weighted_multi_example_eval(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, weight_column='label_weights') # Create estimator spec. logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32) labels = np.array(((1,), (2,), (2,)), dtype=np.int64) weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64) # loss = sum(cross_entropy(labels, logits) * [1, 2, 3]) # = sum([10, 10, 0] * [1, 2, 3]) = 30 expected_loss = 30. spec = head.create_estimator_spec( features={ 'x': np.array(((42,),), dtype=np.int32), 'label_weights': weights_3x1, }, mode=ModeKeys.EVAL, logits=logits, labels=labels) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss / np.sum(weights_3x1), # Weighted accuracy is 1 * 3.0 / sum weights = 0.5 keys.ACCURACY: 0.5, } # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) _assert_no_hooks(self, spec) # Assert loss, and metrics. tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of both update (in `metrics`) and value ops. 
self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol) self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_train_create_loss(self): head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # unreduced_loss = cross_entropy(labels, logits) = [10, 0]. expected_unreduced_loss = [[10.], [0.]] # Weights default to 1. expected_weights = 1. # training_loss = 1 * 10 + 1 * 0 expected_training_loss = 10. training_loss, unreduced_loss, actual_weights, _ = head.create_loss( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels) tol = 1e-2 with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=tol, atol=tol) self.assertAllClose( expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights) def test_train_create_loss_loss_reduction(self): """Tests create_loss with loss_reduction.""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, loss_reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # unreduced_loss = cross_entropy(labels, logits) = [10, 0]. expected_unreduced_loss = [[10.], [0.]] # Weights default to 1. expected_weights = 1. # training_loss = 1 * 10 + 1 * 0 / num_nonzero_weights expected_training_loss = 10. / 2. 
training_loss, unreduced_loss, actual_weights, _ = head.create_loss( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels) tol = 1e-2 with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=tol, atol=tol) self.assertAllClose( expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights) def test_train_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3) def _no_op_train_fn(loss): del loss return tf.no_op() with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN, logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32), labels=None, train_op_fn=_no_op_train_fn) def test_train(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. expected_loss = 10. spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) _assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. 
tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, }, summary_str, tol) def test_train_with_optimizer(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} expected_train_result = 'my_train_op' class _Optimizer(object): def minimize(self, loss, global_step): del global_step return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. expected_loss = 10. 
spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, optimizer=_Optimizer()) tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) loss, train_result = sess.run((spec.loss, spec.train_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) def test_train_with_update_ops(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) with tf.Graph().as_default(): w = tf.Variable(1) update_op = w.assign_add(1) tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.UPDATE_OPS, update_op) t = tf.Variable('') expected_train_result = b'my_train_op' def _train_op_fn(loss): del loss return t.assign(expected_train_result) spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN, logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32), labels=np.array(((1,), (1,)), dtype=np.int64), train_op_fn=_train_op_fn) with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) sess.run(spec.train_op) w_value, t_value = sess.run([w, t]) self.assertEqual(2, w_value) self.assertEqual(expected_train_result, t_value) def test_train_summaries_with_head_name(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, name='some_multiclass_head') logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. expected_loss = 10. features = {'x': np.array(((42,),), dtype=np.int32)} def _train_op_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) # Assert summaries. 
tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) summary_str = sess.run(spec.scaffold.summary_op) _assert_simple_summaries( self, { '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS): expected_loss, '{}/some_multiclass_head'.format( metric_keys.MetricKeys.LOSS_MEAN): expected_loss / 2, }, summary_str, tol) def test_train_with_regularization_losses(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, loss_reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE) logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) regularization_losses = [1.5, 0.5] expected_regularization_loss = 2. # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. # loss = unregularized_loss + regularization_loss = 7. expected_loss = 7. spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, regularization_losses=regularization_losses) # Assert predictions, loss, train_op, and summaries. 
tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_REGULARIZATION: (expected_regularization_loss), }, summary_str, tol) def test_train_one_dim_create_loss(self): """Tests create_loss with 1D labels and weights (shape [batch_size]).""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='label_weights') logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32) labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64) weights_rank_1 = np.array((1., 2., 3.,), dtype=np.float64) features = { 'x': np.array(((42,),), dtype=np.float32), 'label_weights': weights_rank_1 } # unreduced_loss = cross_entropy(labels, logits) = [10, 10, 0]. expected_unreduced_loss = [[10.], [10.], [0.]] # weights are reshaped to [3, 1] to match logits. expected_weights = [[1.], [2.], [3.]] # training_loss = 1 * 10 + 2 * 10 + 3 * 0 = 30. expected_training_loss = 30. 
    # create_loss returns (training_loss, unreduced_loss, weights, labels);
    # the processed labels are not needed here.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1)
    tol = 1e-2
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
      self.assertAllClose(
          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
      self.assertAllClose(expected_weights, actual_weights.eval())

  def test_train_one_dim(self):
    """Tests train with 1D labels and weights (shape [batch_size])."""
    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3, weight_column='label_weights')

    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
    labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
    weights_rank_1 = np.array((1., 2., 3.,), dtype=np.float64)
    # Sanity-check that labels/weights really are rank 1; the head is
    # expected to reshape them internally.
    self.assertEqual((3,), labels_rank_1.shape)
    self.assertEqual((3,), weights_rank_1.shape)

    expected_train_result = 'my_train_op'

    # Train op encodes the received loss into its string result.
    def _train_op_fn(loss):
      return tf.strings.join([
          tf.constant(expected_train_result),
          tf.strings.as_string(loss, precision=2)
      ])

    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
    expected_loss = 30.
    features = {
        'x': np.array(((42,),), dtype=np.float32),
        'label_weights': weights_rank_1
    }
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1,
        train_op_fn=_train_op_fn)

    # TRAIN mode spec: loss and train_op set, no eval metrics or exports.
    self.assertIsNotNone(spec.loss)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: (expected_loss / np.sum(weights_rank_1)), }, summary_str, tol) def test_train_with_vocabulary_create_loss(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[10., 0, 0], [0, 10, 0]] labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} # loss = cross_entropy(labels, logits) = [10, 0]. expected_training_loss = 10. training_loss = head.create_loss( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2) def test_train_with_vocabulary(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[10., 0, 0], [0, 10, 0]] labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} def _train_op_fn(loss): del loss return tf.no_op() # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. expected_loss = 10. 
spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) loss = sess.run(spec.loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) def test_weighted_multi_example_train(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes, weight_column='label_weights') # Create estimator spec. logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32) labels = np.array(((1,), (2,), (2,)), dtype=np.int64) weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64) expected_train_result = 'my_train_op' # loss = sum(cross_entropy(labels, logits) * [1, 2, 3]) # = sum([10, 10, 0] * [1, 2, 3]) = 30 expected_loss = 30. def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) spec = head.create_estimator_spec( features={ 'x': np.array(((42,),), dtype=np.float32), 'label_weights': weights_3x1, }, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) _assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. 
tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) _assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, # loss mean = sum(cross_entropy(labels, logits) * [1,2,3]) / (1+2+3) # = sum([10, 10, 0] * [1, 2, 3]) / 6 = 30 / 6 metric_keys.MetricKeys.LOSS_MEAN: expected_loss / np.sum(weights_3x1), }, summary_str, tol) def test_multi_dim_weighted_train_create_loss(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) # unreduced_loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]]. expected_unreduced_loss = [[[0.], [12.]], [[0.], [15.]]] # weights are reshaped to [2, 2, 1] to match logits. 
expected_weights = [[[1.], [1.5]], [[2.], [2.5]]] # training_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5 expected_training_loss = 55.5 training_loss, unreduced_loss, actual_weights, _ = head.create_loss( features={'weights': weights}, mode=ModeKeys.TRAIN, logits=logits, labels=labels) tol = 1e-2 with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=tol, atol=tol) self.assertAllClose( expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights.eval()) def test_multi_dim_weighted_train(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]]. # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5 expected_loss = 55.5 spec = head.create_estimator_spec( features={'weights': weights}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) # Assert predictions, loss, train_op, and summaries. 
tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) loss, train_result = sess.run((spec.loss, spec.train_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) def test_multi_dim_train_weights_wrong_inner_dim(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 1].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) weights = np.array([[1.], [2.]], dtype=np.float32) def _no_op_train_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features={'weights': weights}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_no_op_train_fn) with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'): spec.loss.eval() def test_multi_dim_train_weights_wrong_outer_dim(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]], [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]]) weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) def _no_op_train_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features={'weights': weights_placeholder}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_no_op_train_fn) with self.cached_session(): _initialize_variables(self, 
tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'): spec.loss.eval({weights_placeholder: weights}) def test_multi_dim_weighted_eval(self): """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2].""" head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( n_classes=3, weight_column='weights') logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]], dtype=np.float32) labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64) weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]]. # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5 expected_loss = 55.5 # Create estimator spec. spec = head.create_estimator_spec( features={'weights': weights}, mode=ModeKeys.EVAL, logits=logits, labels=labels) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss / np.sum(weights), keys.ACCURACY: (1. * 1. + 1.5 * 0. + 2. * 1. + 2.5 * 0.) / np.sum(weights), } # Assert predictions, loss, and metrics. tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of both update (in `metrics`) and value ops. 
self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol) self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) @test_util.run_v1_only('Tests v1 only symbols') class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(tf.test.TestCase): def setUp(self): tf.compat.v1.reset_default_graph() def test_threshold_too_small(self): with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( thresholds=(0., 0.5)) def test_threshold_too_large(self): with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( thresholds=(0.5, 1.)) def test_invalid_loss_reduction(self): with self.assertRaisesRegexp( ValueError, r'Invalid loss_reduction: invalid_loss_reduction'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_reduction='invalid_loss_reduction') with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: none'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_reduction=tf.compat.v1.losses.Reduction.NONE) def test_loss_fn_arg_labels_missing(self): def _loss_fn(logits): del logits # Unused with self.assertRaisesRegexp( ValueError, r'loss_fn must contain argument: labels\. ' r'Given arguments: \(\'logits\',\)'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_fn=_loss_fn) def test_loss_fn_arg_logits_missing(self): def _loss_fn(labels): del labels # unused with self.assertRaisesRegexp( ValueError, r'loss_fn must contain argument: logits\. 
' r'Given arguments: \(\'labels\',\)'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_fn=_loss_fn) def test_loss_fn_arg_features_ok(self): def _loss_fn(labels, logits, features): del labels, logits, features # Unused head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_fn=_loss_fn) def test_loss_fn_arg_invalid(self): def _loss_fn(labels, logits, name=None): del labels, logits, name # Unused with self.assertRaisesRegexp(ValueError, r'loss_fn has unexpected args: \[\'name\'\]'): head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_fn=_loss_fn) def test_invalid_logits_shape(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() self.assertEqual(1, head.logits_dimension) # Logits should be shape (batch_size, 1). logits_2x2 = np.array(((45., 44.), (41., 42.),)) # Static shape. with self.assertRaisesRegexp(ValueError, 'logits shape'): head.create_estimator_spec( features={'x': np.array(((42.,),))}, mode=ModeKeys.PREDICT, logits=logits_2x2) # Dynamic shape. logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) spec = head.create_estimator_spec( features={'x': np.array(((42.,),))}, mode=ModeKeys.PREDICT, logits=logits_placeholder) with self.cached_session(): with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'): spec.predictions[prediction_keys.PredictionKeys.PROBABILITIES].eval( {logits_placeholder: logits_2x2}) def test_invalid_labels_shape(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() self.assertEqual(1, head.logits_dimension) # Labels and logits should be shape (batch_size, 1). labels_2x2 = np.array(((45., 44.), (41., 42.),)) logits_2x1 = np.array(((45.,), (41.,),)) # Static shape. with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': np.array(((42.,),))}, mode=ModeKeys.EVAL, logits=logits_2x1, labels=labels_2x2) # Dynamic shape. 
    # Dynamic shape: the mismatch can only be caught at run time via the
    # shape-assertion op embedded in the loss graph.
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    training_loss = head.create_loss(
        features={'x': np.array(((42.,),))},
        mode=ModeKeys.EVAL,
        logits=logits_placeholder,
        labels=labels_placeholder)[0]
    with self.cached_session():
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
        training_loss.eval({
            logits_placeholder: logits_2x1,
            labels_placeholder: labels_2x2
        })

  def test_incompatible_labels_shape(self):
    """Tests that mismatched logits/labels batch sizes are rejected."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    self.assertEqual(1, head.logits_dimension)

    # Both logits and labels should be shape (batch_size, 1).
    values_2x1 = np.array(((0.,), (1.,),))
    values_3x1 = np.array(((0.,), (1.,), (0.,),))

    # Static shape: mismatch is caught at graph-construction time, in both
    # directions (labels bigger, then logits bigger).
    with self.assertRaisesRegexp(ValueError,
                                 'logits and labels must have the same shape'):
      head.create_loss(
          features={'x': values_2x1},
          mode=ModeKeys.EVAL,
          logits=values_2x1,
          labels=values_3x1)
    with self.assertRaisesRegexp(ValueError,
                                 'logits and labels must have the same shape'):
      head.create_loss(
          features={'x': values_2x1},
          mode=ModeKeys.EVAL,
          logits=values_3x1,
          labels=values_2x1)

    # Dynamic shape.
labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.create_loss( features={'x': values_2x1}, mode=ModeKeys.EVAL, logits=logits_placeholder, labels=labels_placeholder)[0] with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[3 1\] \[labels_shape: \] \[2 1\]'): training_loss.eval({ labels_placeholder: values_2x1, logits_placeholder: values_3x1 }) with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'): training_loss.eval({ labels_placeholder: values_3x1, logits_placeholder: values_2x1 }) def test_name(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( name='foo') self.assertEqual('foo', head.name) def test_predict(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() self.assertEqual(1, head.logits_dimension) # Create estimator spec. logits = [[0.3], [-0.4]] expected_logistics = [[0.574443], [0.401312]] expected_probabilities = [[0.425557, 0.574443], [0.598688, 0.401312]] expected_class_ids = [[1], [0]] expected_all_class_ids = [[0, 1]] * 2 expected_classes = [[b'1'], [b'0']] expected_all_classes = [[b'0', b'1']] * 2 expected_export_classes = [[b'0', b'1']] * 2 spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) # Assert spec contains expected tensors. self.assertIsNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNone(spec.train_op) self.assertItemsEqual( ('classification', 'regression', 'predict', _DEFAULT_SERVING_KEY), spec.export_outputs.keys()) _assert_no_hooks(self, spec) # Assert predictions. 
with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose(expected_logistics, predictions[prediction_keys.PredictionKeys.LOGISTIC]) self.assertAllClose( expected_probabilities, predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose(expected_class_ids, predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual(expected_classes, predictions[prediction_keys.PredictionKeys.CLASSES]) self.assertAllClose( expected_all_class_ids, predictions[prediction_keys.PredictionKeys.ALL_CLASS_IDS]) self.assertAllEqual( expected_all_classes, predictions[prediction_keys.PredictionKeys.ALL_CLASSES]) self.assertAllClose( expected_probabilities, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores)) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes)) self.assertAllClose(expected_logistics, sess.run(spec.export_outputs['regression'].value)) def test_predict_with_vocabulary_list(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( label_vocabulary=['aang', 'iroh']) logits = [[1.], [0.]] expected_classes = [[b'iroh'], [b'aang']] spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertAllEqual( expected_classes, sess.run(spec.predictions[prediction_keys.PredictionKeys.CLASSES])) def test_eval_create_loss(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = cross_entropy(labels, logits) = [0, 41]. 
expected_training_loss = 41. # Create loss. training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2) def test_eval_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=np.array(((45,), (-41,),), dtype=np.float32), labels=None) def test_eval(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) keys = metric_keys.MetricKeys expected_metrics = { # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41 # loss_mean = loss/2 = 41./2 = 20.5 keys.LOSS_MEAN: 20.5, keys.ACCURACY: 1. / 2, keys.PRECISION: 1., keys.RECALL: 1. / 2, keys.PREDICTION_MEAN: 1. / 2, keys.LABEL_MEAN: 2. / 2, keys.ACCURACY_BASELINE: 2. / 2, keys.AUC: 0., keys.AUC_PR: 1., } # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) _assert_no_hooks(self, spec) # Assert predictions, loss, and metrics. 
with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(41., loss) # Check results of both update (in `metrics`) and value ops. self.assertAllClose(expected_metrics, metrics) self.assertAllClose(expected_metrics, {k: value_ops[k].eval() for k in value_ops}) def test_eval_metric_ops_with_head_name(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( name='some_binary_head') logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) expected_metric_keys = [ '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN), '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY), '{}/some_binary_head'.format(metric_keys.MetricKeys.PRECISION), '{}/some_binary_head'.format(metric_keys.MetricKeys.RECALL), '{}/some_binary_head'.format(metric_keys.MetricKeys.PREDICTION_MEAN), '{}/some_binary_head'.format(metric_keys.MetricKeys.LABEL_MEAN), '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY_BASELINE), '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC), '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC_PR), ] self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys()) def test_eval_with_regularization_losses(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE) logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} 
regularization_losses = [1.5, 0.5] expected_regularization_loss = 2. # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(0, 41) / 2 = 20.5 expected_unregularized_loss = 20.5 expected_regularized_loss = ( expected_unregularized_loss + expected_regularization_loss) # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, regularization_losses=regularization_losses) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_unregularized_loss, keys.LOSS_REGULARIZATION: expected_regularization_loss, keys.ACCURACY: 1. / 2, keys.PRECISION: 1., keys.RECALL: 1. / 2, keys.PREDICTION_MEAN: 1. / 2, keys.LABEL_MEAN: 2. / 2, keys.ACCURACY_BASELINE: 2. / 2, keys.AUC: 0., keys.AUC_PR: 1., } # Assert predictions, loss, and metrics. with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_regularized_loss, loss) # Check results of both update (in `metrics`) and value ops. self.assertAllClose(expected_metrics, metrics) self.assertAllClose(expected_metrics, {k: value_ops[k].eval() for k in value_ops}) def test_eval_with_vocabulary_list_create_loss(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( label_vocabulary=['aang', 'iroh']) logits = np.array(((45,), (-41,),), dtype=np.float32) labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} # Create loss. 
    # create_loss returns a tuple; first element is the training loss.
    training_loss = head.create_loss(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels)[0]
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(41., training_loss.eval())

  def test_eval_with_vocabulary_list(self):
    """Tests EVAL metrics when string labels come from label_vocabulary."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        label_vocabulary=['aang', 'iroh'])
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = [[b'iroh'], [b'iroh']]
    features = {'x': np.array(((42,),), dtype=np.int32)}

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels)

    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      # Each eval_metric_ops entry is a (value_op, update_op) pair.
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      sess.run(update_ops)
      self.assertAllClose(1. / 2,
                          value_ops[metric_keys.MetricKeys.ACCURACY].eval())

  def test_eval_with_thresholds_create_loss(self):
    """Tests that thresholds do not change the EVAL-mode training loss."""
    thresholds = [0.25, 0.5, 0.75]
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        thresholds=thresholds)
    logits = np.array(((-1,), (1,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
    # loss = -ln(probabilities[label[i]])) = [-ln(0.269), -ln(0.731)]
    #      = [1.31304389, 0.31334182]
    # weighted sum loss = 1.62638571
    expected_training_loss = 1.62638571
    # Create loss.
training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2) def test_eval_with_thresholds(self): thresholds = [0.25, 0.5, 0.75] head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( thresholds=thresholds) logits = np.array(((-1,), (1,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) # probabilities[i] = 1/(1 + exp(-logits[i])) => # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731] # loss = -sum(ln(probabilities[label[i]])) = -ln(0.269) -ln(0.731) # = 1.62652338 keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: 1.62652338 / 2., keys.ACCURACY: 1. / 2, keys.PRECISION: 1., keys.RECALL: .5, keys.PREDICTION_MEAN: 1. / 2, keys.LABEL_MEAN: 2. / 2, keys.ACCURACY_BASELINE: 2. 
/ 2, keys.AUC: 0., keys.AUC_PR: 1., keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 1., keys.PRECISION_AT_THRESHOLD % thresholds[0]: 1., keys.RECALL_AT_THRESHOLD % thresholds[0]: 1., keys.ACCURACY_AT_THRESHOLD % thresholds[1]: .5, keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1., keys.RECALL_AT_THRESHOLD % thresholds[1]: .5, keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 0., keys.PRECISION_AT_THRESHOLD % thresholds[2]: 0., keys.RECALL_AT_THRESHOLD % thresholds[2]: 0., } self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) tol = 1e-2 with self.cached_session() as sess: _initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, metrics = sess.run((spec.loss, update_ops)) self.assertAllClose(1.62652338, loss) # Check results of both update (in `metrics`) and value ops. self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol) self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, atol=tol, rtol=tol) def test_train_create_loss(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.float64) features = {'x': np.array(((42,),), dtype=np.float32)} # unreduced_loss = cross_entropy(labels, logits) = [0, 41] expected_unreduced_loss = [[0.], [41.]] # weights default to 1. expected_weights = 1. # training loss = 1 * 0 + 1 * 41 expected_training_loss = 41. # Create loss. 
    # create_loss returns (training_loss, unreduced_loss, weights, labels);
    # processed labels are unused here.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())
      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
      # actual_weights is the scalar default weight, not a Tensor to eval.
      self.assertAllClose(expected_weights, actual_weights)

  def test_train_create_loss_loss_reduction(self):
    """Tests create_loss with loss_reduction."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
    expected_unreduced_loss = [[0.], [41.]]
    # weights default to 1.
    expected_weights = 1.
    # training loss = (1 * 0 + 1 * 41) / num_nonzero_weights
    expected_training_loss = 41. / 2.
    # Create loss.
training_loss, unreduced_loss, actual_weights, _ = head.create_loss( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels) with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose(expected_training_loss, training_loss.eval()) self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval()) self.assertAllClose(expected_weights, actual_weights) def test_eval_create_loss_loss_fn(self): """Tests head.create_loss for eval mode and custom loss_fn.""" loss = np.array([[1.], [2.]], dtype=np.float32) logits_input = np.array([[-10.], [10.]], dtype=np.float32) labels_input = np.array([[1], [0]], dtype=np.int64) def _loss_fn(labels, logits): check_labels = tf.debugging.Assert( tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels]) check_logits = tf.debugging.Assert( tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits]) with tf.control_dependencies([check_labels, check_logits]): return tf.constant(loss) head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_fn=_loss_fn) actual_training_loss = head.create_loss( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=logits_input, labels=labels_input)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose(np.sum(loss), actual_training_loss.eval()) def test_eval_create_loss_loss_fn_wrong_shape(self): """Tests custom loss_fn that returns Tensor of unexpected shape.""" loss = np.array([1., 2.], dtype=np.float32) def _loss_fn(labels, logits): del labels, logits # Unused return tf.constant(loss) head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( loss_fn=_loss_fn) logits = np.array([[-10.], [10.]], dtype=np.float32) labels = np.array([[1], [0]], dtype=np.int64) actual_training_loss = head.create_loss( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with 
self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] ' r'\[logits_shape: \] \[2 1\] \[loss_shape: \] \[2\]'): actual_training_loss.eval() def test_train_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() def _no_op_train_fn(loss): del loss return tf.no_op() with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN, logits=np.array(((45,), (-41,),), dtype=np.float32), labels=None, train_op_fn=_no_op_train_fn) def test_train(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.float64) expected_train_result = b'my_train_op' features = {'x': np.array(((42,),), dtype=np.float32)} # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41 expected_loss = 41. def _train_op_fn(loss): with tf.control_dependencies((tf.compat.v1.debugging.assert_equal( tf.cast(expected_loss, dtype=tf.dtypes.float32), tf.cast(loss, dtype=tf.dtypes.float32), name='assert_loss'),)): return tf.constant(expected_train_result) # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) _assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. 
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/2 = 41/2 = 20.5
              metric_keys.MetricKeys.LOSS_MEAN: 20.5,
          }, summary_str)

  def test_train_with_optimizer(self):
    """Tests TRAIN mode when an optimizer object is passed instead of fn."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
    expected_loss = 41.

    # Minimal optimizer stub: checks the loss it receives, returns a marker op.
    class _Optimizer(object):

      def minimize(self, loss, global_step):
        del global_step
        with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
            tf.cast(expected_loss, dtype=tf.dtypes.float32),
            tf.cast(loss, dtype=tf.dtypes.float32),
            name='assert_loss'),)):
          return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=_Optimizer())

    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)

  def test_train_with_update_ops(self):
    """Tests that ops in the UPDATE_OPS collection run with the train op."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()

    with tf.Graph().as_default():
      w = tf.Variable(1)
      update_op = w.assign_add(1)
      tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.UPDATE_OPS,
                                     update_op)

      t = tf.Variable('')
      expected_train_result = b'my_train_op'

      def _train_op_fn(loss):
        del loss
        return t.assign(expected_train_result)

      spec = head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (-41,),), dtype=np.float32),
          labels=np.array(((1,), (1,),), dtype=np.float64),
          train_op_fn=_train_op_fn)

      with self.cached_session() as sess:
        _initialize_variables(self, spec.scaffold)
        sess.run(spec.train_op)
        w_value, t_value = sess.run([w, t])
        # w was incremented by the collected update op; t by the train op.
        self.assertEqual(2, w_value)
        self.assertEqual(expected_train_result, t_value)

  def test_train_summaries_with_head_name(self):
    """Tests that summary keys are suffixed with the head name."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        name='some_binary_head')
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
    expected_loss = 41.

    def _train_op_fn(loss):
      del loss
      return tf.no_op()

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn)

    # Assert summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      summary_str = sess.run(spec.scaffold.summary_op)
      _assert_simple_summaries(
          self, {
              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS):
                  expected_loss,
              # loss_mean = loss/2 = 41/2 = 20.5
              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN):
                  20.5,
          }, summary_str)

  def test_train_with_regularization_losses(self):
    """Tests that regularization losses are added to the training loss."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE)
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42,),), dtype=np.float32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
    #                    = sum(0, 41) / 2 = 20.5
    # loss = unregularized_loss + regularization_loss = 20.5 + 2 = 22.5
    expected_loss = 22.5

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        regularization_losses=regularization_losses)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              metric_keys.MetricKeys.LOSS_REGULARIZATION:
                  (expected_regularization_loss),
          }, summary_str)

  def test_float_labels_invalid_values(self):
    """Tests that float labels outside [0, 1] raise at loss creation."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[1.2], [0.4]], dtype=np.float32)
    features = {'x': np.array([[42]], dtype=np.float32)}
    with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                 r'Labels must <= n_classes - 1'):
      training_loss = head.create_loss(
          features=features, mode=ModeKeys.TRAIN, logits=logits,
          labels=labels)[0]

  def test_float_labels_train_create_loss(self):
    """Tests create_loss with fractional (soft) labels in TRAIN mode."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    features = {'x': np.array([[42]], dtype=np.float32)}
    # loss = cross_entropy(labels, logits)
    #      = -label[i]*log(sigmoid(logit[i]))
    #        -(1-label[i])*log(sigmoid(-logit[i]))
    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
    #      = [0.57407698418, 0.67435524446]
    # weighted sum loss = 0.57407698418 + 0.67435524446
    expected_training_loss = 1.24843222864
    # Create loss.
    training_loss = head.create_loss(
        features=features, mode=ModeKeys.TRAIN, logits=logits,
        labels=labels)[0]
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)

  def test_float_labels_train(self):
    """Tests full TRAIN spec with fractional (soft) labels."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    expected_train_result = b'my_train_op'
    features = {'x': np.array([[42]], dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits))
    #      = sum(-label[i]*log(sigmoid(logit[i]))
    #            -(1-label[i])*log(sigmoid(-logit[i])))
    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
    #      = 1.2484322
    expected_loss = 1.2484322

    def _train_op_fn(loss):
      with tf.control_dependencies((dnn_testing_utils_v1.assert_close(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32)),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
      self.assertEqual(expected_train_result, train_result)

  def test_float_labels_eval_create_loss(self):
    """Tests create_loss with fractional (soft) labels in EVAL mode."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    features = {'x': np.array([[42]], dtype=np.float32)}
    # loss = cross_entropy(labels, logits)
    #      = -label[i]*log(sigmoid(logit[i]))
    #        -(1-label[i])*log(sigmoid(-logit[i]))
    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
    #      = [0.57407698418, 0.67435524446]
    # weighted sum loss = 0.57407698418 + 0.67435524446
    expected_training_loss = 1.24843222864
    # Create loss.
    training_loss = head.create_loss(
        features=features, mode=ModeKeys.EVAL, logits=logits,
        labels=labels)[0]
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)

  def test_float_labels_eval(self):
    """Tests full EVAL spec with fractional (soft) labels."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    features = {'x': np.array([[42]], dtype=np.float32)}

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)

    # loss = sum(cross_entropy(labels, logits))
    #      = sum(-label[i]*log(sigmoid(logit[i]))
    #            -(1-label[i])*log(sigmoid(-logit[i])))
    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
    #      = 1.2484322
    expected_loss = 1.2484322

    # Assert loss.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, metrics = sess.run((spec.loss, update_ops))
      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
      self.assertAlmostEqual(expected_loss / 2.,
                             metrics[metric_keys.MetricKeys.LOSS_MEAN])

  def test_weighted_multi_example_predict(self):
    """3 examples, 1 batch."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='label_weights')

    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
    spec = head.create_estimator_spec(
        features={
            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
        },
        mode=ModeKeys.PREDICT,
        logits=logits)

    # Assert predictions, loss, and metrics. Weights must not affect PREDICT
    # outputs.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      predictions = sess.run(spec.predictions)
      self.assertAllClose(
          logits.astype(np.float32),
          predictions[prediction_keys.PredictionKeys.LOGITS])
      self.assertAllClose(
          _sigmoid(logits).astype(np.float32),
          predictions[prediction_keys.PredictionKeys.LOGISTIC])
      self.assertAllClose(
          [[0., 1.], [1., 0.], [0., 1.]],
          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
      self.assertAllClose([[1], [0], [1]],
                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
      self.assertAllEqual([[b'1'], [b'0'], [b'1']],
                          predictions[prediction_keys.PredictionKeys.CLASSES])

  def test_weighted_multi_example_eval(self):
    """3 examples, 1 batch."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='label_weights')

    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
    spec = head.create_estimator_spec(
        features={
            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
        },
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=np.array(((1,), (1,), (0,)), dtype=np.int32))

    # label_mean = (1*1 + .1*1 + 1.5*0)/(1 + .1 + 1.5) = 1.1/2.6
    #            = .42307692307
    expected_label_mean = .42307692307
    keys = metric_keys.MetricKeys
    expected_metrics = {
        # losses = label_weights*cross_entropy(labels, logits)
        #        = (1*0, .1*41, 1.5*44) = (0, 4.1, 66)
        # loss = sum(losses) = 0 + 4.1 + 66 = 70.1
        # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
        #           = 70.1/2.6 = 26.9615384615
        keys.LOSS_MEAN: 26.9615384615,
        # accuracy = (1*1 + .1*0 + 1.5*0)/(1 + .1 + 1.5) = 1/2.6 = .38461538461
        keys.ACCURACY: .38461538461,
        keys.PRECISION: 1. / 2.5,
        keys.RECALL: 1. / 1.1,
        # prediction_mean = (1*1 + .1*0 + 1.5*1)/(1 + .1 + 1.5) = 2.5/2.6
        #                 = .96153846153
        keys.PREDICTION_MEAN: .96153846153,
        keys.LABEL_MEAN: expected_label_mean,
        keys.ACCURACY_BASELINE: 1 - expected_label_mean,
        keys.AUC: .45454565,
        keys.AUC_PR: .6737757325172424,
    }

    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, metrics = sess.run((spec.loss, update_ops))
      self.assertAllClose(70.1, loss)
      # Check results of both update (in `metrics`) and value ops.
      self.assertAllClose(expected_metrics, metrics)
      self.assertAllClose(expected_metrics,
                          {k: value_ops[k].eval() for k in value_ops})

  def test_train_one_dim_create_loss(self):
    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='label_weights')

    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
    labels_rank_1 = np.array((1., 1., 0.,))
    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
    features = {
        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
        'label_weights': weights_rank_1,
    }
    # unreduced_loss = cross_entropy(labels, logits) = [0, 41, 44]
    expected_unreduced_loss = [[0.], [41.], [44.]]
    # weights are reshaped to [3, 1] to match logits.
    expected_weights = [[1.], [.1], [1.5]]
    # training loss = 1 * 0 + .1 * 41 + 1.5 * 44
    expected_training_loss = 70.1
    # Create loss.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
      self.assertAllClose(
          expected_unreduced_loss, unreduced_loss.eval(), rtol=1e-2, atol=1e-2)
      self.assertAllClose(expected_weights, actual_weights.eval())

  def test_train_one_dim(self):
    """Tests train with 1D labels and weights (shape [batch_size])."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='label_weights')

    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
    labels_rank_1 = np.array((1., 1., 0.,))
    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
    self.assertEqual((3,), labels_rank_1.shape)
    self.assertEqual((3,), weights_rank_1.shape)
    features = {
        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
        'label_weights': weights_rank_1,
    }
    expected_train_result = b'my_train_op'
    # losses = label_weights*cross_entropy(labels, logits)
    #        = (1*0, .1*41, 1.5*44) = (0, 4.1, 66)
    # loss = sum(losses) = 0 + 4.1 + 66 = 70.1
    expected_loss = 70.1

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1,
        train_op_fn=_train_op_fn)

    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertIsNotNone(spec.train_op)

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
              #           = 70.1/2.6 = 26.9615384615
              metric_keys.MetricKeys.LOSS_MEAN: 26.9615384615,
          }, summary_str)

  def test_weighted_multi_example_train(self):
    """3 examples, 1 batch."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='label_weights')

    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
    expected_train_result = b'my_train_op'
    # losses = label_weights*cross_entropy(labels, logits)
    #        = (1*0, .1*41, 1.5*44) = (0, 4.1, 66)
    # loss = sum(losses) = 0 + 4.1 + 66 = 70.1
    expected_loss = 70.1

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features={
            'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
        },
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=np.array(((1.,), (1.,), (0.,))),
        train_op_fn=_train_op_fn)

    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertIsNotNone(spec.train_op)

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
              #           = 70.1/2.6 = 26.9615384615
              metric_keys.MetricKeys.LOSS_MEAN: 26.9615384615,
          }, summary_str)

  def test_multi_dim_weighted_train_create_loss(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    # unreduced_loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
    expected_unreduced_loss = [[[10.], [0.]], [[0.], [12.]]]
    # Weights are reshaped to [2, 2, 1] to match logits.
    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
    # training_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
    expected_training_loss = 40.
    # Create loss.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features={'weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels)
    tol = 1e-2
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
      self.assertAllClose(
          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
      self.assertAllClose(expected_weights, actual_weights.eval())

  def test_multi_dim_weighted_train(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
    expected_loss = 40.
    expected_train_result = 'my_train_op'

    # Train op encodes the loss it received so the test can verify it below.
    def _train_op_fn(loss):
      return tf.strings.join([
          tf.constant(expected_train_result),
          tf.strings.as_string(loss, precision=2)
      ])

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn)

    # Assert predictions, loss, train_op, and summaries.
    tol = 1e-2
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)

  def test_multi_dim_train_weights_wrong_inner_dim(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 1]."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1.], [2.]], dtype=np.float32)

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      # Static weights with a bad inner dim still fail only at run time.
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \] \[2 2 1\] \[weights_shape: \] \[2 1\]'):
        spec.loss.eval()

  def test_multi_dim_train_weights_wrong_outer_dim(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2, 2]."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    # Placeholder keeps the weight shape dynamic so the mismatch is only
    # detectable when the bad value is fed.
    weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features={'weights': weights_placeholder},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \]\s\[2 2 1\]\s\[weights_shape: \]\s\[2 2 2\]'):
        spec.loss.eval({
            weights_placeholder:
                np.array([[[1., 1.1], [1.5, 1.6]], [[2., 2.1], [2.5, 2.6]]])
        })

  def test_multi_dim_weighted_eval(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
    expected_loss = 40.

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels)
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN:
            expected_loss / np.sum(weights),
        keys.ACCURACY:
            (1. * 0. + 1.5 * 1. + 2. * 1. + 2.5 * 0.) / np.sum(weights),
        keys.PRECISION:
            2.0 / 3.0,
        keys.RECALL:
            2.0 / 4.5,
        keys.PREDICTION_MEAN:
            (1. * 1 + 1.5 * 0 + 2. * 1 + 2.5 * 0) / np.sum(weights),
        keys.LABEL_MEAN:
            (1. * 0 + 1.5 * 0 + 2. * 1 + 2.5 * 1) / np.sum(weights),
        keys.ACCURACY_BASELINE:
            (1. * 0 + 1.5 * 0 + 2. * 1 + 2.5 * 1) / np.sum(weights),
        # We cannot reliably calculate AUC with only 4 data points, but the
        # values should not change because of backwards-compatibility.
        keys.AUC:
            0.5222,
        keys.AUC_PR:
            0.7341,
    }
    tol = 1e-2
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, metrics = sess.run((spec.loss, update_ops))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      # Check results of both update (in `metrics`) and value ops.
      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
      self.assertAllClose(
          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
          rtol=tol,
          atol=tol)


@test_util.run_v1_only('Tests v1 only symbols')
class RegressionHead(tf.test.TestCase):
  """Tests for the v1 regression head (`head_lib._regression_head`)."""

  def setUp(self):
    tf.compat.v1.reset_default_graph()

  def test_invalid_label_dimension(self):
    """label_dimension must be a positive integer."""
    with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
      head_lib._regression_head(label_dimension=-1)
    with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
      head_lib._regression_head(label_dimension=0)

  def test_invalid_loss_reduction(self):
    """Unknown or NONE loss reductions are rejected at construction."""
    with self.assertRaisesRegexp(
        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
      head_lib._regression_head(loss_reduction='invalid_loss_reduction')
    with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: none'):
      head_lib._regression_head(
          loss_reduction=tf.compat.v1.losses.Reduction.NONE)

  def test_loss_fn_arg_labels_missing(self):
    """A custom loss_fn must accept a `labels` argument."""

    def _loss_fn(logits):
      del logits  # Unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: labels\. '
        r'Given arguments: \(\'logits\',\)'):
      head_lib._regression_head(loss_fn=_loss_fn)

  def test_loss_fn_arg_logits_missing(self):
    """A custom loss_fn must accept a `logits` argument."""

    def _loss_fn(labels):
      del labels  # unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: logits\. '
        r'Given arguments: \(\'labels\',\)'):
      head_lib._regression_head(loss_fn=_loss_fn)

  def test_loss_fn_arg_features_ok(self):
    """An optional `features` argument on loss_fn is accepted."""

    def _loss_fn(labels, logits, features):
      del labels, logits, features  # Unused

    head_lib._regression_head(loss_fn=_loss_fn)

  def test_loss_fn_arg_invalid(self):
    """Unexpected extra arguments on loss_fn are rejected."""

    def _loss_fn(labels, logits, name=None):
      del labels, logits, name  # Unused

    with self.assertRaisesRegexp(ValueError,
                                 r'loss_fn has unexpected args: \[\'name\'\]'):
      head_lib._regression_head(loss_fn=_loss_fn)

  def test_invalid_logits(self):
    """Logits whose last dim != label_dimension are rejected."""
    head = head_lib._regression_head(label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    logits_1d = np.array(((45.,), (41.,),))

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      head.create_estimator_spec(
          features={'x': np.array(((42.,),))},
          mode=ModeKeys.PREDICT,
          logits=logits_1d)

    # Dynamic shape: error surfaces at run time via the placeholder feed.
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),))},
        mode=ModeKeys.PREDICT,
        logits=logits_placeholder)
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.predictions[prediction_keys.PredictionKeys.PREDICTIONS].eval(
            {logits_placeholder: logits_1d})

  def test_incompatible_labels_eval(self):
    """Label/logit shape mismatches are caught in EVAL mode."""
    head = head_lib._regression_head(label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
    values_1d = np.array(((43.,), (44.,),))

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
      head.create_loss(
          features={'x': values_1d},
          mode=ModeKeys.EVAL,
          logits=values_3d,
          labels=values_1d)
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      head.create_estimator_spec(
          features={'x': values_3d},
          labels=values_3d,
          mode=ModeKeys.EVAL,
          logits=values_1d,
          train_op_fn=None)

    # Dynamic shape.
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': values_1d},
        mode=ModeKeys.EVAL,
        logits=logits_placeholder,
        labels=labels_placeholder)
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.loss.eval({
            labels_placeholder: values_3d,
            logits_placeholder: values_1d
        })
    training_loss = head.create_loss(
        features={'x': values_1d},
        mode=ModeKeys.EVAL,
        logits=logits_placeholder,
        labels=labels_placeholder)[0]
    with self.cached_session():
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
        training_loss.eval({
            labels_placeholder: values_1d,
            logits_placeholder: values_3d
        })

  def test_incompatible_labels_train(self):
    """Label/logit shape mismatches are caught in TRAIN mode."""
    head = head_lib._regression_head(label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
    values_1d = np.array(((43.,), (44.,),))

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
      head.create_loss(
          features={'x': values_1d},
          mode=ModeKeys.TRAIN,
          logits=values_3d,
          labels=values_1d)
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      head.create_estimator_spec(
          features={'x': values_3d},
          mode=ModeKeys.TRAIN,
          logits=values_1d,
          labels=values_3d,
          train_op_fn=lambda x: x)

    # Dynamic shape.
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': values_1d},
        mode=ModeKeys.TRAIN,
        logits=logits_placeholder,
        labels=labels_placeholder,
        train_op_fn=lambda x: x)
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.loss.eval({
            labels_placeholder: values_3d,
            logits_placeholder: values_1d
        })
    training_loss = head.create_loss(
        features={'x': values_1d},
        mode=ModeKeys.TRAIN,
        logits=logits_placeholder,
        labels=labels_placeholder)[0]
    with self.cached_session():
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
        training_loss.eval({
            labels_placeholder: values_1d,
            logits_placeholder: values_3d
        })

  def test_name(self):
    head = head_lib._regression_head(name='foo')
    self.assertEqual('foo', head.name)

  def test_predict(self):
    """Tests PREDICT mode outputs and export signatures."""
    head = head_lib._regression_head()
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.int32)
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),), dtype=np.int32)},
        mode=ModeKeys.PREDICT,
        logits=logits)

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertIsNone(spec.loss)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNone(spec.train_op)
    default_serving_key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    self.assertItemsEqual((default_serving_key, 'predict', 'regression'),
                          spec.export_outputs.keys())
    _assert_no_hooks(self, spec)

    # Assert predictions.
    with self.cached_session():
      _initialize_variables(self, spec.scaffold)
      self.assertAllClose(logits, spec.predictions[prediction_key].eval())
      self.assertAllClose(logits,
                          spec.export_outputs[default_serving_key].value.eval())
      self.assertAllClose(logits,
                          spec.export_outputs['regression'].value.eval())
      self.assertAllClose(
          logits, spec.export_outputs['predict'].outputs['predictions'].eval())

  def test_predict_with_inverse_link_fn(self):
    """Tests that inverse_link_fn transforms logits into predictions."""

    def _inverse_link_fn(logits):
      return logits - 10.

    head = head_lib._regression_head(inverse_link_fn=_inverse_link_fn)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.int32)
    expected_predictions = np.array(((35,), (31,),), dtype=np.int32)
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),), dtype=np.int32)},
        mode=ModeKeys.PREDICT,
        logits=logits)

    # Assert spec contains expected tensors.
    keys = prediction_keys.PredictionKeys
    self.assertItemsEqual((keys.PREDICTIONS, keys.LOGITS),
                          spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32,
                     spec.predictions[keys.PREDICTIONS].dtype)
    self.assertEqual(tf.dtypes.float32, spec.predictions[keys.LOGITS].dtype)
    default_serving_key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    self.assertItemsEqual((default_serving_key, 'predict', 'regression'),
                          spec.export_outputs.keys())

    # Assert predictions.
with self.cached_session(): _initialize_variables(self, spec.scaffold) self.assertAllClose(expected_predictions, spec.predictions[keys.PREDICTIONS].eval()) self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval()) self.assertAllClose(expected_predictions, spec.export_outputs[default_serving_key].value.eval()) self.assertAllClose(expected_predictions, spec.export_outputs['regression'].value.eval()) self.assertAllClose( expected_predictions, spec.export_outputs['predict'].outputs['predictions'].eval()) self.assertAllClose( logits, spec.export_outputs['predict'].outputs['logits'].eval()) def test_eval_create_loss(self): head = head_lib._regression_head() logits = np.array(((45,), (41,),), dtype=np.float32) labels = np.array(((43,), (44,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.float32)} # Create loss. training_loss = head.create_loss( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) # loss = [(43-45)^2, (44-41)] = [4, 9] self.assertAllClose(13., training_loss.eval()) def test_eval_create_loss_loss_fn(self): """Tests head.create_loss for eval mode and custom loss_fn.""" loss = np.array([[0., 1.], [2., 3.]], dtype=np.float32) logits_input = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32) labels_input = np.array([[1., 0.], [2., -1.]], dtype=np.float32) def _loss_fn(labels, logits): check_labels = tf.debugging.Assert( tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels]) check_logits = tf.debugging.Assert( tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits]) with tf.control_dependencies([check_labels, check_logits]): return tf.constant(loss) head = head_lib._regression_head(label_dimension=2, loss_fn=_loss_fn) actual_training_loss = head.create_loss( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=logits_input, labels=labels_input)[0] with self.cached_session(): 
_initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose(np.sum(loss), actual_training_loss.eval()) def test_eval_create_loss_loss_fn_wrong_shape(self): """Tests custom loss_fn that returns Tensor of unexpected shape.""" loss = np.array([[1.], [2.]], dtype=np.float32) def _loss_fn(labels, logits): del labels, logits # Unused return tf.constant(loss) head = head_lib._regression_head(label_dimension=2, loss_fn=_loss_fn) logits = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32) labels = np.array([[1., 0.], [2., -1.]], dtype=np.float32) actual_training_loss = head.create_loss( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0] with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 2\]\. \] ' r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2 1\]'): actual_training_loss.eval() def test_eval_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib._regression_head() with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL, logits=np.array(((45,), (41,),), dtype=np.float32), labels=None) def test_eval(self): head = head_lib._regression_head() self.assertEqual(1, head.logits_dimension) logits = np.array(((45,), (41,),), dtype=np.float32) labels = np.array(((43,), (44,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.float32)} # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) # Assert spec contains expected tensors. 
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                           metric_keys.MetricKeys.PREDICTION_MEAN,
                           metric_keys.MetricKeys.LABEL_MEAN),
                          spec.eval_metric_ops.keys())
    self.assertIsNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      # Each eval metric is a (value_op, update_op) pair.
      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
          metric_keys.MetricKeys.LOSS_MEAN]
      predictions, loss, loss_mean = sess.run(
          (spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
      self.assertAllClose(logits, predictions)
      # loss = (43-45)^2 + (44-41)^2 = 4+9 = 13
      self.assertAllClose(13., loss)
      # loss_mean = loss/2 = 13/2 = 6.5
      expected_loss_mean = 6.5
      # Check results of both update (in `loss_mean`) and value ops.
      self.assertAllClose(expected_loss_mean, loss_mean)
      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())

  def test_eval_metric_ops_with_head_name_for_regression(self):
    """A named head prefixes its metric keys with the head name."""
    head = head_lib._regression_head(name='some_regression_head')
    logits = np.array(((1,), (9,)), dtype=np.float32)
    labels = np.array(((1,), (1,)), dtype=np.int64)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)

    expected_metric_keys = [
        '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
        '{}/some_regression_head'.format(
            metric_keys.MetricKeys.PREDICTION_MEAN),
        '{}/some_regression_head'.format(metric_keys.MetricKeys.LABEL_MEAN),
    ]
    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())

  def test_eval_with_regularization_losses(self):
    """Regularization losses are added to the loss and reported as a metric."""
    head = head_lib._regression_head(
        loss_reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE)
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
    #                    = (4 + 9) / 2 = 6.5
    expected_unregularized_loss = 6.5
    expected_regularized_loss = (
        expected_unregularized_loss + expected_regularization_loss)
    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        regularization_losses=regularization_losses)
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN: expected_unregularized_loss,
        keys.LOSS_REGULARIZATION: expected_regularization_loss,
        keys.PREDICTION_MEAN: (45 + 41) / 2.0,
        keys.LABEL_MEAN: (43 + 44) / 2.0,
    }

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
      predictions, loss, metrics = sess.run(
          (spec.predictions[prediction_key], spec.loss, update_ops))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_regularized_loss, loss)
      # Check results of both update (in `metrics`) and value ops.
      self.assertAllClose(expected_metrics, metrics)
      self.assertAllClose(expected_metrics,
                          {k: value_ops[k].eval() for k in value_ops})

  def test_train_create_loss(self):
    """create_loss in TRAIN mode returns (training, unreduced, weights)."""
    head = head_lib._regression_head()
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # unreduced_loss = [(43-45)^2, (44-41)^2] = [4, 9]
    expected_unreduced_loss = [[4.], [9.]]
    # weights default to 1.
    expected_weights = 1
    # training_loss = 1 * 4 + 1 * 9 = 13
    expected_training_loss = 13.
    # Create loss.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())
      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
      self.assertAllClose(expected_weights, actual_weights)

  def test_train_create_loss_loss_reduction(self):
    """Tests create_loss with loss_reduction."""
    head = head_lib._regression_head(
        loss_reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # unreduced_loss = [(43-45)^2, (44-41)^2] = [4, 9]
    expected_unreduced_loss = [[4.], [9.]]
    # weights default to 1.
    expected_weights = 1
    # training_loss = (1 * 4 + 1 * 9) / num_nonzero_weights
    expected_training_loss = 13. / 2.
    # Create loss.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())
      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
      self.assertAllClose(expected_weights, actual_weights)

  def test_train_labels_none(self):
    """Tests that error is raised when labels is None."""
    head = head_lib._regression_head()

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    with self.assertRaisesRegexp(
        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (41,),), dtype=np.float32),
          labels=None,
          train_op_fn=_no_op_train_fn)

  def test_train(self):
    """TRAIN mode: loss is passed to train_op_fn and summaries are written."""
    head = head_lib._regression_head()
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
    expected_loss = 13

    def _train_op_fn(loss):
      # Asserts inside the graph verify the loss handed to train_op_fn.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn)

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/2 = 13/2 = 6.5
              metric_keys.MetricKeys.LOSS_MEAN: 6.5,
          }, summary_str)

  def test_train_with_optimizer(self):
    """TRAIN mode with an `optimizer` object instead of train_op_fn."""
    head = head_lib._regression_head()
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
    expected_loss = 13

    class _Optimizer(object):
      """Fake optimizer that checks the loss it receives."""

      def minimize(self, loss, global_step):
        del global_step
        with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
            tf.cast(expected_loss, dtype=tf.dtypes.float32),
            tf.cast(loss, dtype=tf.dtypes.float32),
            name='assert_loss'),)):
          return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=_Optimizer())

    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)

  def test_train_with_update_ops(self):
    """Ops in the UPDATE_OPS collection run as part of the train op."""
    head = head_lib._regression_head()

    with tf.Graph().as_default():
      w = tf.Variable(1)
      update_op = w.assign_add(1)
      tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.UPDATE_OPS,
                                     update_op)

      t = tf.Variable('')
      expected_train_result = b'my_train_op'

      def _train_op_fn(loss):
        del loss
        return t.assign(expected_train_result)

      spec = head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (41,),), dtype=np.float32),
          labels=np.array(((43.,), (44.,),), dtype=np.float64),
          train_op_fn=_train_op_fn)

      with self.cached_session() as sess:
        _initialize_variables(self, spec.scaffold)
        sess.run(spec.train_op)
        w_value, t_value = sess.run([w, t])
        # w was incremented by the collected update op; t by train_op_fn.
        self.assertEqual(2, w_value)
        self.assertEqual(expected_train_result, t_value)

  def test_train_summaries_with_head_name(self):
    """A named head prefixes its summary tags with the head name."""
    head = head_lib._regression_head(name='some_regression_head')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
    expected_loss = 13

    def _train_op_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn)

    # Assert summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      summary_str = sess.run(spec.scaffold.summary_op)
      _assert_simple_summaries(
          self, {
              '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS):
                  expected_loss,
              # loss_mean = loss/2 = 13/2 = 6.5
              '{}/some_regression_head'
              .format(metric_keys.MetricKeys.LOSS_MEAN):
                  6.5,
          }, summary_str)

  def test_train_with_regularization_losses(self):
    """Regularization losses are added to the training loss and summarized."""
    head = head_lib._regression_head(
        loss_reduction=tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE)
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
    #                    = (4 + 9) / 2 = 6.5
    # loss = unregularized_loss + regularization_loss = 8.5
    expected_loss = 8.5

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        regularization_losses=regularization_losses)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              metric_keys.MetricKeys.LOSS_REGULARIZATION:
                  expected_regularization_loss,
          }, summary_str)

  def test_weighted_multi_example_eval(self):
    """1d label, 3 examples, 1 batch."""
    head = head_lib._regression_head(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
    spec = head.create_estimator_spec(
        features={
            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
            'label_weights': np.array(((1.,), (.1,), (1.5,)),
                                      dtype=np.float32),
        },
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=np.array(((35,), (42,), (45,)), dtype=np.int32))

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                           metric_keys.MetricKeys.PREDICTION_MEAN,
                           metric_keys.MetricKeys.LABEL_MEAN),
                          spec.eval_metric_ops.keys())
    self.assertIsNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
          metric_keys.MetricKeys.LOSS_MEAN]
      predictions, loss, loss_mean = sess.run(
          (spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
      self.assertAllClose(logits, predictions)
      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
      self.assertAllClose(101.6, loss)
      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
      expected_loss_mean = 39.0769231
      # Check results of both update (in `loss_mean`) and value ops.
      self.assertAllClose(expected_loss_mean, loss_mean)
      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())

  def test_weight_with_numeric_column(self):
    """1d label, 3 examples, 1 batch."""
    # The weight column applies normalizer_fn (x + 1), so the raw weights
    # (0, -0.9, 0.5) become (1, .1, 1.5).
    head = head_lib._regression_head(
        weight_column=tf.feature_column.numeric_column(
            'label_weights', normalizer_fn=lambda x: x + 1.))

    # Create estimator spec.
    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
    spec = head.create_estimator_spec(
        features={
            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
            'label_weights':
                np.array(((0.,), (-0.9,), (0.5,)), dtype=np.float32),
        },
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=np.array(((35,), (42,), (45,)), dtype=np.int32))

    # Assert loss.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      loss = sess.run(spec.loss)
      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
      self.assertAllClose(101.6, loss)

  def test_weighted_multi_example_train(self):
    """1d label, 3 examples, 1 batch."""
    head = head_lib._regression_head(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
    expected_train_result = b'my_train_op'
    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
    expected_loss = 101.6

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features={
            'x': np.array(((42,), (43,), (44,)), dtype=np.float32),
            'label_weights': np.array(((1.,), (.1,), (1.5,)),
                                      dtype=np.float64),
        },
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=np.array(((35.,), (42.,), (45.,)), dtype=np.float32),
        train_op_fn=_train_op_fn)

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
              metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
          }, summary_str)

  def test_train_one_dim_create_loss(self):
    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
    head = head_lib._regression_head(weight_column='label_weights')
    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
    labels_rank_1 = np.array((35., 42., 45.,))
    # unreduced_loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
    expected_unreduced_loss = [[100.], [1.], [1.]]
    # weights are reshaped to [3, 1] to match logits.
    expected_weights = [[1.], [.1], [1.5]]
    # training_loss = 100 * 1 + 1 * .1 + 1.5 * 1 = 101.6
    expected_training_loss = 101.6
    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
    # Create loss.
    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1)
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())
      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
      self.assertAllClose(expected_weights, actual_weights.eval())

  def test_train_one_dim(self):
    """Tests train with 1D labels and weights (shape [batch_size])."""
    head = head_lib._regression_head(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
    expected_train_result = b'my_train_op'
    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
    expected_loss = 101.6

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
    labels_rank_1 = np.array((35., 42., 45.,))
    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
    # All inputs are genuinely rank-1 here.
    self.assertEqual((3,), x_feature_rank_1.shape)
    self.assertEqual((3,), weight_rank_1.shape)
    self.assertEqual((3,), labels_rank_1.shape)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1,
        train_op_fn=_train_op_fn)

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
              metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
          }, summary_str)

  def test_weighted_multi_value_eval_create_loss(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib._regression_head(
        weight_column='label_weights', label_dimension=3)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),))
    }
    # Create loss.
    training_loss = head.create_loss(
        features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)[0]
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
      # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
      self.assertAllClose(101.6, training_loss.eval())

  def test_weighted_multi_value_eval(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib._regression_head(
        weight_column='label_weights', label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),))
    }
    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels)

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                           metric_keys.MetricKeys.PREDICTION_MEAN,
                           metric_keys.MetricKeys.LABEL_MEAN),
                          spec.eval_metric_ops.keys())
    self.assertIsNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
          metric_keys.MetricKeys.LOSS_MEAN]
      predictions, loss, loss_mean = sess.run(
          (spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
      self.assertAllClose(logits, predictions)
      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
      self.assertAllClose(101.6, loss)
      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
      expected_loss_mean = 39.076923
      # Check results of both update (in `loss_mean`) and value ops.
      self.assertAllClose(expected_loss_mean, loss_mean)
      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())

  def test_weighted_multi_value_train_create_loss(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib._regression_head(
        weight_column='label_weights', label_dimension=3)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),))
    }
    # Create loss.
    training_loss = head.create_loss(
        features=features, mode=ModeKeys.TRAIN, logits=logits,
        labels=labels)[0]
    with self.cached_session():
      _initialize_variables(self, tf.compat.v1.train.Scaffold())
      # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
      # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
      self.assertAllClose(101.6, training_loss.eval())

  def test_weighted_multi_value_train(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib._regression_head(
        weight_column='label_weights', label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    expected_train_result = b'my_train_op'
    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
    expected_loss = 101.6

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),)),
    }
    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn)

    # Assert spec contains expected tensors.
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
    self.assertEqual(tf.dtypes.float32, spec.predictions[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    _assert_no_hooks(self, spec)

    # Evaluate predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      _assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
              metric_keys.MetricKeys.LOSS_MEAN: 39.076923,
          }, summary_str)

  def test_weighted_multi_batch_eval(self):
    """1d label, 1 example, 3 batches."""
    head = head_lib._regression_head(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45.,), (41.,), (44.,)))
    input_fn = numpy_io.numpy_input_fn(
        x={
            'x': np.array(((42.,), (43.,), (44.,))),
            'label_weights': np.array(((1.,), (.1,), (1.5,))),
            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
            # batched version of it, and pop it off before passing to
            # `create_estimator_spec`.
            'logits': logits,
        },
        y=np.array(((35.,), (42.,), (45.,))),
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    batched_features, batched_labels = input_fn()
    batched_logits = batched_features.pop('logits')
    spec = head.create_estimator_spec(
        features=batched_features,
        mode=ModeKeys.EVAL,
        logits=batched_logits,
        labels=batched_labels,
        train_op_fn=None)

    # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
    # loss = sum(losses) = 100+.1+1.5 = 101.6
    # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
    expected_metrics = {
        metric_keys.MetricKeys.LOSS_MEAN: 39.076923,
        metric_keys.MetricKeys.PREDICTION_MEAN:
            (45 + 41 * 0.1 + 44 * 1.5) / 2.6,
        metric_keys.MetricKeys.LABEL_MEAN: (35 + 42 * 0.1 + 45 * 1.5) / 2.6,
    }

    # Assert spec contains expected tensors.
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
    self.assertIsNone(spec.train_op)
    _assert_no_hooks(self, spec)

    with self.cached_session() as sess:
      # Finalize graph and initialize variables.
      _initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      tf.compat.v1.train.queue_runner.start_queue_runners()

      # Run tensors for `steps` steps.
      steps = len(logits)
      results = tuple([
          sess.run((
              spec.loss,
              # The `[1]` gives us the metric update op.
              {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}))
          for _ in range(steps)
      ])

      # Assert losses and metrics.
      self.assertAllClose((100, .1, 1.5), [r[0] for r in results])
      # For metrics, check results of both update (in `results`) and value ops.
      # Note: we only check the result of the last step for streaming metrics.
      self.assertAllClose(expected_metrics, results[steps - 1][1])
      self.assertAllClose(
          expected_metrics,
          {k: spec.eval_metric_ops[k][0].eval() for k in spec.eval_metric_ops})

  def test_weighted_multi_batch_train(self):
    """1d label, 1 example, 3 batches."""
    head = head_lib._regression_head(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45.,), (41.,), (44.,)))
    input_fn = numpy_io.numpy_input_fn(
        x={
            'x': np.array(((42.,), (43.,), (44.,))),
            'label_weights': np.array(((1.,), (.1,), (1.5,))),
            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
            # batched version of it, and pop it off before passing to
            # `create_estimator_spec`.
            'logits': logits,
        },
        y=np.array(((35.,), (42.,), (45.,))),
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    batched_features, batched_labels = input_fn()
    batched_logits = batched_features.pop('logits')
    spec = head.create_estimator_spec(
        features=batched_features,
        mode=ModeKeys.TRAIN,
        logits=batched_logits,
        labels=batched_labels,
        train_op_fn=lambda loss: loss * -7.)

    # Assert spec contains expected tensors.
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertIsNotNone(spec.train_op)

    with self.cached_session() as sess:
      # Finalize graph and initialize variables.
_initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) tf.compat.v1.train.queue_runner.start_queue_runners() results = tuple( [sess.run((spec.loss, spec.train_op)) for _ in range(len(logits))]) # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5] expected_losses = np.array((100, .1, 1.5)) self.assertAllClose(expected_losses, [r[0] for r in results]) self.assertAllClose(expected_losses * -7., [r[1] for r in results]) def test_multi_dim_weighted_train_create_loss(self): """Logits, labels of shape [2, 2, 3], weight shape [2, 2].""" label_dimension = 3 head = head_lib._regression_head( weight_column='label_weights', label_dimension=label_dimension) logits = np.array([[[00., 01., 02.], [10., 11., 12.]], [[20., 21., 22.], [30., 31., 32.]]]) labels = np.array([[[01., 02., 03.], [12., 13., 14.]], [[23., 24., 25.], [34., 35., 36.]]]) weights = np.array([[1., 1.5], [2., 2.5]]) expected_unreduced_loss = [[[1., 1., 1.], [4., 4., 4.]], [[9., 9., 9.], [16., 16., 16.]]] expected_training_loss = np.sum( np.array([[[1. * x for x in [1., 1., 1.]], [1.5 * x for x in [4., 4., 4.]]], [[2. * x for x in [9., 9., 9.]], [2.5 * x for x in [16., 16., 16.]]]])) # Weights are expanded to [2, 2, 1] to match logits. expected_weights = [[[1.], [1.5]], [[2.], [2.5]]] # Create loss. 
training_loss, unreduced_loss, actual_weights, _ = head.create_loss( features={'label_weights': weights}, mode=ModeKeys.TRAIN, logits=logits, labels=labels) with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose(expected_training_loss, training_loss.eval()) self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval()) self.assertAllClose(expected_weights, actual_weights.eval()) def test_multi_dim_weighted_train(self): """Logits, labels of shape [2, 2, 3], weight shape [2, 2].""" head = head_lib._regression_head( weight_column='label_weights', label_dimension=3) logits = np.array([[[00., 01., 02.], [10., 11., 12.]], [[20., 21., 22.], [30., 31., 32.]]]) labels = np.array([[[01., 02., 03.], [12., 13., 14.]], [[23., 24., 25.], [34., 35., 36.]]]) expected_train_result = b'my_train_op' features = { 'label_weights': np.array([[1., 1.5], [2., 2.5]]), } # loss = 1*3*1^2 + 1.5*3*2^2 + 2*3*3^2 +2.5*3*4^2 = 195 expected_loss = 195. # Create estimator spec. 
def _train_op_fn(loss): with tf.control_dependencies((tf.compat.v1.debugging.assert_equal( tf.cast(expected_loss, dtype=tf.dtypes.float32), tf.cast(loss, dtype=tf.dtypes.float32), name='assert_loss'),)): return tf.constant(expected_train_result) spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose(expected_loss, spec.loss.eval()) def test_multi_dim_train_weights_wrong_inner_dim(self): """Logits, labels of shape [2, 2, 3], weight shape [2, 1].""" head = head_lib._regression_head( weight_column='label_weights', label_dimension=3) logits = np.array([[[00., 01., 02.], [10., 11., 12.]], [[20., 21., 22.], [30., 31., 32.]]]) labels = np.array([[[01., 02., 03.], [12., 13., 14.]], [[23., 24., 25.], [34., 35., 36.]]]) features = { 'label_weights': np.array([[1.], [2]]), } def _no_op_train_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_no_op_train_fn) with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'): spec.loss.eval() def test_multi_dim_train_weights_wrong_outer_dim(self): """Logits, labels of shape [2, 2, 3], weight shape [2, 2, 2].""" head = head_lib._regression_head( weight_column='label_weights', label_dimension=3) logits = np.array([[[00., 01., 02.], [10., 11., 12.]], [[20., 21., 22.], [30., 31., 32.]]]) labels = np.array([[[01., 02., 03.], [12., 13., 14.]], [[23., 24., 25.], [34., 35., 36.]]]) weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) features = { 'label_weights': weights_placeholder, } def _no_op_train_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features=features, 
mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_no_op_train_fn) with self.cached_session(): _initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'): spec.loss.eval({ weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]], [[2., 2.1], [2.5, 2.6]]]) }) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/kmeans.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """A canned Estimator for k-means clustering.""" # TODO(ccolby): Move clustering_ops.py into this file and streamline the code. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function import time import numpy as np import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.ops import clustering_ops from tensorflow.python.ops import control_flow_ops from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.export import export_output class _LossRelativeChangeHook(tf.compat.v1.train.SessionRunHook): """Stops when the change in loss goes below a tolerance.""" def __init__(self, loss_tensor, tolerance): """Creates a _LossRelativeChangeHook. Args: loss_tensor: A scalar tensor of the loss value. tolerance: A relative tolerance of loss change between iterations. """ self._loss_tensor = loss_tensor self._tolerance = tolerance self._prev_loss = None def before_run(self, run_context): del run_context # unused return tf.compat.v1.train.SessionRunArgs(self._loss_tensor) def after_run(self, run_context, run_values): loss = run_values.results assert loss is not None if self._prev_loss: relative_change = ( abs(loss - self._prev_loss) / (1 + abs(self._prev_loss))) if relative_change < self._tolerance: run_context.request_stop() self._prev_loss = loss class _InitializeClustersHook(tf.compat.v1.train.SessionRunHook): """Initializes the cluster centers. The chief repeatedly invokes an initialization op until all cluster centers are initialized. The workers wait for the initialization phase to complete. """ def __init__(self, init_op, is_initialized_var, is_chief): """Creates an _InitializeClustersHook. Args: init_op: An op that, when run, will choose some initial cluster centers. This op may need to be run multiple times to choose all the centers. 
is_initialized_var: A boolean variable reporting whether all initial centers have been chosen. is_chief: A boolean specifying whether this task is the chief. """ self._init_op = init_op self._is_initialized_var = is_initialized_var self._is_chief = is_chief def after_create_session(self, session, coord): del coord # unused assert self._init_op.graph is tf.compat.v1.get_default_graph() assert self._is_initialized_var.graph is self._init_op.graph while True: try: if session.run(self._is_initialized_var): break elif self._is_chief: session.run(self._init_op) else: time.sleep(1) except RuntimeError as e: tf.compat.v1.logging.info(e) def _parse_features_if_necessary(features, feature_columns): """Helper function to convert the input points into a usable format. Args: features: The input features. feature_columns: An optionable iterable containing all the feature columns used by the model. All items in the set should be feature column instances that can be passed to `tf.feature_column.input_layer`. If this is None, all features will be used. Returns: If `features` is a dict of `k` features (optionally filtered by `feature_columns`), each of which is a vector of `n` scalars, the return value is a Tensor of shape `(n, k)` representing `n` input points, where the items in the `k` dimension are sorted lexicographically by `features` key. If `features` is not a dict, it is returned unmodified. 
""" if not isinstance(features, dict): return features if feature_columns: return tf.compat.v1.feature_column.input_layer(features, feature_columns) keys = sorted(features.keys()) with ops.colocate_with(features[keys[0]]): return tf.concat([features[k] for k in keys], axis=1) class _ModelFn(object): """Model function for the estimator.""" def __init__(self, num_clusters, initial_clusters, distance_metric, seed, use_mini_batch, mini_batch_steps_per_iteration, kmeans_plus_plus_num_retries, relative_tolerance, feature_columns): self._num_clusters = num_clusters self._initial_clusters = initial_clusters self._distance_metric = distance_metric self._seed = seed self._use_mini_batch = use_mini_batch self._mini_batch_steps_per_iteration = mini_batch_steps_per_iteration self._kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries self._relative_tolerance = relative_tolerance self._feature_columns = feature_columns def model_fn(self, features, mode, config): """Model function for the estimator. Note that this does not take a `labels` arg. This works, but `input_fn` must return either `features` or, equivalently, `(features, None)`. Args: features: The input points. See `tf.estimator.Estimator`. mode: See `tf.estimator.Estimator`. config: See `tf.estimator.Estimator`. Returns: A `tf.estimator.EstimatorSpec` (see `tf.estimator.Estimator`) specifying this behavior: * `train_op`: Execute one mini-batch or full-batch run of Lloyd's algorithm. * `loss`: The sum of the squared distances from each input point to its closest center. * `eval_metric_ops`: Maps `SCORE` to `loss`. * `predictions`: Maps `ALL_DISTANCES` to the distance from each input point to each cluster center; maps `CLUSTER_INDEX` to the index of the closest cluster center for each input point. """ # input_points is a single Tensor. Therefore, the sharding functionality # in clustering_ops is unused, and some of the values below are lists of a # single item. 
    input_points = _parse_features_if_necessary(features, self._feature_columns)

    # Let N = the number of input_points.
    # all_distances: A list of one matrix of shape (N, num_clusters). Each value
    #   is the distance from an input point to a cluster center.
    # model_predictions: A list of one vector of shape (N). Each value is the
    #   cluster id of an input point.
    # losses: Similar to cluster_idx but provides the distance to the cluster
    #   center.
    # is_initialized: scalar indicating whether the initial cluster centers
    #   have been chosen; see init_op.
    # init_op: an op to choose the initial cluster centers. A single worker
    #   repeatedly executes init_op until is_initialized becomes True.
    # training_op: an op that runs an iteration of training, either an entire
    #   Lloyd iteration or a mini-batch of a Lloyd iteration. Multiple workers
    #   may execute this op, but only after is_initialized becomes True.
    (all_distances, model_predictions, losses, is_initialized, init_op,
     training_op) = clustering_ops.KMeans(
         inputs=input_points,
         num_clusters=self._num_clusters,
         initial_clusters=self._initial_clusters,
         distance_metric=self._distance_metric,
         use_mini_batch=self._use_mini_batch,
         mini_batch_steps_per_iteration=self._mini_batch_steps_per_iteration,
         random_seed=self._seed,
         kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries
     ).training_graph()

    loss = tf.math.reduce_sum(losses)
    tf.compat.v1.summary.scalar('loss/raw', loss)

    # Tie the global-step increment to the training op so each run of
    # `training_op` advances the step counter.
    incr_step = tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(), 1)
    training_op = control_flow_ops.with_dependencies([training_op, incr_step],
                                                     loss)

    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    if self._relative_tolerance is not None:
      training_hooks.append(
          _LossRelativeChangeHook(loss, self._relative_tolerance))

    export_outputs = {
        KMeansClustering.ALL_DISTANCES:
            export_output.PredictOutput(all_distances[0]),
        KMeansClustering.CLUSTER_INDEX:
            export_output.PredictOutput(model_predictions[0]),
        # The default serving signature predicts the cluster index.
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            export_output.PredictOutput(model_predictions[0])
    }

    return model_fn_lib.EstimatorSpec(
        mode=mode,
        predictions={
            KMeansClustering.ALL_DISTANCES: all_distances[0],
            KMeansClustering.CLUSTER_INDEX: model_predictions[0],
        },
        loss=loss,
        train_op=training_op,
        eval_metric_ops={
            KMeansClustering.SCORE: tf.compat.v1.metrics.mean(loss)
        },
        training_hooks=training_hooks,
        export_outputs=export_outputs)


# TODO(agarwal,ands): support sharded input.
@estimator_export(v1=['estimator.experimental.KMeans'])
class KMeansClustering(estimator.Estimator):
  """An Estimator for K-Means clustering.

  Example:
  ```
  import numpy as np
  import tensorflow as tf

  num_points = 100
  dimensions = 2
  points = np.random.uniform(0, 1000, [num_points, dimensions])

  def input_fn():
    return tf.compat.v1.train.limit_epochs(
        tf.convert_to_tensor(points, dtype=tf.float32), num_epochs=1)

  num_clusters = 5
  kmeans = tf.compat.v1.estimator.experimental.KMeans(
      num_clusters=num_clusters, use_mini_batch=False)

  # train
  num_iterations = 10
  previous_centers = None
  for _ in range(num_iterations):
    kmeans.train(input_fn)
    cluster_centers = kmeans.cluster_centers()
    if previous_centers is not None:
      print('delta:', cluster_centers - previous_centers)
    previous_centers = cluster_centers
    print('score:', kmeans.score(input_fn))
  print('cluster centers:', cluster_centers)

  # map the input points to their clusters
  cluster_indices = list(kmeans.predict_cluster_index(input_fn))
  for i, point in enumerate(points):
    cluster_index = cluster_indices[i]
    center = cluster_centers[cluster_index]
    print('point:', point, 'is in cluster', cluster_index,
          'centered at', center)
  ```

  The `SavedModel` saved by the `export_saved_model` method does not include
  the cluster centers. However, the cluster centers may be retrieved by the
  latest checkpoint saved during training. Specifically,
  ```
  kmeans.cluster_centers()
  ```
  is equivalent to
  ```
  tf.train.load_variable(
      kmeans.model_dir, KMeansClustering.CLUSTER_CENTERS_VAR_NAME)
  ```
  """

  # Valid values for the distance_metric constructor argument.
  SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE
  COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE

  # Values for initial_clusters constructor argument.
  RANDOM_INIT = clustering_ops.RANDOM_INIT
  KMEANS_PLUS_PLUS_INIT = clustering_ops.KMEANS_PLUS_PLUS_INIT

  # Metric returned by evaluate(): The sum of the squared distances from each
  # input point to its closest center.
  SCORE = 'score'

  # Keys returned by predict().
  # ALL_DISTANCES: The distance from each input point to each cluster center.
  # CLUSTER_INDEX: The index of the closest cluster center for each input
  #   point.
  CLUSTER_INDEX = 'cluster_index'
  ALL_DISTANCES = 'all_distances'

  # Variable name used by cluster_centers().
  CLUSTER_CENTERS_VAR_NAME = clustering_ops.CLUSTERS_VAR_NAME

  def __init__(self,
               num_clusters,
               model_dir=None,
               initial_clusters=RANDOM_INIT,
               distance_metric=SQUARED_EUCLIDEAN_DISTANCE,
               seed=None,
               use_mini_batch=True,
               mini_batch_steps_per_iteration=1,
               kmeans_plus_plus_num_retries=2,
               relative_tolerance=None,
               config=None,
               feature_columns=None):
    r"""Creates an Estimator for running KMeans training and inference.

    This Estimator implements the following variants of the K-means algorithm:

    If `use_mini_batch` is False, it runs standard full batch K-means. Each
    training step runs a single iteration of K-Means and must process the full
    input at once. To run in this mode, the `input_fn` passed to `train` must
    return the entire input dataset.

    If `use_mini_batch` is True, it runs a generalization of the mini-batch
    K-means algorithm. It runs multiple iterations, where each iteration is
    composed of `mini_batch_steps_per_iteration` steps. Each training step
    accumulates the contribution from one mini-batch into temporary storage.
    Every `mini_batch_steps_per_iteration` steps, the cluster centers are
    updated and the temporary storage cleared for the next iteration. For
    example: the entire dataset contains 64k examples, where the batch size
    is 64. User can choose mini_batch_steps_per_iteration = 100 to run 10% of
    the entire data every iteration in order to update the cluster centers.
    Note that:
      * If `mini_batch_steps_per_iteration=1`, the algorithm reduces to the
        standard K-means mini-batch algorithm.
      * If `mini_batch_steps_per_iteration = num_inputs / batch_size`, the
        algorithm becomes an asynchronous version of the full-batch algorithm.
        However, there is no guarantee by this implementation that each input
        is seen exactly once per iteration. Also, different updates are applied
        asynchronously without locking. So this asynchronous version may not
        behave exactly like a full-batch version.

    Args:
      num_clusters: An integer tensor specifying the number of clusters. This
        argument is ignored if `initial_clusters` is a tensor or numpy array.
      model_dir: The directory to save the model results and log files.
      initial_clusters: Specifies how the initial cluster centers are chosen.
        One of the following: * a tensor or numpy array with the initial
          cluster centers. * a callable `f(inputs, k)` that selects and returns
          up to `k` centers from an input batch. `f` is free to return any
          number of centers from `0` to `k`. It will be invoked on successive
          input batches as necessary until all `num_clusters` centers are
          chosen.
        * `KMeansClustering.RANDOM_INIT`: Choose centers randomly from an input
          batch. If the batch size is less than `num_clusters` then the entire
          batch is chosen to be initial cluster centers and the remaining
          centers are chosen from successive input batches.
        * `KMeansClustering.KMEANS_PLUS_PLUS_INIT`: Use kmeans++ to choose
          centers from the first input batch. If the batch size is less than
          `num_clusters`, a TensorFlow runtime error occurs.
      distance_metric: The distance metric used for clustering. One of:
        * `KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE`: Euclidean distance
          between vectors `u` and `v` is defined as \\(||u - v||_2\\) which is
          the square root of the sum of the absolute squares of the elements'
          difference.
        * `KMeansClustering.COSINE_DISTANCE`: Cosine distance between vectors
          `u` and `v` is defined as \\(1 - (u . v) / (||u||_2 ||v||_2)\\).
      seed: Python integer. Seed for PRNG used to initialize centers.
      use_mini_batch: A boolean specifying whether to use the mini-batch
        k-means algorithm. See explanation above.
      mini_batch_steps_per_iteration: The number of steps after which the
        updated cluster centers are synced back to a master copy. Used only if
        `use_mini_batch=True`. See explanation above.
      kmeans_plus_plus_num_retries: For each point that is sampled during
        kmeans++ initialization, this parameter specifies the number of
        additional points to draw from the current distribution before
        selecting the best. If a negative value is specified, a heuristic is
        used to sample `O(log(num_to_sample))` additional points. Used only if
        `initial_clusters=KMeansClustering.KMEANS_PLUS_PLUS_INIT`.
      relative_tolerance: A relative tolerance of change in the loss between
        iterations. Stops learning if the loss changes less than this amount.
        This may not work correctly if `use_mini_batch=True`.
      config: See `tf.estimator.Estimator`.
      feature_columns: An optional iterable containing all the feature columns
        used by the model. All items in the set should be feature column
        instances that can be passed to `tf.feature_column.input_layer`. If
        this is None, all features will be used.

    Raises:
      ValueError: An invalid argument was passed to `initial_clusters` or
        `distance_metric`.
    """
    if isinstance(initial_clusters, str) and initial_clusters not in [
        KMeansClustering.RANDOM_INIT, KMeansClustering.KMEANS_PLUS_PLUS_INIT
    ]:
      raise ValueError("Unsupported initialization algorithm '%s'" %
                       initial_clusters)
    if distance_metric not in [
        KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,
        KMeansClustering.COSINE_DISTANCE
    ]:
      raise ValueError("Unsupported distance metric '%s'" % distance_metric)
    self._distance_metric = distance_metric
    super(KMeansClustering, self).__init__(
        model_fn=_ModelFn(num_clusters, initial_clusters, distance_metric,
                          seed, use_mini_batch,
                          mini_batch_steps_per_iteration,
                          kmeans_plus_plus_num_retries, relative_tolerance,
                          feature_columns).model_fn,
        model_dir=model_dir,
        config=config)

  def _predict_one_key(self, input_fn, predict_key):
    # Generator that yields only the requested prediction key per example.
    for result in self.predict(input_fn=input_fn, predict_keys=[predict_key]):
      yield result[predict_key]

  def predict_cluster_index(self, input_fn):
    """Finds the index of the closest cluster center to each input point.

    Args:
      input_fn: Input points. See `tf.estimator.Estimator.predict`.

    Yields:
      The index of the closest cluster center for each input point.
    """
    for index in self._predict_one_key(input_fn,
                                       KMeansClustering.CLUSTER_INDEX):
      yield index

  def score(self, input_fn):
    """Returns the sum of squared distances to nearest clusters.

    Note that this function is different from the corresponding one in sklearn
    which returns the negative sum.

    Args:
      input_fn: Input points. See `tf.estimator.Estimator.evaluate`. Only one
        batch is retrieved.

    Returns:
      The sum of the squared distance from each point in the first batch of
      inputs to its nearest cluster center.
    """
    return self.evaluate(input_fn=input_fn, steps=1)[KMeansClustering.SCORE]

  def transform(self, input_fn):
    """Transforms each input point to its distances to all cluster centers.

    Note that if `distance_metric=KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE`,
    the predicted distances are passed through `np.sqrt` before being yielded,
    so this method yields plain (non-squared) Euclidean distances in that
    case.

    Args:
      input_fn: Input points. See `tf.estimator.Estimator.predict`.

    Yields:
      The distances from each input point to each cluster center.
    """
    for distances in self._predict_one_key(input_fn,
                                           KMeansClustering.ALL_DISTANCES):
      if self._distance_metric == KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE:
        yield np.sqrt(distances)
      else:
        yield distances

  def cluster_centers(self):
    """Returns the cluster centers."""
    return self.get_variable_value(KMeansClustering.CLUSTER_CENTERS_VAR_NAME)



================================================
FILE: tensorflow_estimator/python/estimator/canned/kmeans_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for KMeans."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import time

import numpy as np
from sklearn.cluster import KMeans as SklearnKMeans
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.platform import benchmark
from tensorflow.python.platform import flags
from tensorflow_estimator.python.estimator import run_config
from tensorflow_estimator.python.estimator.canned import kmeans as kmeans_lib

FLAGS = flags.FLAGS


def normalize(x):
  """L2-normalizes `x` along its last axis."""
  return x / np.sqrt(np.sum(x * x, axis=-1, keepdims=True))


def cosine_similarity(x, y):
  """Returns the pairwise cosine similarity between rows of `x` and `y`."""
  return np.dot(normalize(x), np.transpose(normalize(y)))


def make_random_centers(num_centers, num_dims, center_norm=500):
  """Returns `num_centers` random integer-valued centers in [0, center_norm)."""
  return np.round(
      np.random.rand(num_centers, num_dims).astype(np.float32) * center_norm)


def make_random_points(centers, num_points, max_offset=20):
  """Samples points around `centers`.

  Returns a tuple of (points, center assignments, squared offset norms).
  """
  num_centers, num_dims = centers.shape
  assignments = np.random.choice(num_centers, num_points)
  offsets = np.round(
      np.random.randn(num_points, num_dims).astype(np.float32) * max_offset)
  return (centers[assignments] + offsets, assignments,
          np.add.reduce(offsets * offsets, 1))


class KMeansTestBase(tf.test.TestCase):

  def input_fn(self,
               batch_size=None,
               points=None,
               randomize=None,
               num_epochs=None):
    """Returns an input_fn that randomly selects batches from given points."""
    batch_size = batch_size or self.batch_size
    points = points if points is not None else self.points
    num_points = points.shape[0]
    if randomize is None:
      randomize = (
          self.use_mini_batch and self.mini_batch_steps_per_iteration <= 1)

    def _fn():
      x = tf.constant(points)
      if batch_size == num_points:
        return tf.compat.v1.train.limit_epochs(x, num_epochs=num_epochs), None
      if randomize:
        indices = tf.random.uniform(
            tf.constant([batch_size]),
            minval=0,
            maxval=num_points - 1,
            dtype=tf.dtypes.int32,
            seed=10)
      else:
        # We need to cycle through the indices sequentially. We create a queue
        # to maintain the list of indices.
        q = tf.queue.FIFOQueue(num_points, tf.dtypes.int32, ())

        # Conditionally initialize the Queue.
        def _init_q():
          with tf.control_dependencies([q.enqueue_many(tf.range(num_points))]):
            return tf.no_op()

        init_q = tf.compat.v1.cond(q.size() <= 0, _init_q, tf.no_op)
        with tf.control_dependencies([init_q]):
          offsets = q.dequeue_many(batch_size)
          # Re-enqueue the dequeued indices so the cycle repeats.
          with tf.control_dependencies([q.enqueue_many(offsets)]):
            indices = tf.identity(offsets)
      batch = tf.compat.v1.gather(x, indices)
      return (tf.compat.v1.train.limit_epochs(batch, num_epochs=num_epochs),
              None)

    return _fn

  @staticmethod
  def config(tf_random_seed):
    return run_config.RunConfig().replace(tf_random_seed=tf_random_seed)

  @property
  def initial_clusters(self):
    return kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT

  @property
  def batch_size(self):
    return self.num_points

  @property
  def use_mini_batch(self):
    return False

  @property
  def mini_batch_steps_per_iteration(self):
    return 1


@test_util.run_all_in_graph_and_eager_modes
class KMeansTest(KMeansTestBase):

  def setUp(self):
    np.random.seed(3)
    self.num_centers = 5
    self.num_dims = 2
    self.num_points = 1000
    self.true_centers = make_random_centers(self.num_centers, self.num_dims)
    self.points, _, self.scores = make_random_points(self.true_centers,
                                                     self.num_points)
    self.true_score = np.add.reduce(self.scores)

  def _kmeans(self, relative_tolerance=None):
    # Common estimator construction shared by the tests below.
    return kmeans_lib.KMeansClustering(
        self.num_centers,
        initial_clusters=self.initial_clusters,
        distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        seed=24,
        relative_tolerance=relative_tolerance)

  def test_clusters(self):
    kmeans = self._kmeans()
    kmeans.train(input_fn=self.input_fn(), steps=1)
    clusters = kmeans.cluster_centers()
    self.assertAllEqual(list(clusters.shape), [self.num_centers, self.num_dims])

  def test_fit(self):
    kmeans = self._kmeans()
    kmeans.train(input_fn=self.input_fn(), steps=1)
    score1 = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points))
    steps = 10 * self.num_points // self.batch_size
    kmeans.train(input_fn=self.input_fn(), steps=steps)
    score2 = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points))
    # More training must not increase the score, and the final score should be
    # close to the known true score.
    self.assertTrue(score1 > score2)
    self.assertNear(self.true_score, score2, self.true_score * 0.05)

  def test_monitor(self):
    if self.use_mini_batch:
      # We don't test for use_mini_batch case since the loss value can be noisy.
      return

    kmeans = kmeans_lib.KMeansClustering(
        self.num_centers,
        initial_clusters=self.initial_clusters,
        distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=self.use_mini_batch,
        mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration,
        config=self.config(14),
        seed=12,
        relative_tolerance=1e-4)

    kmeans.train(
        input_fn=self.input_fn(),
        # Force it to train until the relative tolerance monitor stops it.
        steps=None)
    score = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points))
    self.assertNear(self.true_score, score, self.true_score * 0.01)

  def _infer_helper(self, kmeans, clusters, num_points):
    points, true_assignments, true_offsets = make_random_points(
        clusters, num_points)
    input_fn = self.input_fn(batch_size=num_points, points=points, num_epochs=1)
    # Test predict
    assignments = list(kmeans.predict_cluster_index(input_fn))
    self.assertAllEqual(assignments, true_assignments)

    # Test score
    score = kmeans.score(input_fn=lambda: (tf.constant(points), None))
    self.assertNear(score, np.sum(true_offsets), 0.01 * score)

    # Test transform
    transform = list(kmeans.transform(input_fn))
    # Expected values are Euclidean distances computed via the expansion
    # ||p - c||^2 = ||p||^2 - 2 p.c + ||c||^2, clamped at 0 before the sqrt.
    true_transform = np.sqrt(
        np.maximum(
            0,
            np.sum(np.square(points), axis=1, keepdims=True) -
            2 * np.dot(points, np.transpose(clusters)) +
            np.transpose(np.sum(np.square(clusters), axis=1, keepdims=True))))
    self.assertAllClose(transform, true_transform, rtol=0.05, atol=10)

  def test_infer(self):
    kmeans = self._kmeans()
    # Make a call to fit to initialize the cluster centers.
    max_steps = 1
    kmeans.train(input_fn=self.input_fn(), max_steps=max_steps)
    clusters = kmeans.cluster_centers()

    # Run inference on small datasets.
    self._infer_helper(kmeans, clusters, 10)
    self._infer_helper(kmeans, clusters, 1)

  def _parse_feature_dict_helper(self, features, parsed_feature_dict):
    # Perform a sanity check.
    self.assertEqual(features.shape, parsed_feature_dict.shape)
    self.assertEqual(features.dtype, parsed_feature_dict.dtype)
    # Then check that running the tensor yields the original list of points.
    with self.cached_session() as sess:
      parsed_points = sess.run(parsed_feature_dict)
      self.assertAllEqual(self.points, parsed_points)

  def test_parse_features(self):
    """Tests the various behaviours of kmeans._parse_features_if_necessary."""

    # No-op if a tensor is passed in.
    features = tf.constant(self.points)
    parsed_features = kmeans_lib._parse_features_if_necessary(features, None)
    self.assertAllEqual(features, parsed_features)

    # All values from a feature dict are transformed into a tensor.
    feature_dict = {
        'x': [[point[0]] for point in self.points],
        'y': [[point[1]] for point in self.points]
    }
    parsed_feature_dict = kmeans_lib._parse_features_if_necessary(
        feature_dict, None)
    self._parse_feature_dict_helper(features, parsed_feature_dict)

    # Only the feature_columns of a feature dict are transformed into a tensor.
    feature_dict_with_extras = {
        'foo': 'bar',
        'x': [[point[0]] for point in self.points],
        'baz': {'fizz': 'buzz'},
        'y': [[point[1]] for point in self.points]
    }
    feature_columns = [
        tf.feature_column.numeric_column(key='x'),
        tf.feature_column.numeric_column(key='y')
    ]
    parsed_feature_dict = kmeans_lib._parse_features_if_necessary(
        feature_dict_with_extras, feature_columns)
    self._parse_feature_dict_helper(features, parsed_feature_dict)


@test_util.run_all_in_graph_and_eager_modes
class KMeansTestMultiStageInit(KMeansTestBase):

  def test_random(self):
    points = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]],
                      dtype=np.float32)
    kmeans = kmeans_lib.KMeansClustering(
        num_clusters=points.shape[0],
        initial_clusters=kmeans_lib.KMeansClustering.RANDOM_INIT,
        distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
        mini_batch_steps_per_iteration=100,
        seed=24,
        relative_tolerance=None)
    kmeans.train(
        input_fn=self.input_fn(batch_size=1, points=points, randomize=False),
        steps=1)
    clusters = kmeans.cluster_centers()
    self.assertAllEqual(points, clusters)

  def test_kmeans_plus_plus_batch_just_right(self):
    points = np.array([[1, 2]], dtype=np.float32)
    kmeans = kmeans_lib.KMeansClustering(
        num_clusters=points.shape[0],
        initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT,
        distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
        mini_batch_steps_per_iteration=100,
        seed=24,
        relative_tolerance=None)
    kmeans.train(
        input_fn=self.input_fn(batch_size=1, points=points, randomize=False),
        steps=1)
    clusters = kmeans.cluster_centers()
    self.assertAllEqual(points, clusters)

  def test_kmeans_plus_plus_batch_too_small(self):
    points = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]],
                      dtype=np.float32)
    kmeans = kmeans_lib.KMeansClustering(
        num_clusters=points.shape[0],
        initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT,
        distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE,
        use_mini_batch=True,
mini_batch_steps_per_iteration=100, seed=24, relative_tolerance=None) with self.assertRaisesOpError(AssertionError): kmeans.train( input_fn=self.input_fn(batch_size=4, points=points, randomize=False), steps=1) @test_util.run_all_in_graph_and_eager_modes class MiniBatchKMeansTest(KMeansTest): @property def batch_size(self): return 50 @property def use_mini_batch(self): return True @test_util.run_all_in_graph_and_eager_modes class FullBatchAsyncKMeansTest(KMeansTest): @property def batch_size(self): return 50 @property def use_mini_batch(self): return True @property def mini_batch_steps_per_iteration(self): return self.num_points // self.batch_size @test_util.run_all_in_graph_and_eager_modes class KMeansCosineDistanceTest(KMeansTestBase): def setUp(self): self.points = np.array([[2.5, 0.1], [2, 0.2], [3, 0.1], [4, 0.2], [0.1, 2.5], [0.2, 2], [0.1, 3], [0.2, 4]], dtype=np.float32) self.num_points = self.points.shape[0] self.true_centers = np.array([ normalize( np.mean(normalize(self.points)[0:4, :], axis=0, keepdims=True))[0], normalize( np.mean(normalize(self.points)[4:, :], axis=0, keepdims=True))[0] ], dtype=np.float32) self.true_assignments = np.array([0] * 4 + [1] * 4) self.true_score = len(self.points) - np.tensordot( normalize(self.points), self.true_centers[self.true_assignments]) self.num_centers = 2 self.kmeans = kmeans_lib.KMeansClustering( self.num_centers, initial_clusters=kmeans_lib.KMeansClustering.RANDOM_INIT, distance_metric=kmeans_lib.KMeansClustering.COSINE_DISTANCE, use_mini_batch=self.use_mini_batch, mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration, config=self.config(3)) def test_fit(self): max_steps = 10 * self.num_points // self.batch_size self.kmeans.train(input_fn=self.input_fn(), max_steps=max_steps) centers = normalize(self.kmeans.cluster_centers()) centers = centers[centers[:, 0].argsort()] true_centers = self.true_centers[self.true_centers[:, 0].argsort()] self.assertAllClose(centers, true_centers, atol=0.04) def 
test_transform(self): self.kmeans.train(input_fn=self.input_fn(), steps=10) centers = normalize(self.kmeans.cluster_centers()) true_transform = 1 - cosine_similarity(self.points, centers) transform = list( self.kmeans.transform( input_fn=self.input_fn(batch_size=self.num_points, num_epochs=1))) self.assertAllClose(transform, true_transform, atol=1e-3) def test_predict(self): max_steps = 10 * self.num_points // self.batch_size self.kmeans.train(input_fn=self.input_fn(), max_steps=max_steps) centers = normalize(self.kmeans.cluster_centers()) assignments = list( self.kmeans.predict_cluster_index( input_fn=self.input_fn(num_epochs=1, batch_size=self.num_points))) self.assertAllClose( centers[assignments], self.true_centers[self.true_assignments], atol=1e-2) centers = centers[centers[:, 0].argsort()] true_centers = self.true_centers[self.true_centers[:, 0].argsort()] self.assertAllClose(centers, true_centers, atol=0.04) score = self.kmeans.score( input_fn=self.input_fn(batch_size=self.num_points)) self.assertAllClose(score, self.true_score, atol=1e-2) def test_predict_kmeans_plus_plus(self): # Most points are concentrated near one center. KMeans++ is likely to find # the less populated centers. 
points = np.array([[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1], [-3., -3.1], [-3., -3.1], [-3.2, -3.], [-3., -3.]], dtype=np.float32) true_centers = np.array([ normalize(np.mean(normalize(points)[0:2, :], axis=0, keepdims=True))[0], normalize(np.mean(normalize(points)[2:4, :], axis=0, keepdims=True))[0], normalize(np.mean(normalize(points)[4:, :], axis=0, keepdims=True))[0] ], dtype=np.float32) true_assignments = [0] * 2 + [1] * 2 + [2] * 8 true_score = len(points) - np.tensordot( normalize(points), true_centers[true_assignments]) kmeans = kmeans_lib.KMeansClustering( 3, initial_clusters=self.initial_clusters, distance_metric=kmeans_lib.KMeansClustering.COSINE_DISTANCE, use_mini_batch=self.use_mini_batch, mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration, config=self.config(3)) kmeans.train(input_fn=lambda: (tf.constant(points), None), steps=30) centers = normalize(kmeans.cluster_centers()) self.assertAllClose( sorted(centers.tolist()), sorted(true_centers.tolist()), atol=1e-2) def _input_fn(): return (tf.compat.v1.train.limit_epochs( tf.constant(points), num_epochs=1), None) assignments = list(kmeans.predict_cluster_index(input_fn=_input_fn)) self.assertAllClose( centers[assignments], true_centers[true_assignments], atol=1e-2) score = kmeans.score(input_fn=lambda: (tf.constant(points), None)) self.assertAllClose(score, true_score, atol=1e-2) @test_util.run_all_in_graph_and_eager_modes class MiniBatchKMeansCosineTest(KMeansCosineDistanceTest): @property def batch_size(self): return 2 @property def use_mini_batch(self): return True @test_util.run_all_in_graph_and_eager_modes class FullBatchAsyncKMeansCosineTest(KMeansCosineDistanceTest): @property def batch_size(self): return 2 @property def use_mini_batch(self): return True @property def mini_batch_steps_per_iteration(self): return self.num_points // self.batch_size class KMeansBenchmark(benchmark.Benchmark): """Base class for benchmarks.""" def 
SetUp(self, dimension=50, num_clusters=50, points_per_cluster=10000, center_norm=500, cluster_width=20): np.random.seed(123456) self.num_clusters = num_clusters self.num_points = num_clusters * points_per_cluster self.centers = make_random_centers( self.num_clusters, dimension, center_norm=center_norm) self.points, _, scores = make_random_points( self.centers, self.num_points, max_offset=cluster_width) self.score = float(np.sum(scores)) def _report(self, num_iters, start, end, scores): print(scores) self.report_benchmark( iters=num_iters, wall_time=(end - start) / num_iters, extras={ 'true_sum_squared_distances': self.score, 'fit_scores': scores }) def _fit(self, num_iters=10): pass def benchmark_01_2dim_5center_500point(self): self.SetUp(dimension=2, num_clusters=5, points_per_cluster=100) self._fit() def benchmark_02_20dim_20center_10kpoint(self): self.SetUp(dimension=20, num_clusters=20, points_per_cluster=500) self._fit() def benchmark_03_100dim_50center_50kpoint(self): self.SetUp(dimension=100, num_clusters=50, points_per_cluster=1000) self._fit() def benchmark_03_100dim_50center_50kpoint_unseparated(self): self.SetUp( dimension=100, num_clusters=50, points_per_cluster=1000, cluster_width=250) self._fit() def benchmark_04_100dim_500center_500kpoint(self): self.SetUp(dimension=100, num_clusters=500, points_per_cluster=1000) self._fit(num_iters=4) def benchmark_05_100dim_500center_500kpoint_unseparated(self): self.SetUp( dimension=100, num_clusters=500, points_per_cluster=1000, cluster_width=250) self._fit(num_iters=4) class TensorflowKMeansBenchmark(KMeansBenchmark): def _fit(self, num_iters=10): scores = [] start = time.time() for i in range(num_iters): print('Starting tensorflow KMeans: %d' % i) tf_kmeans = kmeans_lib.KMeansClustering( self.num_clusters, initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT, kmeans_plus_plus_num_retries=int(math.log(self.num_clusters) + 2), seed=i * 42, relative_tolerance=1e-6, config=self.config(3)) 
tf_kmeans.train( input_fn=lambda: (tf.constant(self.points), None), steps=50) _ = tf_kmeans.cluster_centers() scores.append( tf_kmeans.score(input_fn=lambda: (tf.constant(self.points), None))) self._report(num_iters, start, time.time(), scores) class SklearnKMeansBenchmark(KMeansBenchmark): def _fit(self, num_iters=10): scores = [] start = time.time() for i in range(num_iters): print('Starting sklearn KMeans: %d' % i) sklearn_kmeans = SklearnKMeans( n_clusters=self.num_clusters, init='k-means++', max_iter=50, n_init=1, tol=1e-4, random_state=i * 42) sklearn_kmeans.train(self.points) scores.append(sklearn_kmeans.inertia_) self._report(num_iters, start, time.time(), scores) @test_util.run_all_in_graph_and_eager_modes class KMeansTestQueues(tf.test.TestCase): def input_fn(self): def _fn(): queue = tf.queue.FIFOQueue( capacity=10, dtypes=tf.dtypes.float32, shapes=[10, 3]) enqueue_op = queue.enqueue(tf.zeros([10, 3], dtype=tf.dtypes.float32)) tf.compat.v1.train.queue_runner.add_queue_runner( tf.compat.v1.train.queue_runner.QueueRunner(queue, [enqueue_op])) return queue.dequeue(), None return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): kmeans = kmeans_lib.KMeansClustering(5) kmeans.train(input_fn=self.input_fn(), steps=1) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/linear.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Linear Estimators.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import math import six import tensorflow as tf from tensorflow.python.feature_column import feature_column from tensorflow.python.feature_column import feature_column_lib from tensorflow.python.feature_column import feature_column_v2 as fc_v2 from tensorflow.python.framework import ops from tensorflow.python.ops import variable_scope from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import head as head_lib from tensorflow_estimator.python.estimator.canned import optimizers from tensorflow_estimator.python.estimator.canned.linear_optimizer.python.utils import sdca_ops from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.head import binary_class_head from tensorflow_estimator.python.estimator.head import head_utils from tensorflow_estimator.python.estimator.head import regression_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys # The default learning rate of 0.2 is a historical artifact of the initial # implementation, but seems a reasonable choice. _LEARNING_RATE = 0.2 @estimator_export('estimator.experimental.LinearSDCA') class LinearSDCA(object): """Stochastic Dual Coordinate Ascent helper for linear estimators. 
Objects of this class are intended to be provided as the optimizer argument (though LinearSDCA objects do not implement the `tf.train.Optimizer` interface) when creating `tf.estimator.LinearClassifier` or `tf.estimator.LinearRegressor`. SDCA can only be used with `LinearClassifier` and `LinearRegressor` under the following conditions: - Feature columns are of type V2. - Multivalent categorical columns are not normalized. In other words the `sparse_combiner` argument in the estimator constructor should be "sum". - For classification: binary label. - For regression: one-dimensional label. Example usage: ```python real_feature_column = numeric_column(...) sparse_feature_column = categorical_column_with_hash_bucket(...) linear_sdca = tf.estimator.experimental.LinearSDCA( example_id_column='example_id', num_loss_partitions=1, num_table_shards=1, symmetric_l2_regularization=2.0) classifier = tf.estimator.LinearClassifier( feature_columns=[real_feature_column, sparse_feature_column], weight_column=..., optimizer=linear_sdca) classifier.train(input_fn_train, steps=50) classifier.evaluate(input_fn=input_fn_eval) ``` Here the expectation is that the `input_fn_*` functions passed to train and evaluate return a pair (dict, label_tensor) where dict has `example_id_column` as `key` whose value is a `Tensor` of shape [batch_size] and dtype string. num_loss_partitions defines sigma' in eq (11) of [3]. Convergence of (global) loss is guaranteed if `num_loss_partitions` is larger or equal to the product `(#concurrent train ops/per worker) x (#workers)`. Larger values for `num_loss_partitions` lead to slower convergence. The recommended value for `num_loss_partitions` in `tf.estimator` (where currently there is one process per worker) is the number of workers running the train steps. It defaults to 1 (single machine). `num_table_shards` defines the number of shards for the internal state table, typically set to match the number of parameter servers for large data sets. 
The SDCA algorithm was originally introduced in [1] and it was followed by the L1 proximal step [2], a distributed version [3] and adaptive sampling [4]. [1] www.jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf [2] https://arxiv.org/pdf/1309.2375.pdf [3] https://arxiv.org/pdf/1502.03508.pdf [4] https://arxiv.org/pdf/1502.08053.pdf Details specific to this implementation are provided in: https://github.com/tensorflow/estimator/tree/master/tensorflow_estimator/python/estimator/canned/linear_optimizer/doc/sdca.ipynb """ def __init__(self, example_id_column, num_loss_partitions=1, num_table_shards=None, symmetric_l1_regularization=0.0, symmetric_l2_regularization=1.0, adaptive=False): """Construct a new SDCA optimizer for linear estimators. Args: example_id_column: The column name containing the example ids. num_loss_partitions: Number of workers. num_table_shards: Number of shards of the internal state table, typically set to match the number of parameter servers. symmetric_l1_regularization: A float value, must be greater than or equal to zero. symmetric_l2_regularization: A float value, must be greater than zero and should typically be greater than 1. adaptive: A boolean indicating whether to use adaptive sampling. 
""" self._example_id_column = example_id_column self._num_loss_partitions = num_loss_partitions self._num_table_shards = num_table_shards self._symmetric_l1_regularization = symmetric_l1_regularization self._symmetric_l2_regularization = symmetric_l2_regularization self._adaptive = adaptive def _prune_and_unique_sparse_ids(self, id_weight_pair): """Remove duplicate and negative ids in a sparse tendor.""" id_tensor = id_weight_pair.id_tensor if id_weight_pair.weight_tensor: weight_tensor = id_weight_pair.weight_tensor.values else: weight_tensor = tf.ones([tf.compat.v1.shape(id_tensor.indices)[0]], tf.dtypes.float32) example_ids = tf.reshape(id_tensor.indices[:, 0], [-1]) flat_ids = tf.cast( tf.reshape(id_tensor.values, [-1]), dtype=tf.dtypes.int64) # Prune invalid IDs (< 0) from the flat_ids, example_ids, and # weight_tensor. These can come from looking up an OOV entry in the # vocabulary (default value being -1). is_id_valid = tf.math.greater_equal(flat_ids, 0) flat_ids = tf.compat.v1.boolean_mask(flat_ids, is_id_valid) example_ids = tf.compat.v1.boolean_mask(example_ids, is_id_valid) weight_tensor = tf.compat.v1.boolean_mask(weight_tensor, is_id_valid) projection_length = tf.math.reduce_max(flat_ids) + 1 # project ids based on example ids so that we can dedup ids that # occur multiple times for a single example. projected_ids = projection_length * example_ids + flat_ids # Remove any redundant ids. ids, idx = tf.unique(projected_ids) # Keep only one example id per duplicated ids. example_ids_filtered = tf.math.unsorted_segment_min( example_ids, idx, tf.compat.v1.shape(ids)[0]) # reproject ids back feature id space. 
reproject_ids = (ids - projection_length * example_ids_filtered) weights = tf.reshape( tf.math.unsorted_segment_sum(weight_tensor, idx, tf.compat.v1.shape(ids)[0]), [-1]) return sdca_ops._SparseFeatureColumn( # pylint: disable=protected-access example_ids_filtered, reproject_ids, weights) def get_train_step(self, state_manager, weight_column_name, loss_type, feature_columns, features, targets, bias_var, global_step): """Returns the training operation of an SdcaModel optimizer.""" batch_size = tf.compat.v1.shape(targets)[0] cache = tf.compat.v2.__internal__.feature_column.FeatureTransformationCache(features) # Iterate over all feature columns and create appropriate lists for dense # and sparse features as well as dense and sparse weights (variables) for # SDCA. dense_features, dense_feature_weights = [], [] sparse_feature_with_values, sparse_feature_with_values_weights = [], [] for column in sorted(feature_columns, key=lambda x: x.name): if isinstance(column, feature_column_lib.CategoricalColumn): id_weight_pair = column.get_sparse_tensors(cache, state_manager) sparse_feature_with_values.append( self._prune_and_unique_sparse_ids(id_weight_pair)) # If a partitioner was used during variable creation, we will have a # list of Variables here larger than 1. sparse_feature_with_values_weights.append( state_manager.get_variable(column, 'weights')) elif isinstance(column, tf.compat.v2.__internal__.feature_column.DenseColumn): if column.variable_shape.ndims != 1: raise ValueError('Column %s has rank %d, larger than 1.' % (type(column).__name__, column.variable_shape.ndims)) dense_features.append(column.get_dense_tensor(cache, state_manager)) # For real valued columns, the variables list contains exactly one # element. dense_feature_weights.append( state_manager.get_variable(column, 'weights')) else: raise ValueError('LinearSDCA does not support column type %s.' 
% type(column).__name__) # Add the bias column dense_features.append(tf.ones([batch_size, 1])) dense_feature_weights.append(bias_var) example_weights = tf.reshape( features[weight_column_name], shape=[-1]) if weight_column_name else tf.ones([batch_size]) example_ids = features[self._example_id_column] training_examples = dict( sparse_features=sparse_feature_with_values, dense_features=dense_features, example_labels=tf.compat.v1.to_float(tf.reshape(targets, shape=[-1])), example_weights=example_weights, example_ids=example_ids) training_variables = dict( sparse_features_weights=sparse_feature_with_values_weights, dense_features_weights=dense_feature_weights) sdca_model = sdca_ops._SDCAModel( # pylint: disable=protected-access examples=training_examples, variables=training_variables, options=dict( symmetric_l1_regularization=self._symmetric_l1_regularization, symmetric_l2_regularization=self._symmetric_l2_regularization, adaptive=self._adaptive, num_loss_partitions=self._num_loss_partitions, num_table_shards=self._num_table_shards, loss_type=loss_type)) train_op = sdca_model.minimize(global_step=global_step) return sdca_model, train_op def _get_default_optimizer_v2(feature_columns): learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns))) return tf_keras.optimizers.legacy.Ftrl(learning_rate=learning_rate) def _get_default_optimizer(feature_columns): learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns))) return tf.compat.v1.train.FtrlOptimizer(learning_rate=learning_rate) def _get_expanded_variable_list(var_list): """Given an iterable of variables, expands them if they are partitioned. Args: var_list: An iterable of variables. Returns: A list of variables where each partitioned variable is expanded to its components. 
""" returned_list = [] for variable in var_list: if (isinstance(variable, tf.Variable) or tf.compat.v2.__internal__.ops.is_resource_variable(variable) or isinstance(variable, tf.Tensor)): returned_list.append(variable) # Single variable/tensor case. else: # Must be a PartitionedVariable, so convert into a list. returned_list.extend(list(variable)) return returned_list # TODO(rohanj): Consider making this a public utility method. def _compute_fraction_of_zero(variables): """Given a linear variables list, compute the fraction of zero weights. Args: variables: A list or list of list of variables Returns: The fraction of zeros (sparsity) in the linear model. """ with ops.name_scope('zero_fraction'): variables = tf.nest.flatten(variables) with ops.name_scope('total_size'): sizes = [ tf.compat.v1.size(x, out_type=tf.dtypes.int64) for x in variables ] total_size_int64 = tf.math.add_n(sizes) with ops.name_scope('total_zero'): total_zero_float32 = tf.math.add_n([ tf.compat.v1.cond( tf.math.equal(size, tf.constant(0, dtype=tf.dtypes.int64)), true_fn=lambda: tf.constant(0, dtype=tf.dtypes.float32), false_fn=lambda: tf.math.zero_fraction(x) * tf.cast( size, dtype=tf.dtypes.float32), name='zero_count') for x, size in zip(variables, sizes) ]) with ops.name_scope('compute'): total_size_float32 = tf.cast( total_size_int64, dtype=tf.dtypes.float32, name='float32_size') zero_fraction_or_nan = total_zero_float32 / total_size_float32 zero_fraction_or_nan = tf.identity( zero_fraction_or_nan, name='zero_fraction_or_nan') return zero_fraction_or_nan def linear_logit_fn_builder_v2(units, feature_columns, sparse_combiner='sum'): """Function builder for a linear logit_fn. Args: units: An int indicating the dimension of the logit layer. feature_columns: An iterable containing all the feature columns used by the model. sparse_combiner: A string specifying how to reduce if a categorical column is multivalent. One of "mean", "sqrtn", and "sum". Returns: A logit_fn (see below). 
""" def linear_logit_fn(features): """Linear model logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. Returns: A `Tensor` representing the logits. """ if not feature_column_lib.is_feature_column_v2(feature_columns): raise ValueError( 'Received a feature column from TensorFlow v1, but this is a ' 'TensorFlow v2 Estimator. Please either use v2 feature columns ' '(accessible via tf.feature_column.* in TF 2.x) with this ' 'Estimator, or switch to a v1 Estimator for use with v1 feature ' 'columns (accessible via tf.compat.v1.estimator.* and ' 'tf.compat.v1.feature_column.*, respectively.') linear_model = LinearModel( feature_columns=feature_columns, units=units, sparse_combiner=sparse_combiner, name='linear_model') logits = linear_model(features) bias = linear_model.bias # We'd like to get all the non-bias variables associated with this # LinearModel. # TODO(rohanj): Figure out how to get shared embedding weights variable # here. variables = linear_model.variables variables.remove(bias) # Expand (potential) Partitioned variables bias = _get_expanded_variable_list([bias]) variables = _get_expanded_variable_list(variables) if units > 1: tf.compat.v1.summary.histogram('bias', bias) else: # If units == 1, the bias value is a length-1 list of a scalar Tensor, # so we should provide a scalar summary. tf.compat.v1.summary.scalar('bias', bias[0][0]) tf.compat.v1.summary.scalar('fraction_of_zero_weights', _compute_fraction_of_zero(variables)) return logits return linear_logit_fn @estimator_export(v1=['estimator.experimental.linear_logit_fn_builder']) def linear_logit_fn_builder(units, feature_columns, sparse_combiner='sum'): """Function builder for a linear logit_fn. Args: units: An int indicating the dimension of the logit layer. feature_columns: An iterable containing all the feature columns used by the model. 
sparse_combiner: A string specifying how to reduce if a categorical column is multivalent. One of "mean", "sqrtn", and "sum". Returns: A logit_fn (see below). """ def linear_logit_fn(features): """Linear model logit_fn. Args: features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. Returns: A `Tensor` representing the logits. """ if feature_column_lib.is_feature_column_v2(feature_columns): linear_model = LinearModel( feature_columns=feature_columns, units=units, sparse_combiner=sparse_combiner, name='linear_model') logits = linear_model(features) # We'd like to get all the non-bias variables associated with this # LinearModel. # TODO(rohanj): Figure out how to get shared embedding weights variable # here. bias = linear_model.bias variables = linear_model.variables # Expand (potential) Partitioned variables bias = _get_expanded_variable_list([bias]) variables = _get_expanded_variable_list(variables) variables = [var for var in variables if var not in bias] # Expand (potential) Partitioned variables bias = _get_expanded_variable_list([bias]) else: linear_model = feature_column._LinearModel( # pylint: disable=protected-access feature_columns=feature_columns, units=units, sparse_combiner=sparse_combiner, name='linear_model') logits = linear_model(features) cols_to_vars = linear_model.cols_to_vars() bias = cols_to_vars.pop('bias') variables = cols_to_vars.values() variables = _get_expanded_variable_list(variables) if units > 1: tf.compat.v1.summary.histogram('bias', bias) else: # If units == 1, the bias value is a length-1 list of a scalar Tensor, # so we should provide a scalar summary. 
tf.compat.v1.summary.scalar('bias', bias[0][0]) tf.compat.v1.summary.scalar('fraction_of_zero_weights', _compute_fraction_of_zero(variables)) return logits return linear_logit_fn def _sdca_model_fn(features, labels, mode, head, feature_columns, optimizer): """A model_fn for linear models that use the SDCA optimizer. Args: features: dict of `Tensor`. labels: `Tensor` of shape `[batch_size]`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. feature_columns: An iterable containing all the feature columns used by the model. optimizer: a `LinearSDCA` instance. Returns: An `EstimatorSpec` instance. Raises: ValueError: mode or params are invalid, or features has the wrong type. """ assert feature_column_lib.is_feature_column_v2(feature_columns) if isinstance(head, (binary_class_head.BinaryClassHead, head_lib._BinaryLogisticHeadWithSigmoidCrossEntropyLoss)): # pylint: disable=protected-access loss_type = 'logistic_loss' elif isinstance(head, (regression_head.RegressionHead, head_lib._RegressionHeadWithMeanSquaredErrorLoss)): # pylint: disable=protected-access assert head.logits_dimension == 1 loss_type = 'squared_loss' else: raise ValueError('Unsupported head type: {}'.format(head)) # The default name for LinearModel. linear_model_name = 'linear_model' # Name scope has no effect on variables in LinearModel, as it uses # tf.get_variables() for variable creation. So we modify the model name to # keep the variable names the same for checkpoint backward compatibility in # canned Linear v2. if isinstance( head, (binary_class_head.BinaryClassHead, regression_head.RegressionHead)): linear_model_name = 'linear/linear_model' linear_model = LinearModel( feature_columns=feature_columns, units=1, sparse_combiner='sum', name=linear_model_name) logits = linear_model(features) # We'd like to get all the non-bias variables associated with this # LinearModel. 
# TODO(rohanj): Figure out how to get shared embedding weights variable # here. bias = linear_model.bias variables = linear_model.variables # Expand (potential) Partitioned variables bias = _get_expanded_variable_list([bias]) variables = _get_expanded_variable_list(variables) variables = [var for var in variables if var not in bias] tf.compat.v1.summary.scalar('bias', bias[0][0]) tf.compat.v1.summary.scalar('fraction_of_zero_weights', _compute_fraction_of_zero(variables)) if mode == ModeKeys.TRAIN: sdca_model, train_op = optimizer.get_train_step( linear_model.layer._state_manager, # pylint: disable=protected-access head._weight_column, # pylint: disable=protected-access loss_type, feature_columns, features, labels, linear_model.bias, tf.compat.v1.train.get_global_step()) update_weights_hook = _SDCAUpdateWeightsHook(sdca_model, train_op) model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=lambda unused_loss_fn: train_op, logits=logits) return model_fn_ops._replace( training_chief_hooks=(model_fn_ops.training_chief_hooks + (update_weights_hook,))) else: return head.create_estimator_spec( features=features, mode=mode, labels=labels, logits=logits) class _SDCAUpdateWeightsHook(tf.compat.v1.train.SessionRunHook): """SessionRunHook to update and shrink SDCA model weights.""" def __init__(self, sdca_model, train_op): self._sdca_model = sdca_model self._train_op = train_op def begin(self): """Construct the update_weights op. The op is implicitly added to the default graph. """ self._update_op = self._sdca_model.update_weights(self._train_op) def before_run(self, run_context): """Return the update_weights op so that it is executed during this run.""" return tf.compat.v1.train.SessionRunArgs(self._update_op) def _linear_model_fn_builder_v2(units, feature_columns, sparse_combiner='sum', features=None): """Function builder for a linear model_fn. Args: units: An int indicating the dimension of the logit layer. 
feature_columns: An iterable containing all the feature columns used by the model. sparse_combiner: A string specifying how to reduce if a categorical column is multivalent. One of "mean", "sqrtn", and "sum". features: This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `Tensor` or `dict` of same. Returns: A `Tensor` representing the logits. A list of trainable variables. """ if not feature_column_lib.is_feature_column_v2(feature_columns): raise ValueError( 'Received a feature column from TensorFlow v1, but this is a ' 'TensorFlow v2 Estimator. Please either use v2 feature columns ' '(accessible via tf.feature_column.* in TF 2.x) with this ' 'Estimator, or switch to a v1 Estimator for use with v1 feature ' 'columns (accessible via tf.compat.v1.estimator.* and ' 'tf.compat.v1.feature_column.*, respectively.') # Name scope has no effect on variables in LinearModel, as it uses # tf.get_variables() for variable creation. So we modify the model name to # keep the variable names the same for checkpoint backward compatibility. linear_model = LinearModel( feature_columns=feature_columns, units=units, sparse_combiner=sparse_combiner, name='linear/linear_model') logits = linear_model(features) bias = linear_model.bias # We'd like to get all the non-bias variables associated with this # LinearModel. # TODO(rohanj): Figure out how to get shared embedding weights variable # here. variables = linear_model.variables variables.remove(bias) if units > 1: tf.compat.v1.summary.histogram('bias', bias) else: # If units == 1, the bias value is a length-1 list of a scalar Tensor, # so we should provide a scalar summary. 
tf.compat.v1.summary.scalar('bias', bias[0]) tf.compat.v1.summary.scalar('fraction_of_zero_weights', _compute_fraction_of_zero(variables)) return logits, linear_model.variables def _linear_model_fn_v2(features, labels, mode, head, feature_columns, optimizer, config, sparse_combiner='sum'): """A model_fn for linear models that use a gradient-based optimizer. Args: features: dict of `Tensor`. labels: `Tensor` of shape `[batch_size, logits_dimension]`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. feature_columns: An iterable containing all the feature columns used by the model. optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use a FTRL optimizer. config: `RunConfig` object to configure the runtime settings. sparse_combiner: A string specifying how to reduce if a categorical column is multivalent. One of "mean", "sqrtn", and "sum". Returns: An `EstimatorSpec` instance. Raises: ValueError: mode or params are invalid, or features has the wrong type. """ if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. ' 'Given type: {}'.format(type(features))) del config if isinstance(optimizer, LinearSDCA): assert sparse_combiner == 'sum' return _sdca_model_fn(features, labels, mode, head, feature_columns, optimizer) else: logits, trainable_variables = _linear_model_fn_builder_v2( units=head.logits_dimension, feature_columns=feature_columns, sparse_combiner=sparse_combiner, features=features) # In TRAIN mode, create optimizer and assign global_step variable to # optimizer.iterations to make global_step increased correctly, as Hooks # relies on global step as step counter. 
if mode == ModeKeys.TRAIN: optimizer = optimizers.get_optimizer_instance_v2( optimizer or _get_default_optimizer_v2(feature_columns), learning_rate=_LEARNING_RATE) optimizer.iterations = tf.compat.v1.train.get_or_create_global_step() return head.create_estimator_spec( features=features, mode=mode, labels=labels, optimizer=optimizer, trainable_variables=trainable_variables, logits=logits) def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, partitioner, config, sparse_combiner='sum'): """A model_fn for linear models that use a gradient-based optimizer. Args: features: dict of `Tensor`. labels: `Tensor` of shape `[batch_size, logits_dimension]`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. head: A `Head` instance. feature_columns: An iterable containing all the feature columns used by the model. optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use a FTRL optimizer. partitioner: Partitioner for variables. config: `RunConfig` object to configure the runtime settings. sparse_combiner: A string specifying how to reduce if a categorical column is multivalent. One of "mean", "sqrtn", and "sum". Returns: An `EstimatorSpec` instance. Raises: ValueError: mode or params are invalid, or features has the wrong type. """ if not isinstance(features, dict): raise ValueError('features should be a dictionary of `Tensor`s. 
' 'Given type: {}'.format(type(features))) num_ps_replicas = config.num_ps_replicas if config else 0 partitioner = partitioner or (tf.compat.v1.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with tf.compat.v1.variable_scope( 'linear', values=tuple(six.itervalues(features)), partitioner=partitioner): if isinstance(optimizer, LinearSDCA): assert sparse_combiner == 'sum' return _sdca_model_fn(features, labels, mode, head, feature_columns, optimizer) else: logit_fn = linear_logit_fn_builder( units=head.logits_dimension, feature_columns=feature_columns, sparse_combiner=sparse_combiner, ) logits = logit_fn(features=features) optimizer = optimizers.get_optimizer_instance( optimizer or _get_default_optimizer(feature_columns), learning_rate=_LEARNING_RATE) return head.create_estimator_spec( features=features, mode=mode, labels=labels, optimizer=optimizer, logits=logits) def _validate_linear_sdca_optimizer_for_linear_classifier( feature_columns, n_classes, optimizer, sparse_combiner): """Helper function for the initialization of LinearClassifier.""" if isinstance(optimizer, LinearSDCA): if sparse_combiner != 'sum': raise ValueError('sparse_combiner must be "sum" when optimizer ' 'is a LinearSDCA object.') if not feature_column_lib.is_feature_column_v2(feature_columns): raise ValueError('V2 feature columns required when optimizer ' 'is a LinearSDCA object.') if n_classes > 2: raise ValueError('LinearSDCA cannot be used in a multi-class setting.') @estimator_export('estimator.LinearClassifier', v1=[]) class LinearClassifierV2(estimator.EstimatorV2): """Linear classifier model. Train a linear model to classify instances into one of multiple possible classes. When number of possible classes is 2, this is binary classification. Example: ```python categorical_column_a = categorical_column_with_hash_bucket(...) categorical_column_b = categorical_column_with_hash_bucket(...) categorical_feature_a_x_categorical_feature_b = crossed_column(...) 
  # Estimator using the default optimizer.
  estimator = tf.estimator.LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b])

  # Or estimator using the FTRL optimizer with regularization.
  estimator = tf.estimator.LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      optimizer=tf_keras.optimizers.Ftrl(
        learning_rate=0.1,
        l1_regularization_strength=0.001
      ))

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      optimizer=lambda: tf_keras.optimizers.Ftrl(
          learning_rate=tf.exponential_decay(
              learning_rate=0.1,
              global_step=tf.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96)))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = tf.estimator.LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict:
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train)
  metrics = estimator.evaluate(input_fn=input_fn_eval)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `SparseColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedSparseColumn`, two features: the first with
      `key` the id column name, the second with `key` the weight column name.
      Both features' `value` must be a `SparseTensor`.
    - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
      whose `value` is a `Tensor`.

  Loss is calculated by using softmax cross entropy.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               warm_start_from=None,
               loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
               sparse_combiner='sum'):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings represents possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2
        . Also there will be errors if vocabulary is not provided and labels
        are string.
      optimizer: An instance of `tf_keras.optimizers.*` or
        `tf.estimator.experimental.LinearSDCA` used to train the model. Can
        also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'),
        or callable. Defaults to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from,
        or a `WarmStartSettings` object to fully configure warm-starting.  If
        the string filepath is provided instead of a `WarmStartSettings`, then
        all weights and biases are warm-started, and it is assumed that
        vocabularies and Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
        how to reduce training loss over batch. Defaults to
        `SUM_OVER_BATCH_SIZE`.
      sparse_combiner: A string specifying how to reduce if a categorical
        column is multivalent. One of "mean", "sqrtn", and "sum" -- these are
        effectively different ways to do example-level normalization, which
        can be useful for bag-of-words features. for more details, see
        `tf.feature_column.linear_model`.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
    # Reject incompatible LinearSDCA configurations before building anything.
    _validate_linear_sdca_optimizer_for_linear_classifier(
        feature_columns=feature_columns,
        n_classes=n_classes,
        optimizer=optimizer,
        sparse_combiner=sparse_combiner)

    estimator._canned_estimator_api_gauge.get_cell('Classifier').set('Linear')  # pylint: disable=protected-access
    head = head_utils.binary_or_multi_class_head(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          config=config,
          sparse_combiner=sparse_combiner)

    super(LinearClassifierV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.LinearClassifier'])  # pylint: disable=missing-docstring
class LinearClassifier(estimator.Estimator):
  # The v1 class differs from v2 only in its default loss reduction (and the
  # extra `partitioner` argument), so its docstring is derived from v2's.
  __doc__ = LinearClassifierV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM')

  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               warm_start_from=None,
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               sparse_combiner='sum'):
    _validate_linear_sdca_optimizer_for_linear_classifier(
        feature_columns=feature_columns,
        n_classes=n_classes,
        optimizer=optimizer,
        sparse_combiner=sparse_combiner)
    estimator._canned_estimator_api_gauge.get_cell('Classifier').set('Linear')  # pylint: disable=protected-access

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config,
          sparse_combiner=sparse_combiner)

    super(LinearClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export('estimator.LinearEstimator', v1=[])
class LinearEstimatorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow linear models with user-specified head.

  Example:

  ```python
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)

  # Estimator using the default optimizer.
  estimator = tf.estimator.LinearEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b])

  # Or estimator using an optimizer with a learning rate decay.
  estimator = tf.estimator.LinearEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      optimizer=lambda: tf_keras.optimizers.Ftrl(
          learning_rate=tf.compat.v1.train.exponential_decay(
              learning_rate=0.1,
              global_step=tf.compat.v1.train.get_global_step(),
              decay_steps=10000,
              decay_rate=0.96)))

  # Or estimator using the FTRL optimizer with regularization.
  estimator = tf.estimator.LinearEstimator(
      head=tf.estimator.MultiLabelHead(n_classes=3),
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      optimizer=tf_keras.optimizers.Ftrl(
        learning_rate=0.1,
        l1_regularization_strength=0.001
      ))

  def input_fn_train:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_eval:
    # Returns tf.data.Dataset of (x, y) tuple where y represents label's class
    # index.
    pass
  def input_fn_predict:
    # Returns tf.data.Dataset of (x, None) tuple.
    pass
  estimator.train(input_fn=input_fn_train, steps=100)
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have following features,
  otherwise there will be a `KeyError`:

  * if `weight_column` is not `None`, a feature with `key=weight_column` whose
    value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `CategoricalColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss and predicted output are determined by the specified head.

  @compatibility(eager)
  Estimators can be used while eager execution is enabled. Note that `input_fn`
  and all hooks are executed inside a graph context, so they have to be written
  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
  generally works in both graph and eager modes.
  @end_compatibility
  """

  def __init__(self,
               head,
               feature_columns,
               model_dir=None,
               optimizer='Ftrl',
               config=None,
               sparse_combiner='sum',
               warm_start_from=None):
    """Initializes a `LinearEstimator` instance.

    Args:
      head: A `Head` instance constructed with a method such as
        `tf.estimator.MultiLabelHead`.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
      optimizer: An instance of `tf_keras.optimizers.*` used to train the
        model.
        Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp',
        'SGD'), or callable. Defaults to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      sparse_combiner: A string specifying how to reduce if a categorical
        column is multivalent. One of "mean", "sqrtn", and "sum" -- these are
        effectively different ways to do example-level normalization, which
        can be useful for bag-of-words features. for more details, see
        `tf.feature_column.linear_model`.
      warm_start_from: A string filepath to a checkpoint to warm-start from,
        or a `WarmStartSettings` object to fully configure warm-starting.  If
        the string filepath is provided instead of a `WarmStartSettings`, then
        all weights and biases are warm-started, and it is assumed that
        vocabularies and Tensor names are unchanged.
    """

    def _model_fn(features, labels, mode, config):
      return _linear_model_fn_v2(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          config=config,
          sparse_combiner=sparse_combiner)

    estimator._canned_estimator_api_gauge.get_cell('Estimator').set('Linear')  # pylint: disable=protected-access
    super(LinearEstimatorV2, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@estimator_export(v1=['estimator.LinearEstimator'])  # pylint: disable=missing-docstring
class LinearEstimator(estimator.Estimator):
  __doc__ = LinearEstimatorV2.__doc__

  def __init__(self,
               head,
               feature_columns,
               model_dir=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               sparse_combiner='sum',
               warm_start_from=None):
    """Initializes a `LinearEstimator` instance.

    Args:
      head: A `_Head` instance constructed with a method such as
        `tf.contrib.estimator.multi_label_head`.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc.
        This can also be used to load checkpoints from the directory into a
        estimator to continue training a previously saved model.
      optimizer: An instance of `tf.Optimizer` used to train the model. Can
        also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'),
        or callable. Defaults to FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for input layer.
      sparse_combiner: A string specifying how to reduce if a categorical
        column is multivalent. One of "mean", "sqrtn", and "sum" -- these are
        effectively different ways to do example-level normalization, which
        can be useful for bag-of-words features. for more details, see
        `tf.feature_column.linear_model`.
      warm_start_from: A string filepath to a checkpoint to warm-start from,
        or a `WarmStartSettings` object to fully configure warm-starting.  If
        the string filepath is provided instead of a `WarmStartSettings`, then
        all weights and biases are warm-started, and it is assumed that
        vocabularies and Tensor names are unchanged.
    """

    def _model_fn(features, labels, mode, config):
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config,
          sparse_combiner=sparse_combiner)

    estimator._canned_estimator_api_gauge.get_cell('Estimator').set('Linear')  # pylint: disable=protected-access
    super(LinearEstimator, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


def _validate_linear_sdca_optimizer_for_linear_regressor(
    feature_columns, label_dimension, optimizer, sparse_combiner):
  """Helper function for the initialization of LinearRegressor."""
  if isinstance(optimizer, LinearSDCA):
    if sparse_combiner != 'sum':
      raise ValueError('sparse_combiner must be "sum" when optimizer '
                       'is a LinearSDCA object.')
    if not feature_column_lib.is_feature_column_v2(feature_columns):
      raise ValueError('V2 feature columns required when optimizer '
                       'is a LinearSDCA object.')
    if label_dimension > 1:
      raise ValueError('LinearSDCA can only be used with one-dimensional '
                       'label.')


@estimator_export('estimator.LinearRegressor', v1=[])
class LinearRegressorV2(estimator.EstimatorV2):
  """An estimator for TensorFlow Linear regression problems.

  Train a linear regression model to predict label value given observation of
  feature values.

  Example:

  ```python
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)

  # Estimator using the default optimizer.
  estimator = tf.estimator.LinearRegressor(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b])

  # Or estimator using the FTRL optimizer with regularization.
estimator = tf.estimator.LinearRegressor( feature_columns=[categorical_column_a, categorical_feature_a_x_categorical_feature_b], optimizer=tf_keras.optimizers.Ftrl( learning_rate=0.1, l1_regularization_strength=0.001 )) # Or estimator using an optimizer with a learning rate decay. estimator = tf.estimator.LinearRegressor( feature_columns=[categorical_column_a, categorical_feature_a_x_categorical_feature_b], optimizer=lambda: tf_keras.optimizers.Ftrl( learning_rate=tf.compat.v1.train.exponential_decay( learning_rate=0.1, global_step=tf.compat.v1.train.get_global_step(), decay_steps=10000, decay_rate=0.96)) # Or estimator with warm-starting from a previous checkpoint. estimator = tf.estimator.LinearRegressor( feature_columns=[categorical_column_a, categorical_feature_a_x_categorical_feature_b], warm_start_from="/path/to/checkpoint/dir") # Input builders def input_fn_train: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_eval: # Returns tf.data.Dataset of (x, y) tuple where y represents label's class # index. pass def input_fn_predict: # Returns tf.data.Dataset of (x, None) tuple. pass estimator.train(input_fn=input_fn_train) metrics = estimator.evaluate(input_fn=input_fn_eval) predictions = estimator.predict(input_fn=input_fn_predict) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a KeyError: * if `weight_column` is not `None`, a feature with `key=weight_column` whose value is a `Tensor`. * for each `column` in `feature_columns`: - if `column` is a `SparseColumn`, a feature with `key=column.name` whose `value` is a `SparseTensor`. - if `column` is a `WeightedSparseColumn`, two features: the first with `key` the id column name, the second with `key` the weight column name. Both features' `value` must be a `SparseTensor`. - if `column` is a `RealValuedColumn`, a feature with `key=column.name` whose `value` is a `Tensor`. Loss is calculated by using mean squared error. 
@compatibility(eager) Estimators can be used while eager execution is enabled. Note that `input_fn` and all hooks are executed inside a graph context, so they have to be written to be compatible with graph mode. Note that `input_fn` code using `tf.data` generally works in both graph and eager modes. @end_compatibility """ def __init__(self, feature_columns, model_dir=None, label_dimension=1, weight_column=None, optimizer='Ftrl', config=None, warm_start_from=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, sparse_combiner='sum'): """Initializes a `LinearRegressor` instance. Args: feature_columns: An iterable containing all the feature columns used by the model. All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. label_dimension: Number of regression targets per example. This is the size of the last dimension of the labels and logits `Tensor` objects (typically, these have shape `[batch_size, label_dimension]`). weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. If it is a string, it is used as a key to fetch weight tensor from the `features`. If it is a `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then weight_column.normalizer_fn is applied on it to get weight tensor. optimizer: An instance of `tf_keras.optimizers.*` or `tf.estimator.experimental.LinearSDCA` used to train the model. Can also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL optimizer. config: `RunConfig` object to configure the runtime settings. 
warm_start_from: A string filepath to a checkpoint to warm-start from, or a `WarmStartSettings` object to fully configure warm-starting. If the string filepath is provided instead of a `WarmStartSettings`, then all weights and biases are warm-started, and it is assumed that vocabularies and Tensor names are unchanged. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to reduce training loss over batch. Defaults to `SUM`. sparse_combiner: A string specifying how to reduce if a categorical column is multivalent. One of "mean", "sqrtn", and "sum" -- these are effectively different ways to do example-level normalization, which can be useful for bag-of-words features. for more details, see `tf.feature_column.linear_model`. """ _validate_linear_sdca_optimizer_for_linear_regressor( feature_columns=feature_columns, label_dimension=label_dimension, optimizer=optimizer, sparse_combiner=sparse_combiner) head = regression_head.RegressionHead( label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction) estimator._canned_estimator_api_gauge.get_cell('Regressor').set('Linear') # pylint: disable=protected-access def _model_fn(features, labels, mode, config): """Call the defined shared _linear_model_fn.""" return _linear_model_fn_v2( features=features, labels=labels, mode=mode, head=head, feature_columns=tuple(feature_columns or []), optimizer=optimizer, config=config, sparse_combiner=sparse_combiner) super(LinearRegressorV2, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config, warm_start_from=warm_start_from) @estimator_export(v1=['estimator.LinearRegressor']) # pylint: disable=missing-docstring class LinearRegressor(estimator.Estimator): __doc__ = LinearRegressorV2.__doc__.replace('SUM_OVER_BATCH_SIZE', 'SUM') def __init__(self, feature_columns, model_dir=None, label_dimension=1, weight_column=None, optimizer='Ftrl', config=None, partitioner=None, warm_start_from=None, 
               loss_reduction=tf.compat.v1.losses.Reduction.SUM,
               sparse_combiner='sum'):
    # NOTE(review): this is the tail of LinearRegressor.__init__; the def line
    # and docstring are above this chunk.
    # Reject optimizer/column combinations that the SDCA optimizer cannot
    # handle before building any graph state.
    _validate_linear_sdca_optimizer_for_linear_regressor(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        optimizer=optimizer,
        sparse_combiner=sparse_combiner)
    head = head_lib._regression_head(  # pylint: disable=protected-access
        label_dimension=label_dimension,
        weight_column=weight_column,
        loss_reduction=loss_reduction)
    # Usage telemetry: records that a canned Linear Regressor was constructed.
    estimator._canned_estimator_api_gauge.get_cell('Regressor').set('Linear')  # pylint: disable=protected-access

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          # `feature_columns` may be any iterable (or None); freeze to a tuple
          # so every call to the model_fn sees the same columns.
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config,
          sparse_combiner=sparse_combiner)

    super(LinearRegressor, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


class _LinearModelLayer(tf_keras.layers.Layer):
  """Layer that contains logic for `LinearModel`."""

  def __init__(self,
               feature_columns,
               units=1,
               sparse_combiner='sum',
               trainable=True,
               name=None,
               **kwargs):
    """Creates the layer.

    Args:
      feature_columns: Iterable of `DenseColumn`s and/or `CategoricalColumn`s
        that define the layer's inputs.
      units: Output dimension of the weighted sum (1 for plain regression).
      sparse_combiner: How multivalent categorical columns are reduced; one of
        'sum', 'mean', 'sqrtn'.
      trainable: Whether created variables join TRAINABLE_VARIABLES.
      name: Variable/op scope name for everything this layer creates.
      **kwargs: Forwarded to the Keras `Layer` base class.

    Raises:
      ValueError: if a column is neither a DenseColumn nor CategoricalColumn.
    """
    super(_LinearModelLayer, self).__init__(
        name=name, trainable=trainable, **kwargs)

    self._feature_columns = fc_v2._normalize_feature_columns(  # pylint: disable=protected-access
        feature_columns)
    for column in self._feature_columns:
      if not isinstance(column,
                        (tf.compat.v2.__internal__.feature_column.DenseColumn,
                         fc_v2.CategoricalColumn)):
        raise ValueError(
            'Items of feature_columns must be either a '
            'DenseColumn or CategoricalColumn. Given: {}'.format(column))

    self._units = units
    self._sparse_combiner = sparse_combiner

    # The state manager owns all per-column variables (weights, lookup tables);
    # only the bias is created through the regular Keras add_weight path.
    self._state_manager = tf.compat.v2.__internal__.feature_column.StateManager(  # pylint: disable=protected-access
        self, self.trainable)
    self.bias = None

  def build(self, _):
    """Creates per-column weight variables and the shared bias.

    The input shape argument is ignored: variable shapes are derived from the
    feature columns themselves, not from the incoming tensors.
    """
    # We need variable scopes for now because we want the variable partitioning
    # information to percolate down. We also use _pure_variable_scope's here
    # since we want to open up a name_scope in the `call` method while creating
    # the ops.
    with variable_scope._pure_variable_scope(self.name):  # pylint: disable=protected-access
      for column in self._feature_columns:
        with variable_scope._pure_variable_scope(  # pylint: disable=protected-access
            fc_v2._sanitize_column_name_for_variable_scope(column.name)):  # pylint: disable=protected-access
          # Create the state for each feature column
          column.create_state(self._state_manager)

          # Create a weight variable for each column.
          if isinstance(column, fc_v2.CategoricalColumn):
            # One weight row per bucket id.
            first_dim = column.num_buckets
          else:
            # One weight row per dense input element.
            first_dim = column.variable_shape.num_elements()
          self._state_manager.create_variable(
              column,
              name='weights',
              dtype=tf.float32,
              shape=(first_dim, self._units),
              initializer=tf_keras.initializers.zeros(),
              trainable=self.trainable)

      # Create a bias variable.
      self.bias = self.add_weight(
          name='bias_weights',
          dtype=tf.float32,
          shape=[self._units],
          initializer=tf_keras.initializers.zeros(),
          trainable=self.trainable,
          use_resource=True,
          # TODO(rohanj): Get rid of this hack once we have a mechanism for
          # specifying a default partitioner for an entire layer. In that case,
          # the default getter for Layers should work.
          getter=tf.compat.v1.get_variable)

    super(_LinearModelLayer, self).build(None)

  def call(self, features):
    """Computes `sum_i(column_i . weights_i) + bias` for the batch.

    Args:
      features: Dict mapping feature keys to `Tensor`/`SparseTensor` values.

    Returns:
      A float32 `Tensor` of shape (batch_size, units).

    Raises:
      ValueError: if `features` is not a dict.
    """
    if not isinstance(features, dict):
      raise ValueError('We expected a dictionary here. Instead we got: {}'
                       .format(features))
    with ops.name_scope(self.name):
      transformation_cache = (
          tf.compat.v2.__internal__.feature_column.FeatureTransformationCache(
              features))
      weighted_sums = []
      for column in self._feature_columns:
        with ops.name_scope(
            fc_v2._sanitize_column_name_for_variable_scope(column.name)):  # pylint: disable=protected-access
          # All the weights used in the linear model are owned by the state
          # manager associated with this Linear Model.
          weight_var = self._state_manager.get_variable(column, 'weights')

          weighted_sum = fc_v2._create_weighted_sum(  # pylint: disable=protected-access
              column=column,
              transformation_cache=transformation_cache,
              state_manager=self._state_manager,
              sparse_combiner=self._sparse_combiner,
              weight_var=weight_var)
          weighted_sums.append(weighted_sum)

      fc_v2._verify_static_batch_size_equality(  # pylint: disable=protected-access
          weighted_sums, self._feature_columns)
      predictions_no_bias = tf.math.add_n(
          weighted_sums, name='weighted_sum_no_bias')
      predictions = tf.nn.bias_add(
          predictions_no_bias, self.bias, name='weighted_sum')
      return predictions

  def get_config(self):
    """Returns a serializable config dict (Keras serialization protocol)."""
    # Import here to avoid circular imports.
    from tensorflow.python.feature_column import serialization  # pylint: disable=g-import-not-at-top
    column_configs = serialization.serialize_feature_columns(
        self._feature_columns)
    config = {
        'feature_columns': column_configs,
        'units': self._units,
        'sparse_combiner': self._sparse_combiner
    }
    base_config = super(  # pylint: disable=bad-super-call
        _LinearModelLayer, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Rebuilds the layer from a `get_config()` dict."""
    # Import here to avoid circular imports.
    from tensorflow.python.feature_column import serialization  # pylint: disable=g-import-not-at-top
    config_cp = config.copy()
    columns = serialization.deserialize_feature_columns(
        config_cp['feature_columns'], custom_objects=custom_objects)
    del config_cp['feature_columns']
    return cls(feature_columns=columns, **config_cp)
class LinearModel(tf_keras.Model):
  """Produces a linear prediction `Tensor` based on given `feature_columns`.

  This layer generates a weighted sum based on output dimension `units`.
  Weighted sum refers to logits in classification problems. It refers to the
  prediction itself for linear regression problems.

  Note on supported columns: `LinearLayer` treats categorical columns as
  `indicator_column`s. To be specific, assume the input as `SparseTensor`
  looks like:

  ```python
    shape = [2, 2]
    {
        [0, 0]: "a"
        [1, 0]: "b"
        [1, 1]: "c"
    }
  ```
  `linear_model` assigns weights for the presence of "a", "b", "c" implicitly,
  just like `indicator_column`, while `input_layer` explicitly requires wrapping
  each of categorical columns with an `embedding_column` or an
  `indicator_column`.

  Example of usage:

  ```python
  price = numeric_column('price')
  price_buckets = bucketized_column(price, boundaries=[0., 10., 100., 1000.])
  keywords = categorical_column_with_hash_bucket("keywords", 10K)
  keywords_price = crossed_column('keywords', price_buckets, ...)
  columns = [price_buckets, keywords, keywords_price ...]
  linear_model = LinearLayer(columns)
  features = tf.io.parse_example(...,
                                 features=make_parse_example_spec(columns))
  prediction = linear_model(features)
  ```
  """

  def __init__(self,
               feature_columns,
               units=1,
               sparse_combiner='sum',
               trainable=True,
               name=None,
               **kwargs):
    """Constructs a LinearLayer.

    Args:
      feature_columns: An iterable containing the FeatureColumns to use as
        inputs to your model. All items should be instances of classes derived
        from `_FeatureColumn`s.
      units: An integer, dimensionality of the output space. Default value is
        1.
      sparse_combiner: A string specifying how to reduce if a categorical
        column is multivalent. Except `numeric_column`, almost all columns
        passed to `linear_model` are considered as categorical columns. It
        combines each categorical column independently. Currently "mean",
        "sqrtn" and "sum" are supported, with "sum" the default for linear
        model. "sqrtn" often achieves good accuracy, in particular with
        bag-of-words columns.
          * "sum": do not normalize features in the column
          * "mean": do l1 normalization on features in the column
          * "sqrtn": do l2 normalization on features in the column
        For example, for two features represented as the categorical columns:

        ```python
          # Feature 1
          shape = [2, 2]
          {
              [0, 0]: "a"
              [0, 1]: "b"
              [1, 0]: "c"
          }

          # Feature 2
          shape = [2, 3]
          {
              [0, 0]: "d"
              [1, 0]: "e"
              [1, 1]: "f"
              [1, 2]: "g"
          }
        ```
        with `sparse_combiner` as "mean", the linear model outputs conceptually
        are

        ```
          y_0 = 1.0 / 2.0 * (w_a + w_b) + w_c + b_0
          y_1 = w_d + 1.0 / 3.0 * (w_e + w_f + w_g) + b_1
        ```
        where `y_i` is the output, `b_i` is the bias, and `w_x` is the weight
        assigned to the presence of `x` in the input features.
      trainable: If `True` also add the variable to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      name: Name to give to the Linear Model. All variables and ops created
        will be scoped by this name.
      **kwargs: Keyword arguments to construct a layer.

    Raises:
      ValueError: if an item in `feature_columns` is neither a `DenseColumn`
        nor `CategoricalColumn`.
    """
    super(LinearModel, self).__init__(name=name, **kwargs)
    # All real work is delegated to the inner layer; this Model is a thin
    # wrapper that exposes the Keras Model API.
    self.layer = _LinearModelLayer(
        feature_columns,
        units,
        sparse_combiner,
        trainable,
        name=self.name,
        **kwargs)

  def call(self, features):
    """Returns a `Tensor` that represents the predictions of a linear model.

    Args:
      features: A mapping from key to tensors. `_FeatureColumn`s look up via
        these keys. For example `numeric_column('price')` will look at 'price'
        key in this dict. Values are `Tensor` or `SparseTensor` depending on
        corresponding `_FeatureColumn`.

    Returns:
      A `Tensor` which represents predictions/logits of a linear model. Its
      shape is (batch_size, units) and its dtype is `float32`.

    Raises:
      ValueError: If features are not a dictionary.
    """
    return self.layer(features)

  @property
  def bias(self):
    # Bias lives on the inner layer; surface it for callers/tests.
    return self.layer.bias


================================================
FILE: tensorflow_estimator/python/estimator/canned/linear_estimator_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for LinearEstimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf

from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import linear_testing_utils
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.head import multi_class_head
from tensorflow_estimator.python.estimator.head import regression_head
from tensorflow_estimator.python.estimator.inputs import numpy_io


def _linear_estimator_fn(weight_column=None, label_dimension=1, **kwargs):
  """Returns a LinearEstimator that uses regression_head."""
  return linear.LinearEstimatorV2(
      head=regression_head.RegressionHead(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE),
      **kwargs)
def _linear_estimator_classifier_fn(n_classes=3, **kwargs):
  """Returns a LinearEstimator that uses a multi-class classification head."""
  return linear.LinearEstimatorV2(
      head=multi_class_head.MultiClassHead(n_classes=n_classes), **kwargs)


# The four classes below bind the shared linear test suites from
# linear_testing_utils to the LinearEstimatorV2 factory functions above.
class LinearEstimatorEvaluateTest(
    linear_testing_utils.BaseLinearRegressorEvaluationTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_estimator_fn)


class LinearEstimatorPredictTest(
    linear_testing_utils.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
        self, _linear_estimator_fn)


class LinearEstimatorTrainTest(
    linear_testing_utils.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_estimator_fn)


class LinearEstimatorWarmStartingTest(
    linear_testing_utils.BaseLinearWarmStartingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
        self, _linear_estimator_classifier_fn, _linear_estimator_fn)


class LinearEstimatorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for LinearEstimatorV2."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      # Flush cached summary writers before deleting the directory they
      # write to.
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self,
                          train_input_fn,
                          eval_input_fn,
                          predict_input_fn,
                          input_dimension,
                          label_dimension,
                          batch_size,
                          optimizer='Ftrl'):
    """Trains, evaluates, predicts with, and exports one estimator."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = linear.LinearEstimatorV2(
        head=regression_head.RegressionHead(label_dimension=label_dimension),
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        optimizer=optimizer)

    # Train
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _create_input_fn(self, label_dimension, batch_size):
    """Creates input_fn for integration test."""
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)
    return train_input_fn, eval_input_fn, predict_input_fn

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
        label_dimension, batch_size)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_numpy_input_fn_with_optimizer_instance(self):
    """Tests complete flow with optimizer_v2 instance."""
    label_dimension = 2
    batch_size = 10
    train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn(
        label_dimension, batch_size)
    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        # Test with optimizer_v2 instance
        optimizer=tf_keras.optimizers.legacy.Ftrl(0.01))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/linear_model_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for feature_column."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.framework import test_util
from tensorflow.python.platform import flags
from tensorflow_estimator.python.estimator.canned import linear


def _initialized_session(config=None):
  """Returns a v1 Session with global variables and tables initialized."""
  sess = tf.compat.v1.Session(config=config)
  sess.run(tf.compat.v1.initializers.global_variables())
  sess.run(tf.compat.v1.tables_initializer())
  return sess


def get_linear_model_bias(name='linear_model'):
  """Fetches the bias variable created under the given model scope."""
  with tf.compat.v1.variable_scope(name, reuse=True):
    return tf.compat.v1.get_variable('bias_weights')


def get_linear_model_column_var(column, name='linear_model'):
  """Fetches the weight variable created for `column` under the model scope."""
  return tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
                                     name + '/' + column.name)[0]


class BaseFeatureColumnForTests(
    tf.compat.v2.__internal__.feature_column.FeatureColumn):
  """A base FeatureColumn useful to avoid boiler-plate in tests.

  Provides dummy implementations for abstract methods that raise ValueError in
  order to avoid re-defining all abstract methods for each test sub-class.
  """

  @property
  def parents(self):
    raise ValueError('Should not use this method.')

  @classmethod
  def from_config(cls, config, custom_objects=None, columns_by_name=None):
    raise ValueError('Should not use this method.')

  def get_config(self):
    raise ValueError('Should not use this method.')


class SortableFeatureColumnTest(tf.test.TestCase):

  @test_util.run_deprecated_v1
  def test_linear_model(self):
    price = tf.feature_column.numeric_column('price')
    with tf.Graph().as_default():
      features = {'price': [[1.], [5.]]}
      model = linear.LinearModel([price])
      predictions = model(features)
      price_var, bias = model.variables
      with _initialized_session() as sess:
        # Everything starts at zero.
        self.assertAllClose([0.], self.evaluate(bias))
        self.assertAllClose([[0.]], self.evaluate(price_var))
        self.assertAllClose([[0.], [0.]], self.evaluate(predictions))
        sess.run(price_var.assign([[10.]]))
        # prediction = price * weight
        self.assertAllClose([[10.], [50.]], self.evaluate(predictions))

  @test_util.run_deprecated_v1
  def test_linear_model_sanitizes_scope_names(self):
    # Column name contains characters illegal in a variable scope ('>', ' ').
    price = tf.feature_column.numeric_column('price > 100')
    with tf.Graph().as_default():
      features = {'price > 100': [[1.], [5.]]}
      model = linear.LinearModel([price])
      predictions = model(features)
      price_var, bias = model.variables
      with _initialized_session() as sess:
        self.assertAllClose([0.], self.evaluate(bias))
        self.assertAllClose([[0.]], self.evaluate(price_var))
        self.assertAllClose([[0.], [0.]], self.evaluate(predictions))
        sess.run(price_var.assign([[10.]]))
        self.assertAllClose([[10.], [50.]], self.evaluate(predictions))


class BucketizedColumnTest(tf.test.TestCase):

  def test_linear_model_one_input_value(self):
    """Tests linear_model() for input with shape=[1]."""
    price = tf.feature_column.numeric_column('price', shape=[1])
    bucketized_price = tf.feature_column.bucketized_column(
        price, boundaries=[0, 2, 4, 6])
    with tf.Graph().as_default():
      features = {'price': [[-1.], [1.], [5.], [6.]]}
      model = linear.LinearModel([bucketized_price])
      predictions = model(features)
      bucketized_price_var, bias = model.variables
      with _initialized_session() as sess:
        self.assertAllClose([0.], self.evaluate(bias))
        # One weight variable per bucket, all initialized to zero.
        self.assertAllClose([[0.], [0.], [0.], [0.], [0.]],
                            self.evaluate(bucketized_price_var))
        self.assertAllClose([[0.], [0.], [0.], [0.]],
                            self.evaluate(predictions))
        sess.run(
            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]]))
        # price -1. is in the 0th bucket, whose weight is 10.
        # price 1. is in the 1st bucket, whose weight is 20.
        # price 5. is in the 3rd bucket, whose weight is 40.
        # price 6. is in the 4th bucket, whose weight is 50.
        self.assertAllClose([[10.], [20.], [40.], [50.]],
                            self.evaluate(predictions))
        sess.run(bias.assign([1.]))
        self.assertAllClose([[11.], [21.], [41.], [51.]],
                            self.evaluate(predictions))
  def test_linear_model_two_input_values(self):
    """Tests linear_model() for input with shape=[2]."""
    price = tf.feature_column.numeric_column('price', shape=[2])
    bucketized_price = tf.feature_column.bucketized_column(
        price, boundaries=[0, 2, 4, 6])
    with tf.Graph().as_default():
      features = {'price': [[-1., 1.], [5., 6.]]}
      model = linear.LinearModel([bucketized_price])
      predictions = model(features)
      bucketized_price_var, bias = model.variables
      with _initialized_session() as sess:
        self.assertAllClose([0.], self.evaluate(bias))
        # One weight per bucket per input column, all initialized to zero.
        self.assertAllClose(
            [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]],
            self.evaluate(bucketized_price_var))
        self.assertAllClose([[0.], [0.]], self.evaluate(predictions))
        sess.run(
            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.],
                                         [60.], [70.], [80.], [90.], [100.]]))
        # 1st example:
        #   price -1. is in the 0th bucket, whose weight is 10.
        #   price 1. is in the 6th bucket, whose weight is 70.
        # 2nd example:
        #   price 5. is in the 3rd bucket, whose weight is 40.
        #   price 6. is in the 9th bucket, whose weight is 100.
        self.assertAllClose([[80.], [140.]], self.evaluate(predictions))
        sess.run(bias.assign([1.]))
        self.assertAllClose([[81.], [141.]], self.evaluate(predictions))


class HashedCategoricalColumnTest(tf.test.TestCase):

  @test_util.run_deprecated_v1
  def test_linear_model(self):
    wire_column = tf.feature_column.categorical_column_with_hash_bucket(
        'wire', 4)
    self.assertEqual(4, wire_column.num_buckets)
    with tf.Graph().as_default():
      model = linear.LinearModel((wire_column,))
      predictions = model({
          wire_column.name:
              tf.compat.v1.SparseTensorValue(
                  indices=((0, 0), (1, 0), (1, 1)),
                  values=('marlo', 'skywalker', 'omar'),
                  dense_shape=(2, 2))
      })
      wire_var, bias = model.variables
      self.evaluate(tf.compat.v1.initializers.global_variables())
      self.evaluate(tf.compat.v1.tables_initializer())
      self.assertAllClose((0.,), self.evaluate(bias))
      self.assertAllClose(((0.,), (0.,), (0.,), (0.,)),
                          self.evaluate(wire_var))
      self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions))
      self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,))))
      # 'marlo' -> 3: wire_var[3] = 4
      # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6
      self.assertAllClose(((4.,), (6.,)), self.evaluate(predictions))


class CrossedColumnTest(tf.test.TestCase):

  @test_util.run_deprecated_v1
  def test_linear_model(self):
    """Tests linear_model.

    Uses data from test_get_sparse_tensors_simple.
    """
    a = tf.feature_column.numeric_column('a', dtype=tf.int32, shape=(2,))
    b = tf.feature_column.bucketized_column(a, boundaries=(0, 1))
    crossed = tf.feature_column.crossed_column([b, 'c'],
                                               hash_bucket_size=5,
                                               hash_key=5)
    with tf.Graph().as_default():
      model = linear.LinearModel((crossed,))
      predictions = model({
          'a':
              tf.compat.v2.constant(((-1., .5), (.5, 1.))),
          'c':
              tf.sparse.SparseTensor(
                  indices=((0, 0), (1, 0), (1, 1)),
                  values=['cA', 'cB', 'cC'],
                  dense_shape=(2, 2)),
      })
      crossed_var, bias = model.variables
      with _initialized_session() as sess:
        self.assertAllClose((0.,), self.evaluate(bias))
        self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)),
                            self.evaluate(crossed_var))
        self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions))
        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
        self.assertAllClose(((3.,), (14.,)), self.evaluate(predictions))
        sess.run(bias.assign((.1,)))
        self.assertAllClose(((3.1,), (14.1,)), self.evaluate(predictions))

  def test_linear_model_with_weights(self):
    # Crossing a column that produces weights must be rejected; see the
    # assertRaises check that follows this setup.

    class _TestColumnWithWeights(BaseFeatureColumnForTests,
                                 fc.CategoricalColumn):
      """Produces sparse IDs and sparse weights."""

      @property
      def _is_v2_column(self):
        return True

      @property
      def name(self):
        return 'test_column'

      @property
      def parse_example_spec(self):
        return {
            self.name:
                tf.io.VarLenFeature(tf.int32),
            '{}_weights'.format(self.name):
                tf.io.VarLenFeature(tf.float32),
        }

      @property
      def num_buckets(self):
        return 5

      def transform_feature(self, transformation_cache, state_manager):
        return (transformation_cache.get(self.name, state_manager),
                transformation_cache.get('{}_weights'.format(self.name),
                                         state_manager))

      def get_sparse_tensors(self, transformation_cache, state_manager):
        """Populates both id_tensor and weight_tensor."""
        ids_and_weights = transformation_cache.get(self, state_manager)
        return fc.CategoricalColumn.IdWeightPair(
            id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1])

    t = _TestColumnWithWeights()
crossed = tf.feature_column.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) with tf.Graph().as_default(): with self.assertRaisesRegexp( ValueError, 'crossed_column does not support weight_tensor.*{}'.format(t.name)): model = linear.LinearModel((crossed,)) model({ t.name: tf.sparse.SparseTensor( indices=((0, 0), (1, 0), (1, 1)), values=[0, 1, 2], dense_shape=(2, 2)), '{}_weights'.format(t.name): tf.sparse.SparseTensor( indices=((0, 0), (1, 0), (1, 1)), values=[1., 10., 2.], dense_shape=(2, 2)), 'c': tf.sparse.SparseTensor( indices=((0, 0), (1, 0), (1, 1)), values=['cA', 'cB', 'cC'], dense_shape=(2, 2)), }) class LinearModelTest(tf.test.TestCase): def test_raises_if_empty_feature_columns(self): with self.assertRaisesRegexp(ValueError, 'feature_columns must not be empty'): linear.LinearModel(feature_columns=[]) def test_should_be_feature_column(self): with self.assertRaisesRegexp(ValueError, 'must be a FeatureColumn'): linear.LinearModel(feature_columns='NotSupported') def test_should_be_dense_or_categorical_column(self): class NotSupportedColumn(BaseFeatureColumnForTests): @property def _is_v2_column(self): return True @property def name(self): return 'NotSupportedColumn' def transform_feature(self, transformation_cache, state_manager): pass @property def parse_example_spec(self): pass with self.assertRaisesRegexp( ValueError, 'must be either a DenseColumn or CategoricalColumn'): linear.LinearModel(feature_columns=[NotSupportedColumn()]) def test_does_not_support_dict_columns(self): with self.assertRaisesRegexp( ValueError, 'Expected feature_columns to be iterable, found dict.'): linear.LinearModel(feature_columns={'a': tf.feature_column.numeric_column('a')}) def test_raises_if_duplicate_name(self): with self.assertRaisesRegexp( ValueError, 'Duplicate feature column name found for columns'): linear.LinearModel( feature_columns=[tf.feature_column.numeric_column('a'), tf.feature_column.numeric_column('a')]) def test_not_dict_input_features(self): price = 
tf.feature_column.numeric_column('price') with tf.Graph().as_default(): features = [[1.], [5.]] model = linear.LinearModel([price]) with self.assertRaisesRegexp(ValueError, 'We expected a dictionary here'): model(features) def test_dense_bias(self): price = tf.feature_column.numeric_column('price') with tf.Graph().as_default(): features = {'price': [[1.], [5.]]} model = linear.LinearModel([price]) predictions = model(features) price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], self.evaluate(bias)) sess.run(price_var.assign([[10.]])) sess.run(bias.assign([5.])) self.assertAllClose([[15.], [55.]], self.evaluate(predictions)) def test_sparse_bias(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default(): wire_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} model = linear.LinearModel([wire_cast]) predictions = model(features) wire_cast_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], self.evaluate(bias)) self.assertAllClose([[0.], [0.], [0.], [0.]], self.evaluate(wire_cast_var)) sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [10015.]], self.evaluate(predictions)) def test_dense_and_sparse_bias(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) price = tf.feature_column.numeric_column('price') with tf.Graph().as_default(): wire_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} model = linear.LinearModel([wire_cast, price]) predictions = model(features) price_var, wire_cast_var, bias = model.variables with 
_initialized_session() as sess: sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) sess.run(price_var.assign([[10.]])) self.assertAllClose([[1015.], [10065.]], self.evaluate(predictions)) def test_dense_and_sparse_column(self): """When the column is both dense and sparse, uses sparse tensors.""" class _DenseAndSparseColumn(BaseFeatureColumnForTests, tf.compat.v2.__internal__.feature_column.DenseColumn, fc.CategoricalColumn): @property def _is_v2_column(self): return True @property def name(self): return 'dense_and_sparse_column' @property def parse_example_spec(self): return {self.name: tf.io.VarLenFeature(self.dtype)} def transform_feature(self, transformation_cache, state_manager): return transformation_cache.get(self.name, state_manager) @property def variable_shape(self): raise ValueError('Should not use this method.') def get_dense_tensor(self, transformation_cache, state_manager): raise ValueError('Should not use this method.') @property def num_buckets(self): return 4 def get_sparse_tensors(self, transformation_cache, state_manager): sp_tensor = tf.sparse.SparseTensor( indices=[[0, 0], [1, 0], [1, 1]], values=[2, 0, 3], dense_shape=[2, 2]) return fc.CategoricalColumn.IdWeightPair(sp_tensor, None) dense_and_sparse_column = _DenseAndSparseColumn() with tf.Graph().as_default(): sp_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {dense_and_sparse_column.name: sp_tensor} model = linear.LinearModel([dense_and_sparse_column]) predictions = model(features) dense_and_sparse_column_var, bias = model.variables with _initialized_session() as sess: sess.run( dense_and_sparse_column_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [10015.]], self.evaluate(predictions)) def test_dense_multi_output(self): price = tf.feature_column.numeric_column('price') with tf.Graph().as_default(): 
features = {'price': [[1.], [5.]]} model = linear.LinearModel([price], units=3) predictions = model(features) price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), self.evaluate(bias)) self.assertAllClose(np.zeros((1, 3)), self.evaluate(price_var)) sess.run(price_var.assign([[10., 100., 1000.]])) sess.run(bias.assign([5., 6., 7.])) self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]], self.evaluate(predictions)) def test_sparse_multi_output(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default(): wire_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} model = linear.LinearModel([wire_cast], units=3) predictions = model(features) wire_cast_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), self.evaluate(bias)) self.assertAllClose(np.zeros((4, 3)), self.evaluate(wire_cast_var)) sess.run( wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [1000., 1100., 1200.], [10000., 11000., 12000.]])) sess.run(bias.assign([5., 6., 7.])) self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], self.evaluate(predictions)) def test_dense_multi_dimension(self): price = tf.feature_column.numeric_column('price', shape=2) with tf.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} model = linear.LinearModel([price]) predictions = model(features) price_var, _ = model.variables with _initialized_session() as sess: self.assertAllClose([[0.], [0.]], self.evaluate(price_var)) sess.run(price_var.assign([[10.], [100.]])) self.assertAllClose([[210.], [650.]], self.evaluate(predictions)) def test_sparse_multi_rank(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default(): wire_tensor = 
tf.compat.v1.sparse_placeholder(tf.string) wire_value = tf.compat.v1.SparseTensorValue( values=['omar', 'stringer', 'marlo', 'omar'], # hashed = [2, 0, 3, 2] indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], dense_shape=[2, 2, 2]) features = {'wire_cast': wire_tensor} model = linear.LinearModel([wire_cast]) predictions = model(features) wire_cast_var, _ = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((4, 1)), self.evaluate(wire_cast_var)) self.assertAllClose( np.zeros((2, 1)), predictions.eval(feed_dict={wire_tensor: wire_value})) sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) self.assertAllClose( [[1010.], [11000.]], predictions.eval(feed_dict={wire_tensor: wire_value})) def test_sparse_combiner(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default(): wire_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} model = linear.LinearModel([wire_cast], sparse_combiner='mean') predictions = model(features) wire_cast_var, bias = model.variables with _initialized_session() as sess: sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [5010.]], self.evaluate(predictions)) def test_sparse_combiner_sqrtn(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default(): wire_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} model = linear.LinearModel([wire_cast], sparse_combiner='sqrtn') predictions = model(features) wire_cast_var, bias = model.variables with _initialized_session() as sess: self.evaluate(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) 
self.evaluate(bias.assign([5.])) self.assertAllClose([[1005.], [7083.139]], self.evaluate(predictions)) def test_sparse_combiner_with_negative_weights(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) wire_cast_weights = tf.feature_column.weighted_categorical_column(wire_cast, 'weights') with tf.Graph().as_default(): wire_tensor = tf.sparse.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = { 'wire_cast': wire_tensor, 'weights': tf.compat.v2.constant([[1., 1., -1.0]]) } model = linear.LinearModel([wire_cast_weights], sparse_combiner='sum') predictions = model(features) wire_cast_var, bias = model.variables with _initialized_session() as sess: sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [-9985.]], self.evaluate(predictions)) def test_dense_multi_dimension_multi_output(self): price = tf.feature_column.numeric_column('price', shape=2) with tf.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} model = linear.LinearModel([price], units=3) predictions = model(features) price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), self.evaluate(bias)) self.assertAllClose(np.zeros((2, 3)), self.evaluate(price_var)) sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]])) sess.run(bias.assign([2., 3., 4.])) self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]], self.evaluate(predictions)) def test_raises_if_shape_mismatch(self): price = tf.feature_column.numeric_column('price', shape=2) with tf.Graph().as_default(): features = {'price': [[1.], [5.]]} with self.assertRaisesRegexp( Exception, r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): model = linear.LinearModel([price]) model(features) def test_dense_reshaping(self): price = tf.feature_column.numeric_column('price', shape=[1, 2]) 
with tf.Graph().as_default(): features = {'price': [[[1., 2.]], [[5., 6.]]]} model = linear.LinearModel([price]) predictions = model(features) price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], self.evaluate(bias)) self.assertAllClose([[0.], [0.]], self.evaluate(price_var)) self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) sess.run(price_var.assign([[10.], [100.]])) self.assertAllClose([[210.], [650.]], self.evaluate(predictions)) def test_dense_multi_column(self): price1 = tf.feature_column.numeric_column('price1', shape=2) price2 = tf.feature_column.numeric_column('price2') with tf.Graph().as_default(): features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} model = linear.LinearModel([price1, price2]) predictions = model(features) price1_var, price2_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], self.evaluate(bias)) self.assertAllClose([[0.], [0.]], self.evaluate(price1_var)) self.assertAllClose([[0.]], self.evaluate(price2_var)) self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) sess.run(price1_var.assign([[10.], [100.]])) sess.run(price2_var.assign([[1000.]])) sess.run(bias.assign([7.])) self.assertAllClose([[3217.], [4657.]], self.evaluate(predictions)) def test_dense_trainable_default(self): price = tf.feature_column.numeric_column('price') with tf.Graph().as_default() as g: features = {'price': [[1.], [5.]]} model = linear.LinearModel([price]) model(features) price_var, bias = model.variables trainable_vars = g.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self.assertIn(bias, trainable_vars) self.assertIn(price_var, trainable_vars) def test_sparse_trainable_default(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default() as g: wire_tensor = tf.sparse.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} model 
= linear.LinearModel([wire_cast]) model(features) trainable_vars = g.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) wire_cast_var, bias = model.variables self.assertIn(bias, trainable_vars) self.assertIn(wire_cast_var, trainable_vars) def test_dense_trainable_false(self): price = tf.feature_column.numeric_column('price') with tf.Graph().as_default() as g: features = {'price': [[1.], [5.]]} model = linear.LinearModel([price], trainable=False) model(features) trainable_vars = g.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) def test_sparse_trainable_false(self): wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default() as g: wire_tensor = tf.sparse.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} model = linear.LinearModel([wire_cast], trainable=False) model(features) trainable_vars = g.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) def test_column_order(self): price_a = tf.feature_column.numeric_column('price_a') price_b = tf.feature_column.numeric_column('price_b') wire_cast = tf.feature_column.categorical_column_with_hash_bucket('wire_cast', 4) with tf.Graph().as_default(): features = { 'price_a': [[1.]], 'price_b': [[3.]], 'wire_cast': tf.sparse.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) } model = linear.LinearModel([price_a, wire_cast, price_b]) model(features) my_vars = model.variables self.assertIn('price_a', my_vars[0].name) self.assertIn('price_b', my_vars[1].name) self.assertIn('wire_cast', my_vars[2].name) with tf.Graph().as_default(): features = { 'price_a': [[1.]], 'price_b': [[3.]], 'wire_cast': tf.sparse.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) } model = linear.LinearModel([wire_cast, price_b, price_a]) model(features) my_vars = model.variables self.assertIn('price_a', 
my_vars[0].name) self.assertIn('price_b', my_vars[1].name) self.assertIn('wire_cast', my_vars[2].name) def test_variable_names(self): price1 = tf.feature_column.numeric_column('price1') dense_feature = tf.feature_column.numeric_column('dense_feature') dense_feature_bucketized = tf.feature_column.bucketized_column( dense_feature, boundaries=[0.]) some_sparse_column = tf.feature_column.categorical_column_with_hash_bucket( 'sparse_feature', hash_bucket_size=5) some_embedding_column = tf.feature_column.embedding_column( some_sparse_column, dimension=10) all_cols = [price1, dense_feature_bucketized, some_embedding_column] with tf.Graph().as_default(): model = linear.LinearModel(all_cols) features = { 'price1': [[3.], [4.]], 'dense_feature': [[-1.], [4.]], 'sparse_feature': [['a'], ['x']], } model(features) for var in model.variables: self.assertIsInstance(var, tf.Variable) variable_names = [var.name for var in model.variables] self.assertCountEqual([ 'linear_model/dense_feature_bucketized/weights:0', 'linear_model/price1/weights:0', 'linear_model/sparse_feature_embedding/embedding_weights:0', 'linear_model/sparse_feature_embedding/weights:0', 'linear_model/bias_weights:0', ], variable_names) def test_fit_and_predict(self): columns = [tf.feature_column.numeric_column('a')] model = linear.LinearModel(columns) model.compile( optimizer=tf.compat.v1.train.RMSPropOptimizer(1e-3), loss='binary_crossentropy', metrics=['accuracy']) x = {'a': np.random.random((10, 1))} y = np.random.randint(0, 2, size=(10, 1)) model.fit(x, y, epochs=1, batch_size=5) model.fit(x, y, epochs=1, batch_size=5) model.evaluate(x, y, batch_size=5) model.predict(x, batch_size=5) def test_static_batch_size_mismatch(self): price1 = tf.feature_column.numeric_column('price1') price2 = tf.feature_column.numeric_column('price2') with tf.Graph().as_default(): features = { 'price1': [[1.], [5.], [7.]], # batchsize = 3 'price2': [[3.], [4.]] # batchsize = 2 } with self.assertRaisesRegexp( ValueError, r'Batch size 
\(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string model = linear.LinearModel([price1, price2]) model(features) def test_subset_of_static_batch_size_mismatch(self): price1 = tf.feature_column.numeric_column('price1') price2 = tf.feature_column.numeric_column('price2') price3 = tf.feature_column.numeric_column('price3') with tf.Graph().as_default(): features = { 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 3 'price2': [[3.], [4.]], # batchsize = 2 'price3': [[3.], [4.], [5.]] # batchsize = 3 } with self.assertRaisesRegexp( ValueError, r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string model = linear.LinearModel([price1, price2, price3]) model(features) def test_runtime_batch_size_mismatch(self): price1 = tf.feature_column.numeric_column('price1') price2 = tf.feature_column.numeric_column('price2') with tf.Graph().as_default(): features = { 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 3 'price2': [[3.], [4.]] # batchsize = 2 } model = linear.LinearModel([price1, price2]) predictions = model(features) with _initialized_session() as sess: with self.assertRaisesRegexp(tf.errors.OpError, 'must have the same size and shape'): sess.run( predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]}) def test_runtime_batch_size_matches(self): price1 = tf.feature_column.numeric_column('price1') price2 = tf.feature_column.numeric_column('price2') with tf.Graph().as_default(): features = { 'price1': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 2 'price2': tf.compat.v1.placeholder(dtype=tf.int64), # batchsize = 2 } model = linear.LinearModel([price1, price2]) predictions = model(features) with _initialized_session() as sess: sess.run( predictions, feed_dict={ features['price1']: [[1.], [5.]], features['price2']: [[1.], [5.]], }) @test_util.run_deprecated_v1 def test_with_1d_sparse_tensor(self): price = 
tf.feature_column.numeric_column('price') price_buckets = tf.feature_column.bucketized_column( price, boundaries=[ 0., 10., 100., ]) body_style = tf.feature_column.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) # Provides 1-dim tensor and dense tensor. features = { 'price': tf.compat.v2.constant([ -1., 12., ]), 'body-style': tf.sparse.SparseTensor( indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)), } self.assertEqual(1, features['price'].shape.ndims) self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) model = linear.LinearModel([price_buckets, body_style]) net = model(features) with _initialized_session() as sess: body_style_var, price_buckets_var, bias = model.variables sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], self.evaluate(net)) @test_util.run_deprecated_v1 def test_with_1d_unknown_shape_sparse_tensor(self): price = tf.feature_column.numeric_column('price') price_buckets = tf.feature_column.bucketized_column( price, boundaries=[ 0., 10., 100., ]) body_style = tf.feature_column.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) country = tf.feature_column.categorical_column_with_vocabulary_list( 'country', vocabulary_list=['US', 'JP', 'CA']) # Provides 1-dim tensor and dense tensor. 
features = { 'price': tf.compat.v1.placeholder(tf.float32), 'body-style': tf.compat.v1.sparse_placeholder(tf.string), 'country': tf.compat.v1.placeholder(tf.string), } self.assertIsNone(features['price'].shape.ndims) self.assertIsNone(features['body-style'].get_shape().ndims) price_data = np.array([-1., 12.]) body_style_data = tf.compat.v1.SparseTensorValue( indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) country_data = np.array(['US', 'CA']) model = linear.LinearModel([price_buckets, body_style, country]) net = model(features) body_style_var, _, price_buckets_var, bias = model.variables with _initialized_session() as sess: sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run( net, feed_dict={ features['price']: price_data, features['body-style']: body_style_data, features['country']: country_data })) @test_util.run_deprecated_v1 def test_with_rank_0_feature(self): price = tf.feature_column.numeric_column('price') features = { 'price': tf.compat.v2.constant(0), } self.assertEqual(0, features['price'].shape.ndims) # Static rank 0 should fail with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): model = linear.LinearModel([price]) model(features) # Dynamic rank 0 should fail features = { 'price': tf.compat.v1.placeholder(tf.float32), } model = linear.LinearModel([price]) net = model(features) self.assertEqual(1, net.shape[1]) with _initialized_session() as sess: with self.assertRaisesOpError('Feature .* cannot have rank 0'): sess.run(net, feed_dict={features['price']: np.array(1)}) def test_multiple_linear_models(self): price = tf.feature_column.numeric_column('price') with tf.Graph().as_default(): features1 = {'price': [[1.], [5.]]} features2 = {'price': [[2.], [10.]]} model1 = linear.LinearModel([price]) model2 = linear.LinearModel([price]) predictions1 
= model1(features1) predictions2 = model2(features2) price_var1, bias1 = model1.variables price_var2, bias2 = model2.variables with _initialized_session() as sess: self.assertAllClose([0.], self.evaluate(bias1)) sess.run(price_var1.assign([[10.]])) sess.run(bias1.assign([5.])) self.assertAllClose([[15.], [55.]], self.evaluate(predictions1)) self.assertAllClose([0.], self.evaluate(bias2)) sess.run(price_var2.assign([[10.]])) sess.run(bias2.assign([5.])) self.assertAllClose([[25.], [105.]], self.evaluate(predictions2)) class VocabularyFileCategoricalColumnTest(tf.test.TestCase): def setUp(self): super(VocabularyFileCategoricalColumnTest, self).setUp() # Contains strings, character names from 'The Wire': omar, stringer, marlo self._wire_vocabulary_file_name = os.path.join( flags.FLAGS['test_srcdir'].value, 'org_tensorflow_estimator/tensorflow_estimator', 'python/estimator/canned/testdata/wire_vocabulary.txt') # self._wire_vocabulary_file_name = test.test_src_dir_path( # 'python/estimator/canned/testdata/wire_vocabulary.txt') self._wire_vocabulary_size = 3 # TODO(scottzhu): Reenable test once the issue for reading test file is fixed. 
  @test_util.run_deprecated_v1
  def DISABLED_test_linear_model(self):
    # Disabled pending the test-data file-reading fix (see TODO in setUp);
    # rename back to test_linear_model to re-enable.
    wire_column = tf.compat.v1.feature_column.categorical_column_with_vocabulary_file(
        key='wire',
        vocabulary_file=self._wire_vocabulary_file_name,
        vocabulary_size=self._wire_vocabulary_size,
        num_oov_buckets=1)
    # 3 vocabulary entries + 1 OOV bucket.
    self.assertEqual(4, wire_column.num_buckets)
    with tf.Graph().as_default():
      model = linear.LinearModel((wire_column,))
      predictions = model({
          wire_column.name:
              tf.compat.v1.SparseTensorValue(
                  indices=((0, 0), (1, 0), (1, 1)),
                  values=('marlo', 'skywalker', 'omar'),
                  dense_shape=(2, 2))
      })
      wire_var, bias = model.variables
      self.evaluate(tf.compat.v1.initializers.global_variables())
      self.evaluate(tf.compat.v1.tables_initializer())
      # Fresh variables are zero-initialized, so predictions start at zero.
      self.assertAllClose((0.,), self.evaluate(bias))
      self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var))
      self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions))
      self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,))))
      # 'marlo' -> 2: wire_var[2] = 3
      # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
      self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions))


class VocabularyListCategoricalColumnTest(tf.test.TestCase):
  """Tests LinearModel with a vocabulary-list categorical column."""

  @test_util.run_deprecated_v1
  def test_linear_model(self):
    wire_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key='aaa',
        vocabulary_list=('omar', 'stringer', 'marlo'),
        num_oov_buckets=1)
    # 3 vocabulary entries + 1 OOV bucket.
    self.assertEqual(4, wire_column.num_buckets)
    with tf.Graph().as_default():
      model = linear.LinearModel((wire_column,))
      predictions = model({
          wire_column.name:
              tf.compat.v1.SparseTensorValue(
                  indices=((0, 0), (1, 0), (1, 1)),
                  values=('marlo', 'skywalker', 'omar'),
                  dense_shape=(2, 2))
      })
      wire_var, bias = model.variables
      self.evaluate(tf.compat.v1.initializers.global_variables())
      self.evaluate(tf.compat.v1.tables_initializer())
      # Fresh variables are zero-initialized, so predictions start at zero.
      self.assertAllClose((0.,), self.evaluate(bias))
      self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var))
      self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions))
self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,)))) # 'marlo' -> 2: wire_var[2] = 3 # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) class IdentityCategoricalColumnTest(tf.test.TestCase): @test_util.run_deprecated_v1 def test_linear_model(self): column = tf.feature_column.categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual(3, column.num_buckets) with tf.Graph().as_default(): model = linear.LinearModel((column,)) predictions = model({ column.name: tf.compat.v1.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=(0, 2, 1), dense_shape=(2, 2)) }) weight_var, bias = model.variables self.evaluate(tf.compat.v1.initializers.global_variables()) self.evaluate(tf.compat.v1.tables_initializer()) self.assertAllClose((0.,), self.evaluate(bias)) self.assertAllClose(((0.,), (0.,), (0.,)), self.evaluate(weight_var)) self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) self.evaluate(weight_var.assign(((1.,), (2.,), (3.,)))) # weight_var[0] = 1 # weight_var[2] + weight_var[1] = 3+2 = 5 self.assertAllClose(((1.,), (5.,)), self.evaluate(predictions)) class IndicatorColumnTest(tf.test.TestCase): @test_util.run_deprecated_v1 def test_linear_model(self): animal = tf.feature_column.indicator_column( tf.feature_column.categorical_column_with_identity('animal', num_buckets=4)) with tf.Graph().as_default(): features = { 'animal': tf.sparse.SparseTensor( indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) } model = linear.LinearModel([animal]) predictions = model(features) weight_var, _ = model.variables self.evaluate(tf.compat.v1.initializers.global_variables()) self.evaluate(tf.compat.v1.tables_initializer()) # All should be zero-initialized. 
self.assertAllClose([[0.], [0.], [0.], [0.]], self.evaluate(weight_var)) self.assertAllClose([[0.]], self.evaluate(predictions)) self.evaluate(weight_var.assign([[1.], [2.], [3.], [4.]])) self.assertAllClose([[2. + 3.]], self.evaluate(predictions)) class EmbeddingColumnTest(tf.test.TestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def test_linear_model(self): # Inputs. batch_size = 4 vocabulary_size = 3 sparse_input = tf.compat.v1.SparseTensorValue( # example 0, ids [2] # example 1, ids [0, 1] # example 2, ids [] # example 3, ids [1] indices=((0, 0), (1, 0), (1, 4), (3, 0)), values=(2, 0, 1, 1), dense_shape=(batch_size, 5)) # Embedding variable. embedding_dimension = 2 embedding_shape = (vocabulary_size, embedding_dimension) zeros_embedding_values = np.zeros(embedding_shape) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual(embedding_shape, shape) self.assertEqual(tf.float32, dtype) self.assertIsNone(partition_info) return zeros_embedding_values # Build columns. 
categorical_column = tf.feature_column.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = tf.feature_column.embedding_column( categorical_column, dimension=embedding_dimension, initializer=_initializer) with tf.Graph().as_default(): model = linear.LinearModel((embedding_column,)) predictions = model({categorical_column.name: sparse_input}) expected_var_names = ( 'linear_model/bias_weights:0', 'linear_model/aaa_embedding/weights:0', 'linear_model/aaa_embedding/embedding_weights:0', ) self.assertCountEqual( expected_var_names, [v.name for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)]) trainable_vars = { v.name: v for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) } self.assertCountEqual(expected_var_names, trainable_vars.keys()) bias = trainable_vars['linear_model/bias_weights:0'] embedding_weights = trainable_vars[ 'linear_model/aaa_embedding/embedding_weights:0'] linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0'] self.evaluate(tf.compat.v1.initializers.global_variables()) self.evaluate(tf.compat.v1.tables_initializer()) # Predictions with all zero weights. self.assertAllClose(np.zeros((1,)), self.evaluate(bias)) self.assertAllClose(zeros_embedding_values, self.evaluate(embedding_weights)) self.assertAllClose( np.zeros((embedding_dimension, 1)), self.evaluate(linear_weights)) self.assertAllClose(np.zeros((batch_size, 1)), self.evaluate(predictions)) # Predictions with all non-zero weights. self.evaluate( embedding_weights.assign(( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.) 
# id 2 ))) self.evaluate(linear_weights.assign(((4.,), (6.,)))) # example 0, ids [2], embedding[0] = [7, 11] # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] # example 2, ids [], embedding[2] = [0, 0] # example 3, ids [1], embedding[3] = [3, 5] # sum(embeddings * linear_weights) # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), self.evaluate(predictions)) class SharedEmbeddingColumnTest(tf.test.TestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def test_linear_model(self): # Inputs. batch_size = 2 vocabulary_size = 3 # -1 values are ignored. input_a = np.array([ [2, -1, -1], # example 0, ids [2] [0, 1, -1] ]) # example 1, ids [0, 1] input_b = np.array([ [0, -1, -1], # example 0, ids [0] [-1, -1, -1] ]) # example 1, ids [] # Embedding variable. embedding_dimension = 2 embedding_shape = (vocabulary_size, embedding_dimension) zeros_embedding_values = np.zeros(embedding_shape) def _initializer(shape, dtype, partition_info=None): self.assertAllEqual(embedding_shape, shape) self.assertEqual(tf.float32, dtype) self.assertIsNone(partition_info) return zeros_embedding_values # Build columns. categorical_column_a = tf.feature_column.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) categorical_column_b = tf.feature_column.categorical_column_with_identity( key='bbb', num_buckets=vocabulary_size) embedding_column_a, embedding_column_b = tf.compat.v2.feature_column.shared_embeddings( [categorical_column_a, categorical_column_b], dimension=embedding_dimension, initializer=_initializer) with tf.Graph().as_default(): model = linear.LinearModel((embedding_column_a, embedding_column_b)) predictions = model({ categorical_column_a.name: input_a, categorical_column_b.name: input_b }) # Linear weights do not follow the column name. But this is a rare use # case, and fixing it would add too much complexity to the code. 
expected_var_names = ( 'linear_model/bias_weights:0', 'linear_model/aaa_shared_embedding/weights:0', 'aaa_bbb_shared_embedding:0', 'linear_model/bbb_shared_embedding/weights:0', ) self.assertCountEqual( expected_var_names, [v.name for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)]) trainable_vars = { v.name: v for v in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) } self.assertCountEqual(expected_var_names, trainable_vars.keys()) bias = trainable_vars['linear_model/bias_weights:0'] embedding_weights = trainable_vars['aaa_bbb_shared_embedding:0'] linear_weights_a = trainable_vars[ 'linear_model/aaa_shared_embedding/weights:0'] linear_weights_b = trainable_vars[ 'linear_model/bbb_shared_embedding/weights:0'] self.evaluate(tf.compat.v1.initializers.global_variables()) self.evaluate(tf.compat.v1.tables_initializer()) # Predictions with all zero weights. self.assertAllClose(np.zeros((1,)), self.evaluate(bias)) self.assertAllClose(zeros_embedding_values, self.evaluate(embedding_weights)) self.assertAllClose( np.zeros((embedding_dimension, 1)), self.evaluate(linear_weights_a)) self.assertAllClose( np.zeros((embedding_dimension, 1)), self.evaluate(linear_weights_b)) self.assertAllClose(np.zeros((batch_size, 1)), self.evaluate(predictions)) # Predictions with all non-zero weights. self.evaluate( embedding_weights.assign(( (1., 2.), # id 0 (3., 5.), # id 1 (7., 11.) # id 2 ))) self.evaluate(linear_weights_a.assign(((4.,), (6.,)))) # example 0, ids [2], embedding[0] = [7, 11] # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] # sum(embeddings * linear_weights) # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29] self.evaluate(linear_weights_b.assign(((3.,), (5.,)))) # example 0, ids [0], embedding[0] = [1, 2] # example 1, ids [], embedding[1] = 0, 0] # sum(embeddings * linear_weights) # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] self.assertAllClose([[94. 
+ 13.], [29.]], self.evaluate(predictions)) class WeightedCategoricalColumnTest(tf.test.TestCase): @test_util.run_deprecated_v1 def test_linear_model(self): column = tf.feature_column.weighted_categorical_column( categorical_column=tf.feature_column.categorical_column_with_identity( key='ids', num_buckets=3), weight_feature_key='values') with tf.Graph().as_default(): model = linear.LinearModel((column,)) predictions = model({ 'ids': tf.compat.v1.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=(0, 2, 1), dense_shape=(2, 2)), 'values': tf.compat.v1.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=(.5, 1., .1), dense_shape=(2, 2)) }) weight_var, bias = model.variables self.evaluate(tf.compat.v1.initializers.global_variables()) self.evaluate(tf.compat.v1.tables_initializer()) self.assertAllClose((0.,), self.evaluate(bias)) self.assertAllClose(((0.,), (0.,), (0.,)), self.evaluate(weight_var)) self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) self.evaluate(weight_var.assign(((1.,), (2.,), (3.,)))) # weight_var[0] * weights[0, 0] = 1 * .5 = .5 # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] # = 3*1 + 2*.1 = 3+.2 = 3.2 self.assertAllClose(((.5,), (3.2,)), self.evaluate(predictions)) def test_linear_model_mismatched_shape(self): column = tf.feature_column.weighted_categorical_column( categorical_column=tf.feature_column.categorical_column_with_identity( key='ids', num_buckets=3), weight_feature_key='values') with tf.Graph().as_default(): with self.assertRaisesRegexp(ValueError, r'Dimensions.*are not compatible'): model = linear.LinearModel((column,)) model({ 'ids': tf.compat.v1.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=(0, 2, 1), dense_shape=(2, 2)), 'values': tf.compat.v1.SparseTensorValue( indices=((0, 0), (0, 1), (1, 0), (1, 1)), values=(.5, 11., 1., .1), dense_shape=(2, 2)) }) def test_linear_model_mismatched_dense_values(self): column = tf.feature_column.weighted_categorical_column( 
        categorical_column=tf.feature_column.categorical_column_with_identity(
            key='ids', num_buckets=3),
        weight_feature_key='values')
    with tf.Graph().as_default():
      model = linear.LinearModel((column,), sparse_combiner='mean')
      # Dense 'values' of shape (2, 1) vs sparse 'ids' with 2 entries in row 1:
      # the mismatch is only detected at run time.
      predictions = model({
          'ids':
              tf.compat.v1.SparseTensorValue(
                  indices=((0, 0), (1, 0), (1, 1)),
                  values=(0, 2, 1),
                  dense_shape=(2, 2)),
          'values': ((.5,), (1.,))
      })
      # Disabling the constant folding optimizer here since it changes the
      # error message differently on CPU and GPU.
      config = tf.compat.v1.ConfigProto()
      config.graph_options.rewrite_options.constant_folding = (
          rewriter_config_pb2.RewriterConfig.OFF)
      with _initialized_session(config):
        with self.assertRaisesRegexp(tf.errors.OpError, 'Incompatible shapes'):
          self.evaluate(predictions)

  def test_linear_model_mismatched_dense_shape(self):
    column = tf.feature_column.weighted_categorical_column(
        categorical_column=tf.feature_column.categorical_column_with_identity(
            key='ids', num_buckets=3),
        weight_feature_key='values')
    with tf.Graph().as_default():
      model = linear.LinearModel((column,))
      # Dense weights of shape (3, 1) against a (2, 2) sparse id tensor:
      # values are matched to ids in order, so predictions still evaluate.
      predictions = model({
          'ids':
              tf.compat.v1.SparseTensorValue(
                  indices=((0, 0), (1, 0), (1, 1)),
                  values=(0, 2, 1),
                  dense_shape=(2, 2)),
          'values': ((.5,), (1.,), (.1,))
      })
      weight_var, bias = model.variables
      self.evaluate(tf.compat.v1.initializers.global_variables())
      self.evaluate(tf.compat.v1.tables_initializer())
      # Fresh variables are zero-initialized, so predictions start at zero.
      self.assertAllClose((0.,), self.evaluate(bias))
      self.assertAllClose(((0.,), (0.,), (0.,)), self.evaluate(weight_var))
      self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions))
      self.evaluate(weight_var.assign(((1.,), (2.,), (3.,))))
      # weight_var[0] * weights[0, 0] = 1 * .5 = .5
      # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
      # = 3*1 + 2*.1 = 3+.2 = 3.2
      self.assertAllClose(((.5,), (3.2,)), self.evaluate(predictions))


@test_util.run_all_in_graph_and_eager_modes
class LinearModelLayerSerializationTest(tf.test.TestCase, parameterized.TestCase):
  """Tests get_config/from_config round-trips of linear._LinearModelLayer."""

  @parameterized.named_parameters(
      ('trainable', 6,
       'mean', True, 'trainable'),
      ('not_trainable', 10, 'sum', False, 'frozen'))
  def test_get_config(self, units, sparse_combiner, trainable, name):
    # get_config must capture constructor arguments and serialize the
    # feature columns by class name.
    cols = [tf.feature_column.numeric_column('a'),
            tf.feature_column.categorical_column_with_identity(key='b', num_buckets=3)]
    layer = linear._LinearModelLayer(
        cols, units=units, sparse_combiner=sparse_combiner,
        trainable=trainable, name=name)
    config = layer.get_config()
    self.assertEqual(config['name'], layer.name)
    self.assertEqual(config['trainable'], trainable)
    self.assertEqual(config['units'], units)
    self.assertEqual(config['sparse_combiner'], sparse_combiner)
    self.assertLen(config['feature_columns'], 2)
    self.assertEqual(
        config['feature_columns'][0]['class_name'], 'NumericColumn')
    self.assertEqual(
        config['feature_columns'][1]['class_name'], 'IdentityCategoricalColumn')

  @parameterized.named_parameters(
      ('trainable', 6, 'mean', True, 'trainable'),
      ('not_trainable', 10, 'sum', False, 'frozen'))
  def test_from_config(self, units, sparse_combiner, trainable, name):
    # from_config(get_config()) must reconstruct an equivalent layer,
    # including deserialized feature columns.
    cols = [tf.feature_column.numeric_column('a'),
            tf.feature_column.categorical_column_with_vocabulary_list(
                'b', vocabulary_list=('1', '2', '3')),
            tf.feature_column.categorical_column_with_hash_bucket(
                key='c', hash_bucket_size=3)]
    orig_layer = linear._LinearModelLayer(
        cols, units=units, sparse_combiner=sparse_combiner,
        trainable=trainable, name=name)
    config = orig_layer.get_config()
    new_layer = linear._LinearModelLayer.from_config(config)
    self.assertEqual(new_layer.name, orig_layer.name)
    self.assertEqual(new_layer._units, units)
    self.assertEqual(new_layer._sparse_combiner, sparse_combiner)
    self.assertEqual(new_layer.trainable, trainable)
    self.assertLen(new_layer._feature_columns, 3)
    self.assertEqual(new_layer._feature_columns[0].name, 'a')
    self.assertEqual(
        new_layer._feature_columns[1].vocabulary_list, ('1', '2', '3'))
    self.assertEqual(new_layer._feature_columns[2].num_buckets, 3)


if __name__ == '__main__':
  tf.test.main()
================================================ FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/BUILD ================================================ # Placeholder: load py_library load("//tensorflow_estimator:estimator.bzl", "py_test") package(default_visibility = ["//tensorflow_estimator:__subpackages__"]) licenses(["notice"]) py_test( name = "sdca_test", size = "medium", srcs = ["python/sdca_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", "//tensorflow_estimator/python/estimator:linear", ], ) py_library( name = "sdca_ops_py", srcs = [ "__init__.py", "python/utils/sdca_ops.py", ], srcs_version = "PY3", deps = [ ":sharded_mutable_dense_hashtable_py", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "sdca_ops_test", size = "medium", srcs = ["python/utils/sdca_ops_test.py"], python_version = "PY3", shard_count = 4, srcs_version = "PY3", tags = [ "no_gpu", "no_pip_gpu", ], deps = [ ":sdca_ops_py", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_library( name = "sharded_mutable_dense_hashtable_py", srcs = ["python/utils/sharded_mutable_dense_hashtable.py"], srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) py_test( name = "sharded_mutable_dense_hashtable_test", size = "small", srcs = 
["python/utils/sharded_mutable_dense_hashtable_test.py"], python_version = "PY3", srcs_version = "PY3", deps = [ ":sharded_mutable_dense_hashtable_py", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_keras_installed", ], ) ================================================ FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/__init__.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Ops for training linear models. ## This package provides optimizers to train linear models. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.util.all_util import remove_undocumented remove_undocumented(__name__) ================================================ FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/doc/sdca.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "DzJ8FQ_HsP7Q" }, "source": [ "# Distributed SDCA\n", "\n", "$\\def\\a{\\alpha} \\def\\d{\\Delta\\a} \\def\\l{\\ell} \\def\\P{\\mathcal{P}}$\n", "We want to minimize on $K$ machines the following objective\n", "\n", "$$ P(w) = \\frac{1}{n}\\sum_{i=1}^n \\l_i(x_i^T w)+\\lambda g(w) $$\n", "\n", "By Fenchel duality, this is equivalent to maximizing its dual\n", "\n", "$$ D(\\a) = \\frac{1}{n} \\left(\\sum_{i=1}^n -\\l_i^\\star(-\\a_i)\\right) -\\lambda g^\\star\\left(\\tfrac{1}{\\lambda n} X\\a\\right) $$\n", "\n", "which can be done very efficiently on a single machine with SDCA [3].\n", "\n", "Here $f^\\star$ denotes the convex dual of a convex function $f$, $\\l_i$ is the loss for the example $i$, $n$ is the total number of examples and $\\lambda n$ is the L2 parameter.\n", "\n", "Following [1,2], we use a data partition $\\P_1,\\dots,\\P_K$ of $\\{1,2,\\dots,n\\}$ such that $\\P_k$ contains the examples on machine $k$.\n", "For an $n$-dimensional vector $h$, we denote by $h_{[k]}$ the $n$-dimensional vector restricted to the machine $k$: $(h_{[k]})_i = h_i$ if $i\\in\\P_k$ and $0$ otherwise.\n", "\n", "## CoCoA+ Local Solver\n", "\n", "The local subproblem on machine $k$ is [1, 2]\n", "\n", "$$ \\max_{\\d_{[k]}} \\mathcal{G}^{\\sigma}_k (\\d_{[k]}) $$\n", "\n", "with\n", "\n", "$$\n", "\\mathcal{G}^{\\sigma}_k (\\d_{[k]}) =\n", "-\\frac{1}{n} \\sum_{i\\in\\P_k}\\l_i^\\star(-\\a_i-(\\d_{[k]})_i) -\\frac{1}{n} w^T X\n", "\\d_{[k]}- \\frac{\\lambda}{2}\\sigma \\left\\| \\frac{1}{\\lambda n} X 
"$\sigma$ is a parameter that measures the difficulty of the data partition. CoCoA+ makes the choice $ \sigma = K $\n",
The details of the computation can be found in Appendix.\n", "\n", "### Squared Loss\n", "\n", "Squared loss is $ \\l_i(u) = \\frac{1}{2}(u-y)^2 $ with dual $ \\l_i^\\star(v) =\\frac{1}{2}v^2+y v$.\n", "\n", "The closed form solution for squared loss is given in [4]. By replacing again $\\lambda$ by $\\frac{\\lambda}{\\sigma}$ we obtain\n", "\n", "$$ \\d = -\\frac{\\a + w^T X_i - y}{1 + \\frac{\\sigma X_i^2}{2 \\lambda n}} $$\n", "\n", "### Logistic loss\n", "\n", "Logistic loss is $ \\l_i(u) = \\log (1+e^{-uy_i}) $ and its dual is\n", "\n", "$$ \\l_i^\\star(v) = -vy_i\\log(-vy_i) + (1+vy_i)\n", "\\log(1+vy_i) $$\n", "\n", "The label $y_i$ is $\\pm 1$ and the dual loss is only defined for $ -y_i v\\in (0,1) $. We then have the constraint\n", "\n", "$$ y_i (\\a+\\d) \\in (0,1) $$\n", "\n", "The problem of finding the maximum of $ D(\\d) $ can be reformulated as the problem of finding the unique zero of its derivative. Newton method works well for finding the zero of $ D'(\\d) $ but can be a bit unstable due to the constraint requiring $y_i(\\a+\\d)$ be in the range $(0,1)$ (more on this below).\n", "\n", "To avoid this problem, we make the following change of variable\n", "\n", "$$ y(\\a+\\d) = \\frac{1}{2}(1+\\tanh x) $$\n", "\n", "This enforces the constraint and is well suited because the objective derivative\n", "has the following simple form:\n", "\n", "$$ D' = H(x) = -2y x - \\bar{y} + A\\a -\\frac{A}{2y}(1+\\tanh x) $$\n", "\n", "with derivative\n", "\n", "$$ H'(x) = -2y - \\frac{A}{2y}(1-\\tanh^2 x) $$\n", "\n", "This function is always positive or always negative so that $H$ is strictly monotonic.\n", "\n", "We can start Newton algorithm at $x_0=0$ which corresponds to $ y(\\a+\\d) = 0.5 $. 
"However we can't really know if we are close to the zero. To prove the convergence in all cases, we can use the Kantorovich theorem (reviewed in [5]). The sufficient condition to have convergence is that we start at a point $ x_0 $ such that\n",
$$\n", "\n", "The dual is\n", "\n", "$$ D(\\d) = -(y-\\a-\\d) (\\log(y-\\a-\\d) - 1) - \\bar{y} \\d - \\frac{A}{2} \\d^2 $$\n", "\n", "and its derivative is,\n", "\n", "$$ D'(\\d) = \\log(y-\\a-\\d) - \\bar{y} - A\\d $$\n", "\n", "Similar to the logistic loss, we perform a change of variable to handle the constraint on $ \\d $\n", "\n", "$$ y - (\\a+\\d) = e^x $$\n", "\n", "After this change of variable, the goal is to find the zero of this function\n", "\n", "$$ H(x) = x - \\bar{y} -A(y-\\a-e^x) $$\n", "\n", "whose first derivative is\n", "\n", "$$ H'(x) = 1+Ae^x $$\n", "\n", "Since this function is always positive, $H$ is increasing and has a unique zero.\n", "\n", "We can start Newton algorithm at $\\d=0$ which corresponds to $ x =\\log(y-\\a)$. As before the Newton step is given by\n", "\n", "$$x_{k+1} = x_k - \\frac{H(x_k)}{H'(x_k)}. $$\n", "\n", "### References\n", "\n", "[1] C. Ma et al., [Adding vs. Averaging in Distributed Primal-Dual Optimization](https://arxiv.org/pdf/1502.03508.pdf), 2015.\n", "\n", "[2] C. Ma et al., [Distributed Optimization with Arbitrary Local Solvers](https://arxiv.org/pdf/1512.04039.pdf), 2015.\n", "\n", "[3] S. Shalev-Shwartz, T. Zhang, [Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization](http://www.jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf), 2013.\n", "\n", "[4] S. Shalev-Shwartz, T. Zhang, [Accelerated Proximal Stochastic Dual Coordinate Ascent for Regularized Loss Minimization](https://arxiv.org/pdf/1309.2375.pdf), 2013.\n", "\n", "[5] A. Galantai, [The theory of Newton’s method](https://www.sciencedirect.com/science/article/pii/S0377042700004350), 2000.\n", "\n", "## Appendix\n", "\n", "#### Dual computation for smooth hinge loss\n", "\n", "We want to compute $\\l^\\star(v) = \\max_u [ uv-\\l(u) ] $ where $\\l$ is smooth hinge loss. We thus have to solve $v=\\l'(u)$. 
The derivative of smooth hinge loss is given by\n", "\n", "$$ \\l'(u) =\n", "\\begin{cases}\n", "0 \\:\\:\\: \u0026 y_i u \\geq 1\\\\\n", "-y \\:\\:\\:\u0026 y_i u \\leq1-\\gamma \\\\\n", "\\frac{u-y}{\\gamma} \u0026 \\text{otherwise}\n", "\\end{cases} $$\n", "\n", "By solving for $v$, we find the dual of smooth hinge loss as\n", "\n", "$$ \\l^\\star(v) = yv + \\frac{\\gamma}{2}v^2 $$\n", "\n", "with the restriction $ yv \\in (0,1) $.\n", "\n", "Now, we can now minimize the dual objective with respect to $\\d$\n", "\n", "$$ D(\\a+\\d) = -\\l^\\star(-\\a-\\d)-\\bar{y}\\d-\\frac{A}{2} \\d^2 $$\n", "\n", "which gives the expected result\n", "\n", "$$\\d = \\frac{y-\\bar{y}-\\gamma\\a}{A+\\gamma} $$\n", "\n", "with the constraint $ y(\\a+\\d) \\in (0,1)$." ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "SDCA.ipynb", "provenance": [ { "file_id": "1dYYnnjfGC6CIpfwy__EXL81Xnl26wiKM", "timestamp": 1539909050558 } ], "version": "0.3.2" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/python/sdca_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests for canned linear estimators with the SDCA optimizer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import tensorflow as tf from tensorflow_estimator.python.estimator.canned import linear class SDCAClassifierTest(tf.test.TestCase): def testRealValuedFeatures(self): """Tests LinearClassifier with LinearSDCA and real valued features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2']), 'maintenance_cost': tf.constant([[500.0], [200.0]]), 'sq_footage': tf.constant([[800.0], [600.0]]), 'weights': tf.constant([[1.0], [1.0]]) }, tf.constant([[0], [1]]) maintenance_cost = tf.feature_column.numeric_column('maintenance_cost') sq_footage = tf.feature_column.numeric_column('sq_footage') optimizer = linear.LinearSDCA(example_id_column='example_id') classifier = linear.LinearClassifierV2( feature_columns=[maintenance_cost, sq_footage], weight_column='weights', optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testRealValuedFeatureWithHigherDimension(self): """Tests LinearSDCA with real valued features of higher dimension.""" # input_fn is identical to the one in testRealValuedFeatures # where 2 1-dimensional dense features have been replaced by 1 2-dimensional # feature. 
def input_fn(): return { 'example_id': tf.constant(['1', '2']), 'dense_feature': tf.constant([[500.0, 800.0], [200.0, 600.0]]) }, tf.constant([[0], [1]]) dense_feature = tf.feature_column.numeric_column('dense_feature', shape=2) optimizer = linear.LinearSDCA(example_id_column='example_id') classifier = linear.LinearClassifierV2( feature_columns=[dense_feature], optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testBucketizedFeatures(self): """Tests LinearClassifier with LinearSDCA and bucketized features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.constant([[600.0], [1000.0], [400.0]]), 'sq_footage': tf.constant([[1000.0], [600.0], [700.0]]), 'weights': tf.constant([[1.0], [1.0], [1.0]]) }, tf.constant([[1], [0], [1]]) price_bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('price'), boundaries=[500.0, 700.0]) sq_footage_bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('sq_footage'), boundaries=[650.0]) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[price_bucket, sq_footage_bucket], weight_column='weights', optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testSparseFeatures(self): """Tests LinearClassifier with LinearSDCA and sparse features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]), 'weights': tf.constant([[1.0], [1.0], [1.0]]) }, tf.constant([[1], [0], [1]]) country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) optimizer = linear.LinearSDCA( 
example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[country], weight_column='weights', optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testWeightedSparseFeatures(self): """LinearClassifier with LinearSDCA and weighted sparse features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.sparse.SparseTensor( values=[2., 3., 1.], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]) }, tf.constant([[1], [0], [1]]) country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) country_weighted_by_price = ( tf.feature_column.weighted_categorical_column(country, 'price')) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[country_weighted_by_price], optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testWeightedSparseFeaturesOOVWithNoOOVBuckets(self): """LinearClassifier with LinearSDCA with OOV features (-1 IDs).""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.sparse.SparseTensor( values=[2., 3., 1.], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]), 'country': tf.sparse.SparseTensor( # 'GB' is out of the vocabulary. 
values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 5]) }, tf.constant([[1], [0], [1]]) country = tf.feature_column.categorical_column_with_vocabulary_list( 'country', vocabulary_list=['US', 'CA', 'MK', 'IT', 'CN']) country_weighted_by_price = ( tf.feature_column.weighted_categorical_column(country, 'price')) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[country_weighted_by_price], optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testCrossedFeatures(self): """Tests LinearClassifier with LinearSDCA and crossed features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'language': tf.sparse.SparseTensor( values=['english', 'italian', 'spanish'], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 1]), 'country': tf.sparse.SparseTensor( values=['US', 'IT', 'MX'], indices=[[0, 0], [1, 0], [2, 0]], dense_shape=[3, 1]) }, tf.constant([[0], [0], [1]]) country_language = tf.feature_column.crossed_column(['language', 'country'], hash_bucket_size=100) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[country_language], optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testMixedFeatures(self): """Tests LinearClassifier with LinearSDCA and a mix of features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.constant([[0.6], [0.8], [0.3]]), 'sq_footage': tf.constant([[900.0], [700.0], [600.0]]), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 3], [2, 1]], dense_shape=[3, 5]), 'weights': tf.constant([[3.0], [1.0], [1.0]]) }, 
tf.constant([[1], [0], [1]]) price = tf.feature_column.numeric_column('price') sq_footage_bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('sq_footage'), boundaries=[650.0, 800.0]) country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) sq_footage_country = tf.feature_column.crossed_column( [sq_footage_bucket, 'country'], hash_bucket_size=10) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifierV2( feature_columns=[price, sq_footage_bucket, country, sq_footage_country], weight_column='weights', optimizer=optimizer) classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) def testPartitionedVariables(self): """Tests LinearClassifier with LinearSDCA with partitioned variables.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.constant([[0.6], [0.8], [0.3]]), 'sq_footage': tf.constant([[900.0], [700.0], [600.0]]), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 3], [2, 1]], dense_shape=[3, 5]), 'weights': tf.constant([[3.0], [1.0], [1.0]]) }, tf.constant([[1], [0], [1]]) price = tf.feature_column.numeric_column('price') sq_footage_bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('sq_footage'), boundaries=[650.0, 800.0]) country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) sq_footage_country = tf.feature_column.crossed_column( [sq_footage_bucket, 'country'], hash_bucket_size=10) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.01) classifier = linear.LinearClassifier( feature_columns=[price, sq_footage_bucket, country, sq_footage_country], weight_column='weights', partitioner=tf.compat.v1.fixed_size_partitioner(num_shards=2, axis=0), optimizer=optimizer) 
classifier.train(input_fn=input_fn, steps=100) loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.2) class SDCARegressorTest(tf.test.TestCase): def testRealValuedLinearFeatures(self): """Tests LinearRegressor with LinearSDCA and real valued features.""" x = [[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]] weights = [[3.0], [-1.2], [0.5]] y = np.dot(x, weights) def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'x': tf.constant(x), 'weights': tf.constant([[10.0], [10.0], [10.0]]) }, tf.constant(y) x_column = tf.feature_column.numeric_column('x', shape=3) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[x_column], weight_column='weights', optimizer=optimizer) regressor.train(input_fn=input_fn, steps=20) loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.01) self.assertIn('linear/linear_model/x/weights', regressor.get_variable_names()) regressor_weights = regressor.get_variable_value( 'linear/linear_model/x/weights') self.assertAllClose([w[0] for w in weights], regressor_weights.flatten(), rtol=0.1) def testMixedFeaturesArbitraryWeights(self): """Tests LinearRegressor with LinearSDCA and a mix of features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.constant([0.6, 0.8, 0.3]), 'sq_footage': tf.constant([[900.0], [700.0], [600.0]]), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 3], [2, 1]], dense_shape=[3, 5]), 'weights': tf.constant([[3.0], [5.0], [7.0]]) }, tf.constant([[1.55], [-1.25], [-3.0]]) price = tf.feature_column.numeric_column('price') sq_footage_bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('sq_footage'), boundaries=[650.0, 800.0]) country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) sq_footage_country = 
tf.feature_column.crossed_column( [sq_footage_bucket, 'country'], hash_bucket_size=10) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[price, sq_footage_bucket, country, sq_footage_country], weight_column='weights', optimizer=optimizer) regressor.train(input_fn=input_fn, steps=20) loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.05) def testPartitionedVariables(self): """Tests LinearRegressor with LinearSDCA with partitioned variables.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.constant([0.6, 0.8, 0.3]), 'sq_footage': tf.constant([[900.0], [700.0], [600.0]]), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 3], [2, 1]], dense_shape=[3, 5]), 'weights': tf.constant([[3.0], [5.0], [7.0]]) }, tf.constant([[1.55], [-1.25], [-3.0]]) price = tf.feature_column.numeric_column('price') sq_footage_bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column('sq_footage'), boundaries=[650.0, 800.0]) country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) sq_footage_country = tf.feature_column.crossed_column( [sq_footage_bucket, 'country'], hash_bucket_size=10) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressor( feature_columns=[price, sq_footage_bucket, country, sq_footage_country], weight_column='weights', partitioner=tf.compat.v1.fixed_size_partitioner(num_shards=2, axis=0), optimizer=optimizer) regressor.train(input_fn=input_fn, steps=20) loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.05) def testSparseFeaturesWithL1Reg(self): """Tests LinearRegressor with LinearSDCA and sparse features.""" def input_fn(): return { 'example_id': tf.constant(['1', '2', '3']), 'price': tf.constant([[0.4], 
[0.6], [0.3]]), 'country': tf.sparse.SparseTensor( values=['IT', 'US', 'GB'], indices=[[0, 0], [1, 3], [2, 1]], dense_shape=[3, 5]), 'weights': tf.constant([[10.0], [10.0], [10.0]]) }, tf.constant([[1.4], [-0.8], [2.6]]) price = tf.feature_column.numeric_column('price') country = tf.feature_column.categorical_column_with_hash_bucket( 'country', hash_bucket_size=5) # Regressor with no L1 regularization. optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[price, country], weight_column='weights', optimizer=optimizer) regressor.train(input_fn=input_fn, steps=20) no_l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] variable_names = regressor.get_variable_names() self.assertIn('linear/linear_model/price/weights', variable_names) self.assertIn('linear/linear_model/country/weights', variable_names) no_l1_reg_weights = { 'linear/linear_model/price/weights': regressor.get_variable_value('linear/linear_model/price/weights'), 'linear/linear_model/country/weights': regressor.get_variable_value('linear/linear_model/country/weights'), } # Regressor with L1 regularization. optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l1_regularization=1.0, symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[price, country], weight_column='weights', optimizer=optimizer) regressor.train(input_fn=input_fn, steps=20) l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] l1_reg_weights = { 'linear/linear_model/price/weights': regressor.get_variable_value('linear/linear_model/price/weights'), 'linear/linear_model/country/weights': regressor.get_variable_value('linear/linear_model/country/weights'), } # Unregularized loss is lower when there is no L1 regularization. 
self.assertLess(no_l1_reg_loss, l1_reg_loss) self.assertLess(no_l1_reg_loss, 0.05) # But weights returned by the regressor with L1 regularization have smaller # L1 norm. l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0 for var_name in sorted(l1_reg_weights): l1_reg_weights_norm += sum( np.absolute(l1_reg_weights[var_name].flatten())) no_l1_reg_weights_norm += sum( np.absolute(no_l1_reg_weights[var_name].flatten())) print('Var name: %s, value: %s' % (var_name, no_l1_reg_weights[var_name].flatten())) self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm) def testBiasOnly(self): """Tests LinearRegressor with LinearSDCA and validates bias weight.""" def input_fn(): """Testing the bias weight when it's the only feature present. All of the instances in this input only have the bias feature, and a 1/4 of the labels are positive. This means that the expected weight for the bias should be close to the average prediction, i.e 0.25. Returns: Training data for the test. """ num_examples = 40 return { 'example_id': tf.constant([str(x + 1) for x in range(num_examples)]), # place_holder is an empty column which is always 0 (absent), because # LinearClassifier requires at least one column. 'place_holder': tf.constant([[0.0]] * num_examples), }, tf.constant([1 if i % 4 == 0 else 0 for i in range(num_examples)]) place_holder = tf.feature_column.numeric_column('place_holder') optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[place_holder], optimizer=optimizer) regressor.train(input_fn=input_fn, steps=100) self.assertNear( regressor.get_variable_value('linear/linear_model/bias_weights')[0], 0.25, err=0.1) def testBiasAndOtherColumns(self): """Tests LinearRegressor with LinearSDCA and validates bias weight.""" def input_fn(): """Testing the bias weight when there are other features present. 
1/2 of the instances in this input have feature 'a', the rest have feature 'b', and we expect the bias to be added to each instance as well. 0.4 of all instances that have feature 'a' are positive, and 0.2 of all instances that have feature 'b' are positive. The labels in the dataset are ordered to appear shuffled since SDCA expects shuffled data, and converges faster with this pseudo-random ordering. If the bias was not regularized we would expect the weights to be: bias: 0.3 a: 0.1 b: -0.1 Bu with bias regularization the optimal values are: bias: 0.2 a: 0.2 b: 0.0 Returns: The test dataset. """ num_examples = 200 half = int(num_examples / 2) return { 'example_id': tf.constant([str(x + 1) for x in range(num_examples)]), 'a': tf.constant([[1]] * int(half) + [[0]] * int(half)), 'b': tf.constant([[0]] * int(half) + [[1]] * int(half)), }, tf.constant([[x] for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * int(half / 10) + [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * int(half / 10)]) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[ tf.feature_column.numeric_column('a'), tf.feature_column.numeric_column('b') ], optimizer=optimizer) regressor.train(input_fn=input_fn, steps=200) variable_names = regressor.get_variable_names() self.assertIn('linear/linear_model/bias_weights', variable_names) self.assertIn('linear/linear_model/a/weights', variable_names) self.assertIn('linear/linear_model/b/weights', variable_names) # TODO(b/29339026): Change the expected results to expect a centered bias. 
self.assertNear( regressor.get_variable_value('linear/linear_model/bias_weights')[0], 0.2, err=0.05) self.assertNear( regressor.get_variable_value('linear/linear_model/a/weights')[0], 0.2, err=0.05) self.assertNear( regressor.get_variable_value('linear/linear_model/b/weights')[0], 0.0, err=0.05) def testBiasAndOtherColumnsFabricatedCentered(self): """Tests LinearRegressor with LinearSDCA and validates bias weight.""" def input_fn(): """Testing the bias weight when there are other features present. 1/2 of the instances in this input have feature 'a', the rest have feature 'b', and we expect the bias to be added to each instance as well. 0.1 of all instances that have feature 'a' have a label of 1, and 0.1 of all instances that have feature 'b' have a label of -1. We can expect the weights to be: bias: 0.0 a: 0.1 b: -0.1 Returns: The test dataset. """ num_examples = 200 half = int(num_examples / 2) return { 'example_id': tf.constant([str(x + 1) for x in range(num_examples)]), 'a': tf.constant([[1]] * int(half) + [[0]] * int(half)), 'b': tf.constant([[0]] * int(half) + [[1]] * int(half)), }, tf.constant([[1 if x % 10 == 0 else 0] for x in range(half)] + [[-1 if x % 10 == 0 else 0] for x in range(half)]) optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1) regressor = linear.LinearRegressorV2( feature_columns=[ tf.feature_column.numeric_column('a'), tf.feature_column.numeric_column('b') ], optimizer=optimizer) regressor.train(input_fn=input_fn, steps=100) variable_names = regressor.get_variable_names() self.assertIn('linear/linear_model/bias_weights', variable_names) self.assertIn('linear/linear_model/a/weights', variable_names) self.assertIn('linear/linear_model/b/weights', variable_names) self.assertNear( regressor.get_variable_value('linear/linear_model/bias_weights')[0], 0.0, err=0.05) self.assertNear( regressor.get_variable_value('linear/linear_model/a/weights')[0], 0.1, err=0.05) self.assertNear( 
regressor.get_variable_value('linear/linear_model/b/weights')[0], -0.1, err=0.05) def testUnknownBatchSize(self): """Tests LinearRegressor with LinearSDCA and unknown batch size.""" def input_fn(): # Similar to testBiasOnly but use placeholder_with_default in order to # let the static batch size unspecified. return { 'example_id': tf.compat.v1.placeholder_with_default( tf.constant(['0', '1']), shape=[None]), # always_zero is an empty column which is always 0 (absent), because # LinearClassifier requires at least one column. 'always_zero': tf.compat.v1.placeholder_with_default( tf.constant([[0.0]] * 2), shape=[None, 1]), }, tf.compat.v1.placeholder_with_default( tf.constant([0.0, 1.0]), shape=[None]) always_zero = tf.feature_column.numeric_column('always_zero') optimizer = linear.LinearSDCA( example_id_column='example_id', symmetric_l2_regularization=0.1, num_table_shards=3) regressor = linear.LinearRegressorV2( feature_columns=[always_zero], optimizer=optimizer) regressor.train(input_fn=input_fn, steps=100) self.assertNear( regressor.get_variable_value('linear/linear_model/bias_weights')[0], 0.5, err=0.1) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/python/utils/sdca_ops.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Proximal stochastic dual coordinate ascent optimizer for linear models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from six.moves import range
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework.ops import internal_convert_to_tensor
from tensorflow.python.framework.ops import name_scope
from tensorflow.python.ops import gen_sdca_ops
from tensorflow.python.ops import variables as var_ops
from tensorflow.python.ops.nn import log_poisson_loss
from tensorflow.python.ops.nn import sigmoid_cross_entropy_with_logits
from tensorflow_estimator.python.estimator.canned.linear_optimizer.python.utils.sharded_mutable_dense_hashtable import _ShardedMutableDenseHashTable


class _SparseFeatureColumn(object):
  """Represents a sparse feature column.

  This is meant to be a more efficient representation than tf.SparseFeature
  for the purpose of SDCA optimization. Contains three tensors representing a
  sparse feature column, they are example indices (`int64`), feature indices
  (`int64`), and feature values (`float`). Feature weights are optional, and
  are treated as `1.0f` if missing.

  For example, consider a batch of 4 examples, which contains the following
  features in a particular `_SparseFeatureColumn`:

  * Example 0: feature 5, value 1
  * Example 1: feature 6, value 1 and feature 10, value 0.5
  * Example 2: no features
  * Example 3: two copies of feature 2, value 1

  This _SparseFeatureColumn will be represented as follows:

  ```
   <0, 5,  1>
   <1, 6,  1>
   <1, 10, 0.5>
   <3, 2,  1>
   <3, 2,  1>
  ```

  For a batch of 2 examples below:

  * Example 0: feature 5
  * Example 1: feature 6

  is represented by `_SparseFeatureColumn` as:

  ```
   <0, 5, 1>
   <1, 6, 1>
  ```

  @@__init__
  @@example_indices
  @@feature_indices
  @@feature_values
  """

  def __init__(self, example_indices, feature_indices, feature_values):
    """Creates a `_SparseFeatureColumn` representation.

    Args:
      example_indices: A 1-D int64 tensor of shape `[N]`. Also, accepts python
        lists, or numpy arrays.
      feature_indices: A 1-D int64 tensor of shape `[N]`. Also, accepts python
        lists, or numpy arrays.
      feature_values: An optional 1-D tensor float tensor of shape `[N]`.
        Also, accepts python lists, or numpy arrays.

    Returns:
      A `_SparseFeatureColumn`
    """
    # Convert eagerly inside a shared name scope so the resulting graph
    # tensors get consistent 'SparseFeatureColumn/...' names.
    with name_scope(None, 'SparseFeatureColumn',
                    [example_indices, feature_indices]):
      self._example_indices = internal_convert_to_tensor(
          example_indices, name='example_indices', dtype=tf.dtypes.int64)
      self._feature_indices = internal_convert_to_tensor(
          feature_indices, name='feature_indices', dtype=tf.dtypes.int64)
    # Feature values are optional; None means "all weights are 1.0".
    self._feature_values = None
    if feature_values is not None:
      with name_scope(None, 'SparseFeatureColumn', [feature_values]):
        self._feature_values = internal_convert_to_tensor(
            feature_values, name='feature_values', dtype=tf.dtypes.float32)

  @property
  def example_indices(self):
    """The example indices represented as a dense tensor.

    Returns:
      A 1-D Tensor of int64 with shape `[N]`.
    """
    return self._example_indices

  @property
  def feature_indices(self):
    """The feature indices represented as a dense tensor.

    Returns:
      A 1-D Tensor of int64 with shape `[N]`.
    """
    return self._feature_indices

  @property
  def feature_values(self):
    """The feature values represented as a dense tensor.

    Returns:
      May return None, or a 1-D Tensor of float32 with shape `[N]`.
    """
    return self._feature_values
def __init__(self, examples, variables, options):
  """Create a new sdca optimizer.

  Args:
    examples: Dict with keys 'example_labels', 'example_weights',
      'example_ids', 'sparse_features' (list of `_SparseFeatureColumn`) and
      'dense_features' (list of float32 tensors).
    variables: Dict with keys 'sparse_features_weights' and
      'dense_features_weights', each a list of model weight variables.
    options: Dict with required keys 'loss_type',
      'symmetric_l1_regularization' and 'symmetric_l2_regularization', plus
      optional 'num_loss_partitions', 'num_table_shards' and 'adaptive'.

  Raises:
    ValueError: If any argument is missing or malformed, the loss type is
      unsupported, or a regularization strength is out of range.
  """
  if not examples or not variables or not options:
    raise ValueError('examples, variables and options must all be specified.')

  supported_losses = ('logistic_loss', 'squared_loss', 'hinge_loss',
                      'smooth_hinge_loss', 'poisson_loss')
  if options['loss_type'] not in supported_losses:
    # Fixed: the previous code passed two positional args to ValueError
    # (message, value), which rendered the error text as a tuple. Format a
    # single message instead.
    raise ValueError('Unsupported loss_type: %s' % options['loss_type'])

  self._assert_specified([
      'example_labels', 'example_weights', 'example_ids', 'sparse_features',
      'dense_features'
  ], examples)
  self._assert_list(['sparse_features', 'dense_features'], examples)

  self._assert_specified(['sparse_features_weights', 'dense_features_weights'],
                         variables)
  self._assert_list(['sparse_features_weights', 'dense_features_weights'],
                    variables)

  self._assert_specified([
      'loss_type', 'symmetric_l2_regularization',
      'symmetric_l1_regularization'
  ], options)

  if options['symmetric_l2_regularization'] <= 0.0:
    raise ValueError('symmetric_l2_regularization should be positive.')
  if options['symmetric_l2_regularization'] <= 1.0:
    # Small l2 is legal but usually a mistake for SDCA, so only warn.
    tf.compat.v1.logging.warn(
        'symmetric_l2_regularization for SDCA should typically be '
        'larger than for online optimization methods. Recommended '
        'value is of the order of the average L2 norm of the '
        'training examples.')
  if options['symmetric_l1_regularization'] < 0.0:
    raise ValueError('symmetric_l1_regularization should be non-negative.')

  self._examples = examples
  self._variables = variables
  self._options = options
  self._create_slots()

  # Per-example solver state keyed by hashed example id; columns 1-3 are
  # (primal_loss, dual_loss, example_weight) as consumed by
  # approximate_duality_gap().
  self._hashtable = _ShardedMutableDenseHashTable(
      key_dtype=tf.dtypes.int64,
      value_dtype=tf.dtypes.float32,
      num_shards=self._num_table_shards(),
      default_value=[0.0, 0.0, 0.0, 0.0],
      # SdcaFprint never returns 0 or 1 for the low64 bits, so this a safe
      # empty_key (that will never collide with actual payloads).
      empty_key=[0, 0],
      deleted_key=[1, 1])

  tf.compat.v1.summary.scalar('approximate_duality_gap',
                              self.approximate_duality_gap())
  tf.compat.v1.summary.scalar('examples_seen', self._hashtable.size())

def _symmetric_l1_regularization(self):
  # L1 strength exactly as provided in options (validated non-negative).
  return self._options['symmetric_l1_regularization']

def _symmetric_l2_regularization(self):
  # L2 strength exactly as provided in options (validated positive).
  return self._options['symmetric_l2_regularization']

def _num_loss_partitions(self):
  # Number of partitions of the global objective; 1 == single machine.
  return self._options.get('num_loss_partitions', 1)

def _adaptive(self):
  # Perform adaptive sampling (enabled by default).
  return self._options.get('adaptive', True)

def _num_table_shards(self):
  # Number of hash table shards.
  # Return 1 if not specified or if the value is 'None'
  num_shards = self._options.get('num_table_shards')
  return 1 if num_shards is None else num_shards
if (isinstance(var, var_ops.PartitionedVariable) or isinstance(var, list)): var_list = [] for v in var: with ops.colocate_with(v): slot_var = tf.Variable( initial_value=tf.compat.v1.zeros_like( tf.cond( tf.compat.v1.is_variable_initialized(v), v.read_value, lambda: v.initial_value), tf.dtypes.float32), name=v.op.name + '_unshrunk') var_list.append(slot_var) self._slots['unshrunk_' + name].append(var_list) else: with tf.compat.v1.device(var.device): self._slots['unshrunk_' + name].append( tf.Variable( tf.compat.v1.zeros_like( tf.cond( tf.compat.v1.is_variable_initialized(var), var.read_value, lambda: var.initial_value), tf.dtypes.float32), name=var.op.name + '_unshrunk')) def _assert_specified(self, items, check_in): for x in items: if check_in[x] is None: raise ValueError(check_in[x] + ' must be specified.') def _assert_list(self, items, check_in): for x in items: if not isinstance(check_in[x], list): raise ValueError(x + ' must be a list.') def _var_to_list(self, var): """Wraps var in a list if it is not a list or PartitionedVariable.""" if not isinstance(var, (list, var_ops.PartitionedVariable)): var = [var] return var def _l1_loss(self): """Computes the (un-normalized) l1 loss of the model.""" with name_scope('sdca/l1_loss'): sums = [] for name in ['sparse_features_weights', 'dense_features_weights']: for var in self._variables[name]: for v in self._var_to_list(var): weights = internal_convert_to_tensor(v) with tf.compat.v1.device(weights.device): sums.append( tf.math.reduce_sum( tf.math.abs(tf.cast(weights, tf.dtypes.float64)))) # SDCA L1 regularization cost is: l1 * sum(|weights|) return self._symmetric_l1_regularization() * tf.math.add_n(sums) def _l2_loss(self): """Computes the (un-normalized) l2 loss of the model.""" with name_scope('sdca/l2_loss'): sums = [] for name in ['sparse_features_weights', 'dense_features_weights']: for var in self._variables[name]: for v in self._var_to_list(var): weights = internal_convert_to_tensor(v) with 
tf.compat.v1.device(weights.device): sums.append( tf.math.reduce_sum( tf.math.square(tf.cast(weights, tf.dtypes.float64)))) # SDCA L2 regularization cost is: l2 * sum(weights^2) / 2 return self._symmetric_l2_regularization() * tf.math.add_n(sums) / 2.0 def _convert_n_to_tensor(self, input_list, as_ref=False): """Converts input list to a set of tensors.""" # input_list can be a list of Variables (that are implicitly partitioned), # in which case the underlying logic in internal_convert_to_tensor will not # concatenate the partitions together. This method takes care of the # concatenating (we only allow partitioning on the first axis). output_list = [] for x in input_list: tensor_to_convert = x if isinstance(x, list) or isinstance(x, var_ops.PartitionedVariable): # We only allow for partitioning on the first axis. tensor_to_convert = tf.concat(x, axis=0) output_list.append( internal_convert_to_tensor(tensor_to_convert, as_ref=as_ref)) return output_list def _get_first_dimension_size_statically(self, w, num_partitions): """Compute the static size of the first dimension for a sharded variable.""" dim_0_size = w[0].get_shape()[0] for p in range(1, num_partitions): dim_0_size += w[p].get_shape()[0] return dim_0_size def _linear_predictions(self, examples): """Returns predictions of the form w*x. Args: examples: Examples to compute predictions on. 
""" with name_scope('sdca/prediction'): batch_size = tf.compat.v1.shape(examples['example_ids'])[0] predictions = tf.zeros([batch_size]) sparse_variables = self._convert_n_to_tensor( self._variables['sparse_features_weights']) for sfc, sv in zip(examples['sparse_features'], sparse_variables): unpadded_dot_product = tf.math.segment_sum( tf.math.multiply( tf.compat.v1.gather(sv, sfc.feature_indices), sfc.feature_values), sfc.example_indices) predictions += tf.compat.v1.pad( unpadded_dot_product, [[0, batch_size - tf.compat.v1.shape(unpadded_dot_product)[0]]]) dense_features = self._convert_n_to_tensor(examples['dense_features']) dense_variables = self._convert_n_to_tensor( self._variables['dense_features_weights']) for i in range(len(dense_variables)): predictions += tf.compat.v1.squeeze( tf.linalg.matmul(dense_features[i], tf.compat.v1.expand_dims(dense_variables[i], -1))) return predictions def predictions(self, examples): """Add operations to compute predictions by the model. If logistic_loss is being used, predicted probabilities are returned. If poisson_loss is being used, predictions are exponentiated. Otherwise, (raw) linear predictions (w*x) are returned. Args: examples: Examples to compute predictions on. Returns: An Operation that computes the predictions for examples. Raises: ValueError: if examples are not well defined. """ self._assert_specified( ['example_weights', 'sparse_features', 'dense_features'], examples) self._assert_list(['sparse_features', 'dense_features'], examples) result = self._linear_predictions(examples) if self._options['loss_type'] == 'logistic_loss': # Convert logits to probability for logistic loss predictions. with name_scope('sdca/logistic_prediction'): result = tf.math.sigmoid(result) elif self._options['loss_type'] == 'poisson_loss': # Exponeniate the prediction for poisson loss predictions. 
with name_scope('sdca/poisson_prediction'): result = tf.math.exp(result) return result def _get_partitioned_update_ops(self, v_num, num_partitions_by_var, p_assignments_by_var, gather_ids_by_var, weights, full_update, p_assignments, num_partitions): """Get updates for partitioned variables.""" num_partitions = num_partitions_by_var[v_num] p_assignments = p_assignments_by_var[v_num] gather_ids = gather_ids_by_var[v_num] updates = tf.dynamic_partition(full_update, p_assignments, num_partitions) update_ops = [] for p in range(num_partitions): with ops.colocate_with(weights[p]): result = tf.compat.v1.scatter_add(weights[p], gather_ids[p], updates[p]) update_ops.append(result) return update_ops def minimize(self, global_step=None, name=None): """Add operations to train a linear model by minimizing the loss function. Args: global_step: Optional `Variable` to increment by one after the variables have been updated. name: Optional name for the returned operation. Returns: An Operation that updates the variables passed in the constructor. """ # Technically, the op depends on a lot more than the variables, # but we'll keep the list short. with name_scope(name, 'sdca/minimize'): sparse_example_indices = [] sparse_feature_indices = [] sparse_features_values = [] for sf in self._examples['sparse_features']: sparse_example_indices.append(sf.example_indices) sparse_feature_indices.append(sf.feature_indices) # If feature values are missing, sdca assumes a value of 1.0f. if sf.feature_values is not None: sparse_features_values.append(sf.feature_values) example_ids_hashed = tf.compat.v1.train.sdca_fprint( internal_convert_to_tensor(self._examples['example_ids'])) example_state_data = self._hashtable.lookup(example_ids_hashed) # Solver returns example_state_update, new delta sparse_feature_weights # and delta dense_feature_weights. 
sparse_weights = [] sparse_indices = [] # If we have partitioned variables, keep a few dictionaries of Tensors # around that we need for the assign_add after the op call to # gen_sdca_ops.sdca_optimizer(). These are keyed because we may have a # mix of partitioned and un-partitioned variables. num_partitions_by_var = {} p_assignments_by_var = {} gather_ids_by_var = {} for v_num, (w, i) in enumerate( zip(self._slots['unshrunk_sparse_features_weights'], sparse_feature_indices)): # Append the sparse_indices (in full-variable space). sparse_idx = tf.cast( tf.unique(tf.cast(i, tf.dtypes.int32))[0], tf.dtypes.int64) sparse_indices.append(sparse_idx) if isinstance(w, list) or isinstance(w, var_ops.PartitionedVariable): num_partitions = len(w) flat_ids = tf.reshape(sparse_idx, [-1]) # We use div partitioning, which is easiest to support downstream. # Compute num_total_ids as the sum of dim-0 of w, then assign # to partitions based on a constant number of ids per partition. # Optimize if we already know the full shape statically. 
dim_0_size = self._get_first_dimension_size_statically( w, num_partitions) if tf.compat.dimension_value(dim_0_size): num_total_ids = tf.constant( tf.compat.dimension_value(dim_0_size), flat_ids.dtype) else: dim_0_sizes = [] for p in range(num_partitions): if tf.compat.dimension_value(w[p].shape[0]) is not None: dim_0_sizes.append(tf.compat.dimension_value(w[p].shape[0])) else: with ops.colocate_with(w[p]): dim_0_sizes.append(tf.compat.v1.shape(w[p])[0]) num_total_ids = tf.math.reduce_sum( tf.cast(tf.stack(dim_0_sizes), flat_ids.dtype)) ids_per_partition = num_total_ids // num_partitions extras = num_total_ids % num_partitions p_assignments = tf.math.maximum(flat_ids // (ids_per_partition + 1), (flat_ids - extras) // ids_per_partition) # Emulate a conditional using a boolean indicator tensor new_ids = tf.where(p_assignments < extras, flat_ids % (ids_per_partition + 1), (flat_ids - extras) % ids_per_partition) # Cast partition assignments to int32 for use in dynamic_partition. # There really should not be more than 2^32 partitions. p_assignments = tf.cast(p_assignments, tf.dtypes.int32) # Partition list of ids based on assignments into num_partitions # separate lists. gather_ids = tf.dynamic_partition(new_ids, p_assignments, num_partitions) # Add these into the dictionaries for use in the later update. num_partitions_by_var[v_num] = num_partitions p_assignments_by_var[v_num] = p_assignments gather_ids_by_var[v_num] = gather_ids # Gather the weights from each partition. partition_gathered_weights = [] for p in range(num_partitions): with ops.colocate_with(w[p]): partition_gathered_weights.append( tf.compat.v1.gather(w[p], gather_ids[p])) # Stitch the weights back together in the same order they were before # we dynamic_partitioned them. 
condition_indices = tf.dynamic_partition( tf.range(tf.compat.v1.shape(new_ids)[0]), p_assignments, num_partitions) batch_gathered_weights = tf.dynamic_stitch( condition_indices, partition_gathered_weights) else: w_as_tensor = internal_convert_to_tensor(w) with tf.compat.v1.device(w_as_tensor.device): batch_gathered_weights = tf.compat.v1.gather( w_as_tensor, sparse_idx) sparse_weights.append(batch_gathered_weights) if tf.compat.forward_compatible(year=2018, month=10, day=30): esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2( sparse_example_indices, sparse_feature_indices, sparse_features_values, self._convert_n_to_tensor(self._examples['dense_features']), internal_convert_to_tensor(self._examples['example_weights']), internal_convert_to_tensor(self._examples['example_labels']), sparse_indices, sparse_weights, self._convert_n_to_tensor( self._slots['unshrunk_dense_features_weights']), example_state_data, loss_type=self._options['loss_type'], l1=self._symmetric_l1_regularization(), l2=self._symmetric_l2_regularization(), num_loss_partitions=self._num_loss_partitions(), num_inner_iterations=1, adaptive=self._adaptive()) else: esu, sfw, dfw = tf.compat.v1.train.sdca_optimizer( sparse_example_indices, sparse_feature_indices, sparse_features_values, self._convert_n_to_tensor(self._examples['dense_features']), internal_convert_to_tensor(self._examples['example_weights']), internal_convert_to_tensor(self._examples['example_labels']), sparse_indices, sparse_weights, self._convert_n_to_tensor( self._slots['unshrunk_dense_features_weights']), example_state_data, loss_type=self._options['loss_type'], l1=self._symmetric_l1_regularization(), l2=self._symmetric_l2_regularization(), num_loss_partitions=self._num_loss_partitions(), num_inner_iterations=1, adaptative=self._adaptive()) with tf.control_dependencies([esu]): update_ops = [self._hashtable.insert(example_ids_hashed, esu)] # Update the weights before the proximal step. 
def update_weights(self, train_op):
  """Updates the model weights.

  This function must be called on at least one worker after `minimize`.
  In distributed training this call can be omitted on non-chief workers to
  speed up training.

  Args:
    train_op: The operation returned by the `minimize` call.

  Returns:
    An Operation that updates the model weights.
  """
  with tf.control_dependencies([train_op]):
    update_ops = []
    # Copy over unshrunk weights to user provided variables.
    for name in ['sparse_features_weights', 'dense_features_weights']:
      for var, slot_var in zip(self._variables[name],
                               self._slots['unshrunk_' + name]):
        for v, sv in zip(self._var_to_list(var), self._var_to_list(slot_var)):
          update_ops.append(v.assign(sv))

    # Apply proximal step.
    if self._symmetric_l1_regularization() > 0:
      # Soft-thresholding amount for the L1 proximal operator.
      shrinkage = (
          self._symmetric_l1_regularization() /
          self._symmetric_l2_regularization())
      with tf.control_dependencies(update_ops):
        update_ops = []
        for name in ['sparse_features_weights', 'dense_features_weights']:
          for var in self._variables[name]:
            for v in self._var_to_list(var):
              with tf.compat.v1.device(v.device):
                # Shrink each weight towards zero by `shrinkage`,
                # clamping at zero (soft-thresholding).
                v_shrunk = tf.math.sign(v) * tf.math.maximum(
                    0.0,
                    tf.math.abs(v) - shrinkage)
                update_ops.append(v.assign(v_shrunk))
        return tf.group(*update_ops)
    else:
      return tf.group(*update_ops)

def approximate_duality_gap(self):
  """Add operations to compute the approximate duality gap.

  Returns:
    An Operation that computes the approximate duality gap over all
    examples.
  """
  with name_scope('sdca/approximate_duality_gap'):
    _, values_list = self._hashtable.export_sharded()
    shard_sums = []
    for values in values_list:
      with tf.compat.v1.device(values.device):
        # For large tables to_double() below allocates a large temporary
        # tensor that is freed once the sum operation completes. To reduce
        # peak memory usage in cases where we have multiple large tables on a
        # single device, we serialize these operations.
        # Note that we need double precision to get accurate results.
        with tf.control_dependencies(shard_sums):
          shard_sums.append(
              tf.math.reduce_sum(tf.cast(values, dtype=tf.dtypes.float64), 0))
    summed_values = tf.math.add_n(shard_sums)

    # Columns of the per-example state table: [1]=primal, [2]=dual,
    # [3]=example weight.
    primal_loss = summed_values[1]
    dual_loss = summed_values[2]
    example_weights = summed_values[3]
    # Note: we return NaN if there are no weights or all weights are 0, e.g.
    # if no examples have been processed
    return (primal_loss + dual_loss + self._l1_loss() +
            (2.0 * self._l2_loss())) / example_weights

def unregularized_loss(self, examples):
  """Add operations to compute the loss (without the regularization loss).

  Args:
    examples: Examples to compute unregularized loss on.

  Returns:
    An Operation that computes mean (unregularized) loss for given set of
    examples.

  Raises:
    ValueError: if examples are not well defined.
  """
  self._assert_specified([
      'example_labels', 'example_weights', 'sparse_features', 'dense_features'
  ], examples)
  self._assert_list(['sparse_features', 'dense_features'], examples)
  with name_scope('sdca/unregularized_loss'):
    # All losses are computed in float64 for numerical accuracy, weighted
    # per example and normalized by the total example weight.
    predictions = tf.cast(
        self._linear_predictions(examples), tf.dtypes.float64)
    labels = tf.cast(
        internal_convert_to_tensor(examples['example_labels']),
        tf.dtypes.float64)
    weights = tf.cast(
        internal_convert_to_tensor(examples['example_weights']),
        tf.dtypes.float64)

    if self._options['loss_type'] == 'logistic_loss':
      return tf.math.reduce_sum(
          tf.math.multiply(
              sigmoid_cross_entropy_with_logits(
                  labels=labels, logits=predictions),
              weights)) / tf.math.reduce_sum(weights)

    if self._options['loss_type'] == 'poisson_loss':
      return tf.math.reduce_sum(
          tf.math.multiply(
              log_poisson_loss(targets=labels, log_input=predictions),
              weights)) / tf.math.reduce_sum(weights)

    if self._options['loss_type'] in ['hinge_loss', 'smooth_hinge_loss']:
      # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
      # first convert 0/1 labels into -1/1 labels.
      all_ones = tf.compat.v1.ones_like(predictions)
      adjusted_labels = tf.math.subtract(2 * labels, all_ones)
      # Tensor that contains (unweighted) error (hinge loss) per
      # example.
      error = tf.nn.relu(
          tf.math.subtract(all_ones,
                           tf.math.multiply(adjusted_labels, predictions)))
      weighted_error = tf.math.multiply(error, weights)
      return tf.math.reduce_sum(weighted_error) / tf.math.reduce_sum(weights)

    # squared loss
    err = tf.math.subtract(labels, predictions)
    weighted_squared_err = tf.math.multiply(tf.math.square(err), weights)
    # SDCA squared loss function is sum(err^2) / (2*sum(weights))
    return (tf.math.reduce_sum(weighted_squared_err) /
            (2.0 * tf.math.reduce_sum(weights)))

def regularized_loss(self, examples):
  """Add operations to compute the loss with regularization loss included.

  Args:
    examples: Examples to compute loss on.

  Returns:
    An Operation that computes mean (regularized) loss for given set of
    examples.

  Raises:
    ValueError: if examples are not well defined.
  """
  self._assert_specified([
      'example_labels', 'example_weights', 'sparse_features', 'dense_features'
  ], examples)
  self._assert_list(['sparse_features', 'dense_features'], examples)
  with name_scope('sdca/regularized_loss'):
    weights = internal_convert_to_tensor(examples['example_weights'])
    # Regularization terms are un-normalized, so divide by the total
    # example weight before adding the mean unregularized loss.
    return ((self._l1_loss() + self._l2_loss()) /
            tf.math.reduce_sum(tf.cast(weights, tf.dtypes.float64)) +
            self.unregularized_loss(examples))
# ============================================================================== """Tests for SdcaModel.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import random import threading import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.framework.test_util import TensorFlowTestCase from tensorflow.python.platform import googletest from tensorflow_estimator.python.estimator.canned.linear_optimizer.python.utils.sdca_ops import _SDCAModel from tensorflow_estimator.python.estimator.canned.linear_optimizer.python.utils.sdca_ops import _SparseFeatureColumn _MAX_ITERATIONS = 100 _SHARD_NUMBERS = [None, 1, 3] _NUM_LOSS_PARTITIONS = [4] def make_example_proto(feature_dict, target, value=1.0): e = example_pb2.Example() features = e.features features.feature['target'].float_list.value.append(target) for key, values in feature_dict.items(): features.feature[key + '_indices'].int64_list.value.extend(values) features.feature[key + '_values'].float_list.value.extend([value] * len(values)) return e def make_example_dict(example_protos, example_weights): def parse_examples(example_protos): features = { 'target': tf.io.FixedLenFeature( shape=[1], dtype=tf.dtypes.float32, default_value=0), 'age_indices': tf.io.VarLenFeature(dtype=tf.dtypes.int64), 'age_values': tf.io.VarLenFeature(dtype=tf.dtypes.float32), 'gender_indices': tf.io.VarLenFeature(dtype=tf.dtypes.int64), 'gender_values': tf.io.VarLenFeature(dtype=tf.dtypes.float32) } return tf.compat.v1.io.parse_example( [e.SerializeToString() for e in example_protos], features) parsed = parse_examples(example_protos) sparse_features = [ _SparseFeatureColumn( tf.reshape( tf.split( value=parsed['age_indices'].indices, num_or_size_splits=2, axis=1)[0], [-1]), tf.reshape(parsed['age_indices'].values, 
def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero):
  """Creates random, L2-normalized examples plus a zero weight variable.

  Each example gets `num_non_zero` distinct feature indices in [0, dim) with
  value num_non_zero**-0.5 (so each example has unit L2 norm), a random
  weight in [0, 1) and a random 0/1 label. Seeded for determinism.

  Returns:
    A `(examples_dict, variables_dict)` pair in the format `_SDCAModel`
    expects.
  """
  random.seed(1)

  sparse_features = [
      _SparseFeatureColumn(
          [i for i in range(num_examples) for _ in range(num_non_zero)], [
              i for _ in range(num_examples)
              for i in random.sample(range(dim), num_non_zero)
          ],
          [num_non_zero**(-0.5) for _ in range(num_examples * num_non_zero)])
  ]
  examples_dict = dict(
      sparse_features=sparse_features,
      dense_features=[],
      example_weights=[random.random() for _ in range(num_examples)],
      example_labels=[
          1. if random.random() > 0.5 else 0. for _ in range(num_examples)
      ],
      example_ids=[str(i) for i in range(num_examples)])

  weights = tf.compat.v1.Variable(
      tf.zeros([dim], dtype=tf.dtypes.float32))
  variables_dict = dict(
      sparse_features_weights=[weights], dense_features_weights=[])

  return examples_dict, variables_dict


def make_variable_dict(max_age, max_gender, num_shards=None, partitioned=False):
  # TODO(dbaylor):  Figure out how to derive max_age & max_gender from
  # examples_dict.
  # Weight variables for the 'age' and 'gender' sparse features used by
  # make_example_dict; optionally partitioned across 2 shards to exercise
  # the PartitionedVariable code paths.
  partitioner = None
  if partitioned:
    partitioner = tf.compat.v1.fixed_size_partitioner(num_shards=2, axis=0)
  with tf.compat.v1.variable_scope(
      name_or_scope=('variables/shard_{}'.format(num_shards)
                     if num_shards else 'variables'),
      partitioner=partitioner):
    age_weights = tf.compat.v1.get_variable(
        name='age',
        initializer=tf.zeros([max_age + 1], dtype=tf.dtypes.float32))
    gender_weights = tf.compat.v1.get_variable(
        name='gender',
        initializer=tf.zeros([max_gender + 1], dtype=tf.dtypes.float32))
  return dict(
      sparse_features_weights=[age_weights, gender_weights],
      dense_features_weights=[])
dense_weights.append( tf.compat.v1.Variable( tf.zeros([dense_tensor.get_shape().as_list()[1]], dtype=tf.dtypes.float32))) examples_dict = dict( sparse_features=[], dense_features=dense_tensors, example_weights=weights, example_labels=labels, example_ids=['%d' % i for i in range(0, len(labels))]) variables_dict = dict( sparse_features_weights=[], dense_features_weights=dense_weights) return examples_dict, variables_dict def get_binary_predictions_for_logistic(predictions, cutoff=0.5): return tf.cast( tf.math.greater_equal(predictions, tf.compat.v1.ones_like(predictions) * cutoff), dtype=tf.dtypes.int32) def get_binary_predictions_for_hinge(predictions): return tf.cast( tf.math.greater_equal(predictions, tf.compat.v1.zeros_like(predictions)), dtype=tf.dtypes.int32) # TODO(pmol): Refactor tests to avoid repetition of boilerplate code. class _SDCAModelTest(TensorFlowTestCase): """Base SDCA optimizer test class for any loss type.""" def _single_threaded_test_session(self): config = tf.compat.v1.ConfigProto( inter_op_parallelism_threads=1, intra_op_parallelism_threads=1) return self.test_session(use_gpu=False, config=config) # ResourceVariable only runs in graph mode @test_util.deprecated_graph_mode_only class SdcaWithLogisticLossTest(_SDCAModelTest): """SDCA optimizer test class for logistic loss.""" def testSimple(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = 
lr.regularized_loss(examples) predictions = lr.predictions(examples) self.assertAllClose(0.693147, unregularized_loss.eval()) self.assertAllClose(0.693147, loss.eval()) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. # regularization and still have a sum that is quite close to the # optimal regularized_loss value. SDCA's duality gap only ensures that # the regularized_loss is within 0.01 of optimal. # 0.525457 is the optimal regularized_loss. # 0.411608 is the unregularized_loss at that optimum. self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 1], predicted_labels.eval()) self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def testPartitionedPrimals(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, num_shards, partitioned=True) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) self.assertAllClose(0.693147, unregularized_loss.eval()) self.assertAllClose(0.693147, loss.eval()) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() 
lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. # regularization and still have a sum that is quite close to the # optimal regularized_loss value. SDCA's duality gap only ensures that # the regularized_loss is within 0.01 of optimal. # 0.525457 is the optimal regularized_loss. # 0.411608 is the unregularized_loss at that optimum. self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 1], predicted_labels.eval()) self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def testSomePartitionedPrimals(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [0], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) # Explicitly make age a [1]-shaped Variable (which cannot be # partitioned), while making gender a PartitionedVariable. 
age_weights = tf.compat.v1.Variable( tf.zeros([1], dtype=tf.dtypes.float32)) with tf.compat.v1.variable_scope( name_or_scope=('variables/shard_{}'.format(num_shards) if num_shards else 'variables'), partitioner=tf.compat.v1.fixed_size_partitioner( num_shards=2, axis=0)): gender_weights = tf.compat.v1.get_variable( name='gender', initializer=tf.zeros([2], dtype=tf.dtypes.float32)) variables = dict( sparse_features_weights=[age_weights, gender_weights], dense_features_weights=[]) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) self.assertAllClose(0.693147, unregularized_loss.eval()) self.assertAllClose(0.693147, loss.eval()) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. # regularization and still have a sum that is quite close to the # optimal regularized_loss value. SDCA's duality gap only ensures that # the regularized_loss is within 0.01 of optimal. # 0.525457 is the optimal regularized_loss. # 0.593014 is the unregularized_loss at that optimum. self.assertAllClose(0.512591, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.593014, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 1], predicted_labels.eval()) self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def testSparseRandom(self): dim = 20 num_examples = 1000 # Number of non-zero features per example. non_zeros = 10 # Setup test data. 
with self._single_threaded_test_session(): examples, variables = make_random_examples_and_variables_dicts( num_examples, dim, non_zeros) options = dict( symmetric_l2_regularization=.1, symmetric_l1_regularization=0, num_table_shards=1, adaptive=False, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() train_op = lr.minimize() for _ in range(10): train_op.run() lr.update_weights(train_op).run() self.assertNear(0.0, lr.approximate_duality_gap().eval(), err=1e-2) def testSparseDuplicate(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0] * 5, 'gender': [0] * 5 }, 0), make_example_proto({ 'age': [1] * 5, 'gender': [1] * 5 }, 1), ] example_weights = [1.0, 1.0] with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() train_op = lr.minimize() with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, 'Duplicate'): train_op.run() def testDistributedSimple(self): # Distributed SDCA may not converge if the workers update concurrently the # same example. In this test the examples are partitioned across workers. # The examples are the same for all workers, just the example_ids are # different. example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] examples = make_example_dict(example_protos, example_weights) example_ids = tf.compat.v1.placeholder( tf.dtypes.string, shape=(len(example_weights),)) examples['example_ids'] = example_ids variables = make_variable_dict(1, 1) # We need each thread to keep its own device stack or the device scopes # won't be properly nested. 
tf.compat.v1.get_default_graph().switch_to_thread_local() for num_shards in _SHARD_NUMBERS: for num_loss_partitions in _NUM_LOSS_PARTITIONS: with self._single_threaded_test_session(): options = dict( # Keep the same solution as for TestSimple: since the number of # examples is multplied by num_loss_partitions, multiply also # L2 by the same value. symmetric_l2_regularization=num_loss_partitions, symmetric_l1_regularization=0, loss_type='logistic_loss', num_table_shards=num_shards, num_loss_partitions=num_loss_partitions) lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) self.assertAllClose(0.693147, unregularized_loss.eval()) self.assertAllClose(0.693147, loss.eval()) train_op = lr.minimize() def minimize(worker_id): with context.graph_mode(), self._single_threaded_test_session(): feed_dict = { example_ids: [ str(i + worker_id * len(example_weights)) for i in range(len(example_weights)) ] } for _ in range(_MAX_ITERATIONS): train_op.run(feed_dict=feed_dict) # pylint: disable=cell-var-from-loop threads = [] for worker_id in range(num_loss_partitions): threads.append(threading.Thread(target=minimize, args=(worker_id,))) threads[-1].start() for t in threads: t.join() lr.update_weights(train_op).run(feed_dict={ example_ids: [str(i) for i in range(len(example_weights))] }) # Test only the unregularized loss because the optimal value of the # regularized loss depends on num_loss_partitions. self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.02) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 1], predicted_labels.eval()) self.assertNear(0.0, lr.approximate_duality_gap().eval(), 0.02) def testSimpleNoL2(self): # L2 regularization of SDCA should be positive. 
example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, 1) options = dict( symmetric_l2_regularization=0, symmetric_l1_regularization=0, num_table_shards=1, loss_type='logistic_loss') with self.assertRaises(ValueError): _SDCAModel(examples, variables, options) def testSomeUnweightedExamples(self): # Setup test data with 4 examples, but should produce the same # results as testSimple. example_protos = [ # Will be used. make_example_proto({ 'age': [0], 'gender': [0] }, 0), # Will be ignored. make_example_proto({ 'age': [1], 'gender': [0] }, 0), # Will be used. make_example_proto({ 'age': [1], 'gender': [1] }, 1), # Will be ignored. make_example_proto({ 'age': [1], 'gender': [0] }, 1), ] example_weights = [1.0, 0.0, 1.0, 0.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): # Only use examples 0 and 2 examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllClose([0, 1, 1, 1], predicted_labels.eval()) self.assertAllClose( 0.0, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def 
testFractionalExampleLabel(self): # Setup test data with 1 positive, and 1 mostly-negative example. example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0.1), make_example_proto({ 'age': [1], 'gender': [1] }, 0.9), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() with self.assertRaisesOpError( 'Only labels of 0.0 or 1.0 are supported right now.'): lr.minimize().run() def testImbalanced(self): # Setup test data with 1 positive, and 3 negative examples. example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [2], 'gender': [0] }, 0), make_example_proto({ 'age': [3], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0, 1.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(3, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() self.assertAllClose( 0.226487 + 0.102902, unregularized_loss.eval(), atol=0.08) self.assertAllClose(0.328394 + 0.131364, loss.eval(), atol=0.01) predicted_labels = 
get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 0, 0, 1], predicted_labels.eval()) self.assertAllClose( 0.0, lr.approximate_duality_gap().eval(), rtol=2e-2, atol=1e-2) def testImbalancedWithExampleWeights(self): # Setup test data with 1 positive, and 1 negative example. example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [3.0, 1.0] for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08) self.assertAllClose(0.408044, loss.eval(), atol=0.012) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 1], predicted_labels.eval()) self.assertAllClose( 0.0, lr.approximate_duality_gap().eval(), rtol=2e-2, atol=1e-2) def testInstancesOfOneClassOnly(self): # Setup test data with 1 positive (ignored), and 1 negative example. example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [0] }, 1), # Shares gender with the instance above. ] example_weights = [1.0, 0.0] # Second example "omitted" from training. 
for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=num_shards, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) loss = lr.regularized_loss(examples) predictions = lr.predictions(examples) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 0], predicted_labels.eval()) self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def testOutOfRangeSparseFeatures(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0] with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(0, 0) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() train_op = lr.minimize() with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, 'indices.*'): train_op.run() def testOutOfRangeDenseFeatures(self): with self._single_threaded_test_session(): examples, variables = make_dense_examples_and_variables_dicts( dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]], weights=[20.0, 10.0], labels=[1.0, 0.0]) # Replace with a variable of size 1 instead of 2. 
variables['dense_features_weights'] = [ tf.compat.v1.Variable(tf.zeros([1], dtype=tf.dtypes.float32)) ] options = dict( symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() train_op = lr.minimize() with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, 'More dense features than we have parameters for.*'): train_op.run() def testMissingFeature(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 1), make_example_proto({ 'age': [], 'gender': [1] }, 1), ] example_weights = [1.0, 1.0, 1.0] with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1, 1) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, num_table_shards=1, loss_type='logistic_loss') lr = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() unregularized_loss = lr.unregularized_loss(examples) self.assertAllClose(0.693147, unregularized_loss.eval()) train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() lr.update_weights(train_op).run() self.assertAllClose( 0.0, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) # TODO(katsiaspis): add a test for the case when examples at the end of an # epoch are repeated, since example id may be duplicated. 
# ResourceVariable only runs in graph mode
@test_util.deprecated_graph_mode_only
class SdcaWithLinearLossTest(_SDCAModelTest):
  """SDCA optimizer test class for linear (squared) loss."""

  def testSimple(self):
    """Trains squared loss on two examples and checks the shrunk predictions."""
    # Setup test data
    example_protos = [
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, -10.0),
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 14.0),
    ]
    example_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = dict(
          symmetric_l2_regularization=1,
          symmetric_l1_regularization=0,
          loss_type='squared_loss')

      lr = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = lr.predictions(examples)
      train_op = lr.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      lr.update_weights(train_op).run()

      # Predictions should be 2/3 of label due to minimizing regularized loss:
      #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2
      self.assertAllClose([-20.0 / 3.0, 28.0 / 3.0],
                          predictions.eval(),
                          rtol=0.005)
      # Approximate gap should be very close to 0.0. (In fact, because the gap
      # is only approximate, it is likely that upon convergence the duality gap
      # can have a tiny negative value).
      self.assertAllClose(0.0, lr.approximate_duality_gap().eval(), atol=1e-2)

  def testL2Regularization(self):
    """Checks that a larger L2 shrinks predictions proportionally."""
    # Setup test data
    example_protos = [
        # 2 identical examples
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, -10.0),
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, -10.0),
        # 2 more identical examples
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 14.0),
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 14.0),
    ]
    example_weights = [1.0, 1.0, 1.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = dict(
          symmetric_l2_regularization=16,
          symmetric_l1_regularization=0,
          loss_type='squared_loss')

      lr = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = lr.predictions(examples)

      train_op = lr.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      lr.update_weights(train_op).run()

      # Predictions should be 1/5 of label due to minimizing regularized loss:
      #   (label - 2 * weight)^2 + L2 * 16 * weight^2
      optimal1 = -10.0 / 5.0
      optimal2 = 14.0 / 5.0
      self.assertAllClose([optimal1, optimal1, optimal2, optimal2],
                          predictions.eval(),
                          rtol=0.01)

  def testL1Regularization(self):
    """Checks predictions and loss under a non-zero L1 penalty."""
    # Setup test data
    example_protos = [
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, -10.0),
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 14.0),
    ]
    example_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=4.0,
          loss_type='squared_loss')
      lr = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      prediction = lr.predictions(examples)
      loss = lr.regularized_loss(examples)

      train_op = lr.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      lr.update_weights(train_op).run()

      # Predictions should be -4, 20/3 due to minimizing regularized loss:
      #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
      self.assertAllClose([-4.0, 20.0 / 3.0], prediction.eval(), rtol=0.08)

      # Loss should be the sum of the regularized loss value from above per
      # example after plugging in the optimal weights.
      self.assertAllClose(308.0 / 6.0, loss.eval(), atol=0.01)

  def testFeatureValues(self):
    """Checks optimum with non-unit feature values and example weights."""
    # Setup test data
    example_protos = [
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, -10.0, -2.0),
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 14.0, 2.0),
    ]
    example_weights = [5.0, 3.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)

      variables = make_variable_dict(1, 1)
      options = dict(
          symmetric_l2_regularization=1,
          symmetric_l1_regularization=0,
          loss_type='squared_loss')

      lr = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = lr.predictions(examples)

      train_op = lr.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      lr.update_weights(train_op).run()

      # There are 4 (sparse) variable weights to be learned. 2 for age and 2 for
      # gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1,
      # y_2 be the labels for examples 1 and 2 respectively and s_1, s_2 the
      # corresponding *example* weights. With the given feature values, the loss
      # function is given by:
      # s_1/2(y_1 + 2w_1 + 2w_3)^2 + s_2/2(y_2 - 2w_2 - 2w_4)^2
      # + \lambda/2 (w_1^2 + w_2^2 + w_3^2 + w_4^2). Solving for the optimal, it
      # can be verified that:
      # w_1* = w_3* = -2.0 s_1 y_1/(\lambda + 8 s_1) and
      # w_2* = w_4* = 2 \cdot s_2 y_2/(\lambda + 8 s_2). Equivalently, due to
      # regularization and example weights, the predictions are within:
      # 8 \cdot s_i /(\lambda + 8 \cdot s_i) of the labels.
      self.assertAllClose([-10 * 40.0 / 41.0, 14.0 * 24 / 25.0],
                          predictions.eval(),
                          atol=0.01)

  def testDenseFeaturesWithDefaultWeights(self):
    """Checks the closed-form optimum for dense features, unit weights."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0], [0.0]], [0.0, 1.0]],
          weights=[1.0, 1.0],
          labels=[10.0, -5.0])
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=0,
          loss_type='squared_loss')
      lr = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = lr.predictions(examples)

      train_op = lr.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      lr.update_weights(train_op).run()

      # The loss function for these particular features is given by:
      # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). So,
      # differentiating wrt to w_1, w_2 yields the following optimal values:
      # w_1* = label_1/(\lambda + 1)= 10/2, w_2* =label_2/(\lambda + 1)= -5/2.
      # In this case the (unnormalized regularized) loss will be:
      # 1/2(10-5)^2 + 1/2(5-5/2)^2 + 1/2(5^2 + (5/2)^2) = 125.0/4. The actual
      # loss should be further normalized by the sum of example weights.
      self.assertAllClose([5.0, -2.5], predictions.eval(), rtol=0.01)
      loss = lr.regularized_loss(examples)
      self.assertAllClose(125.0 / 8.0, loss.eval(), atol=0.01)

  def testDenseFeaturesWithArbitraryWeights(self):
    """Checks the closed-form optimum for dense features, unequal weights."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]],
          weights=[20.0, 10.0],
          labels=[10.0, -5.0])
      options = dict(
          symmetric_l2_regularization=5.0,
          symmetric_l1_regularization=0,
          loss_type='squared_loss')
      lr = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = lr.predictions(examples)

      train_op = lr.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      lr.update_weights(train_op).run()

      # The loss function for these particular features is given by:
      # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
      # \lambda/2 (w_1^2 + w_2^2) where s_1, s_2 are the *example weights. It
      # turns out that the optimal (variable) weights are given by:
      # w_1* = label_1 \cdot s_1/(\lambda + s_1)= 8.0 and
      # w_2* =label_2 \cdot s_2/(\lambda + s_2)= -10/3.
      # In this case the (unnormalized regularized) loss will be:
      # s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9. The
      # actual loss should be further normalized by the sum of example weights.
      self.assertAllClose([8.0, -10.0 / 3], predictions.eval(), rtol=0.01)
      loss = lr.regularized_loss(examples)
      self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)


# ResourceVariable only runs in graph mode
@test_util.deprecated_graph_mode_only
class SdcaWithHingeLossTest(_SDCAModelTest):
  """SDCA optimizer test class for hinge loss."""

  def testSimple(self):
    """Trains hinge loss on two separable examples and checks the margin."""
    # Setup test data
    example_protos = [
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, 0),
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 1),
    ]
    example_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=0,
          loss_type='hinge_loss')
      model = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()

      # Before minimization, the weights default to zero. There is no loss due
      # to regularization, only unregularized loss which is 0.5 * (1+1) = 1.0.
      predictions = model.predictions(examples)
      self.assertAllClose([0.0, 0.0], predictions.eval())
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(1.0, unregularized_loss.eval())
      self.assertAllClose(1.0, regularized_loss.eval())

      # After minimization, the model separates perfectly the data points. There
      # are 4 sparse weights: 2 for age (say w1, w2) and 2 for gender (say w3
      # and w4). Solving the system w1 + w3 = 1.0, w2 + w4 = -1.0 and minimizing
      # wrt to \|\vec{w}\|_2, gives w1=w3=1/2 and w2=w4=-1/2. This gives 0.0
      # unregularized loss and 0.25 L2 loss.
      train_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      model.update_weights(train_op).run()

      binary_predictions = get_binary_predictions_for_hinge(predictions)
      self.assertAllEqual([-1.0, 1.0], predictions.eval())
      self.assertAllEqual([0, 1], binary_predictions.eval())
      self.assertAllClose(0.0, unregularized_loss.eval())
      self.assertAllClose(0.25, regularized_loss.eval(), atol=0.05)

  def testDenseFeaturesPerfectlySeparable(self):
    """Dense data separable with functional margin >= 1: no hinge loss."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[1.0, 1.0], [1.0, -1.0]],
          weights=[1.0, 1.0],
          labels=[1.0, 0.0])
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=0,
          loss_type='hinge_loss')
      model = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = model.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)

      train_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      model.update_weights(train_op).run()

      self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())

      # (1.0, 1.0) and (1.0, -1.0) are perfectly separable by x-axis (that is,
      # the SVM's functional margin >=1), so the unregularized loss is ~0.0.
      # There is only loss due to l2-regularization. For these datapoints, it
      # turns out that w_1~=0.0 and w_2~=1.0 which means that l2 loss is ~0.25.
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)

  def testDenseFeaturesSeparableWithinMargins(self):
    """Dense data separable but inside the margin: hinge loss is non-zero."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]],
          weights=[1.0, 1.0],
          labels=[1.0, 0.0])
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=0,
          loss_type='hinge_loss')
      model = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = model.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)

      train_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      model.update_weights(train_op).run()

      # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints
      # are within the margins so there is unregularized loss (1/2 per example).
      # For these datapoints, optimal weights are w_1~=0.0 and w_2~=1.0 which
      # gives an L2 loss of ~0.25.
      self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)

  def testDenseFeaturesWeightedExamples(self):
    """Unequal example weights pull the decision boundary toward one point."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]],
          weights=[3.0, 1.0],
          labels=[1.0, 0.0])
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=0,
          loss_type='hinge_loss')
      model = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()
      predictions = model.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)
      train_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        train_op.run()
      model.update_weights(train_op).run()

      # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will
      # try to increase the margin from (1.0, 0.5). Due to regularization,
      # (1.0, -0.5) will be within the margin. For these points and example
      # weights, the optimal weights are w_1~=0.4 and w_2~=1.2 which give an L2
      # loss of 0.5 * 0.25 * 0.25 * 1.6 = 0.2. The binary predictions will be
      # correct, but the boundary will be much closer to the 2nd point than the
      # first one.
      self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(0.2, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.4, regularized_loss.eval(), atol=0.02)


# ResourceVariable only runs in graph mode
@test_util.deprecated_graph_mode_only
class SdcaWithSmoothHingeLossTest(_SDCAModelTest):
  """SDCA optimizer test class for smooth hinge loss."""

  def testSimple(self):
    """Trains smooth hinge loss on two examples and checks the optimum."""
    # Setup test data
    example_protos = [
        make_example_proto({
            'age': [0],
            'gender': [0]
        }, 0),
        make_example_proto({
            'age': [1],
            'gender': [1]
        }, 1),
    ]
    example_weights = [1.0, 1.0]
    with self._single_threaded_test_session():
      examples = make_example_dict(example_protos, example_weights)
      variables = make_variable_dict(1, 1)
      options = dict(
          symmetric_l2_regularization=1.0,
          symmetric_l1_regularization=0,
          loss_type='smooth_hinge_loss')
      model = _SDCAModel(examples, variables, options)
      tf.compat.v1.initializers.global_variables().run()

      # Before minimization, the weights default to zero. There is no loss due
      # to regularization, only unregularized loss which is 0.5 * (1+1) = 1.0.
      predictions = model.predictions(examples)
      self.assertAllClose([0.0, 0.0], predictions.eval())
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(1.0, unregularized_loss.eval())
      self.assertAllClose(1.0, regularized_loss.eval())

      # After minimization, the model separates perfectly the data points. There
      # are 4 sparse weights: 2 for age (say w1, w2) and 2 for gender (say w3
      # and w4). The minimization leads to w1=w3=1/3 and w2=w4=-1/3.
This gives # an unregularized hinge loss of 0.33 and a 0.11 L2 loss train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() model.update_weights(train_op).run() binary_predictions = get_binary_predictions_for_hinge(predictions) self.assertAllClose([-0.67, 0.67], predictions.eval(), atol=0.05) self.assertAllEqual([0, 1], binary_predictions.eval()) self.assertAllClose(0.33, unregularized_loss.eval(), atol=0.02) self.assertAllClose(0.44, regularized_loss.eval(), atol=0.02) # ResourceVariable only runs in graph mode @test_util.deprecated_graph_mode_only class SdcaWithPoissonLossTest(_SDCAModelTest): """SDCA optimizer test class for poisson loss.""" def testSimple(self): # Setup test data example_protos = [ make_example_proto({ 'age': [0], 'gender': [0] }, 0), make_example_proto({ 'age': [1], 'gender': [1] }, 2), ] example_weights = [100.0, 100.0] with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) variables = make_variable_dict(1, 1) options = dict( symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type='poisson_loss') model = _SDCAModel(examples, variables, options) tf.compat.v1.initializers.global_variables().run() # Before minimization, the weights default to zero. There is no loss due # to regularization, only unregularized loss which is 1 for each example. predictions = model.predictions(examples) self.assertAllClose([1.0, 1.0], predictions.eval()) unregularized_loss = model.unregularized_loss(examples) regularized_loss = model.regularized_loss(examples) approximate_duality_gap = model.approximate_duality_gap() self.assertAllClose(1.0, unregularized_loss.eval()) self.assertAllClose(1.0, regularized_loss.eval()) # There are 4 sparse weights: 2 for age (say w1, w2) and 2 for gender # (say w3 and w4). The minimization leads to: # w1=w3=-1.96487, argmin of 100*(exp(2*w)-2*w*0)+w**2. # w2=w4=0.345708, argmin of 100*(exp(2*w)-2*w*2)+w**2. 
# This gives an unregularized loss of .3167 and .3366 with regularization. train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() model.update_weights(train_op).run() self.assertAllClose([0.0196, 1.9965], predictions.eval(), atol=1e-4) self.assertAllClose(0.3167, unregularized_loss.eval(), atol=1e-4) self.assertAllClose(0.3366, regularized_loss.eval(), atol=1e-4) self.assertAllClose(0., approximate_duality_gap.eval(), atol=1e-6) class SdcaFprintTest(_SDCAModelTest): """Tests for the SdcaFprint op. This is one way of enforcing the platform-agnostic nature of SdcaFprint. Basically we are checking against exact values and this test could be running across different platforms. Note that it is fine for expected values to change in the future, if the implementation of SdcaFprint changes (ie this is *not* a frozen test). """ def testFprint(self): with self._single_threaded_test_session(): in_data = tf.constant(['abc', 'very looooooong string', 'def']) out_data = tf.compat.v1.train.sdca_fprint(in_data) self.assertAllEqual([[4143508125394299908, -6879828354153669051], [5849691694103072671, -4874542629849009556], [603227410218889250, 8762207001949257490]], self.evaluate(out_data)) class _SparseFeatureColumnTest(TensorFlowTestCase): """Tests for _SparseFeatureColumn.""" def testBasic(self): expected_example_indices = [1, 1, 1, 2] expected_feature_indices = [0, 1, 2, 0] sfc = _SparseFeatureColumn(expected_example_indices, expected_feature_indices, None) self.assertIsInstance(sfc.example_indices, tf.Tensor) self.assertIsInstance(sfc.feature_indices, tf.Tensor) self.assertEqual(sfc.feature_values, None) with self.cached_session(): self.assertAllEqual(expected_example_indices, self.evaluate(sfc.example_indices)) self.assertAllEqual(expected_feature_indices, self.evaluate(sfc.feature_indices)) expected_feature_values = [1.0, 2.0, 3.0, 4.0] sfc = _SparseFeatureColumn([1, 1, 1, 2], [0, 1, 2, 0], expected_feature_values) with self.cached_session(): 
      self.assertAllEqual(expected_feature_values,
                          self.evaluate(sfc.feature_values))


if __name__ == '__main__':
  googletest.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/python/utils/sharded_mutable_dense_hashtable.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Sharded mutable dense hash table."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from six.moves import range
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_lookup_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.training.saver import BaseSaverBuilder
from tensorflow.python.checkpoint import saveable_compat


@saveable_compat.legacy_saveable_name("table")
class _MutableDenseHashTable(lookup_ops.LookupInterface):
  """Copy of tf.contrib.lookup.MutableDenseHashTable."""

  # TODO(b/118148303): Swap this with the core version
  def __init__(self,
               key_dtype,
               value_dtype,
               default_value,
               empty_key,
               deleted_key,
               initial_num_buckets=None,
               shared_name=None,
               name="MutableDenseHashTable",
               checkpoint=True):
    """Creates an empty `_MutableDenseHashTable` object.

    Creates a table, the type of its keys and values are specified by key_dtype
    and value_dtype, respectively.

    Args:
      key_dtype: the type of the key tensors.
      value_dtype: the type of the value tensors.
      default_value: The value to use if a key is missing in the table.
      empty_key: the key to use to represent empty buckets internally. Must not
        be used in insert, remove or lookup operations.
      deleted_key: the key to use to represent deleted buckets internally. Must
        not be used in insert, remove or lookup operations and be different from
        the empty_key.
      initial_num_buckets: the initial number of buckets.
      shared_name: If non-empty, this table will be shared under the given name
        across multiple sessions.
      name: A name for the operation (optional).
      checkpoint: if True, the contents of the table are saved to and restored
        from checkpoints. If `shared_name` is empty for a checkpointed table, it
        is shared using the table node name.

    Returns:
      A `_MutableDenseHashTable` object.

    Raises:
      ValueError: If checkpoint is True and no name was specified.
    """
    self._default_value = ops.convert_to_tensor(
        default_value, dtype=value_dtype, name="default_value")
    self._key_dtype = key_dtype
    self._value_dtype = value_dtype
    self._initial_num_buckets = initial_num_buckets
    # Value shape is derived from the default value, not passed explicitly.
    self._value_shape = self._default_value.get_shape()
    self._checkpoint = checkpoint
    self._name = name
    self._empty_key = ops.convert_to_tensor(
        empty_key, dtype=key_dtype, name="empty_key")
    self._deleted_key = ops.convert_to_tensor(
        deleted_key, dtype=key_dtype, name="deleted_key")
    if tf.executing_eagerly() and shared_name is None:
      # TODO(allenl): This will leak memory due to kernel caching by the
      # shared_name attribute value (but is better than the alternative of
      # sharing everything by default when executing eagerly; hopefully creating
      # tables in a loop is uncommon).
      shared_name = "table_%d" % (ops.uid(),)
    self._shared_name = shared_name
    super(_MutableDenseHashTable, self).__init__(key_dtype, value_dtype)

    self._resource_handle = self._create_resource()
    if checkpoint:
      # Register a SaveableObject so the table contents participate in
      # tf.compat.v1.train.Saver-based checkpoints (graph mode only).
      saveable = _MutableDenseHashTable._Saveable(self, name)
      if not tf.executing_eagerly():
        tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.SAVEABLE_OBJECTS,
                                       saveable)

  def _create_resource(self):
    # The table must be shared if checkpointing is requested for multi-worker
    # training to work correctly. Use the node name if no shared_name has been
    # explicitly specified.
    use_node_name_sharing = self._checkpoint and self._shared_name is None
    table_ref = gen_lookup_ops.mutable_dense_hash_table_v2(
        empty_key=self._empty_key,
        deleted_key=self._deleted_key,
        shared_name=self._shared_name,
        use_node_name_sharing=use_node_name_sharing,
        value_dtype=self._value_dtype,
        value_shape=self._value_shape,
        initial_num_buckets=self._initial_num_buckets,
        name=self._name)
    if tf.executing_eagerly():
      self._table_name = None
    else:
      self._table_name = table_ref.op.name.split("/")[-1]
    return table_ref

  @property
  def name(self):
    return self._table_name

  def size(self, name=None):
    """Compute the number of elements in this table.

    Args:
      name: A name for the operation (optional).

    Returns:
      A scalar tensor containing the number of elements in this table.
    """
    with ops.name_scope(name, "%s_Size" % self.name,
                        [self.resource_handle]) as name:
      with ops.colocate_with(self.resource_handle):
        return gen_lookup_ops.lookup_table_size_v2(
            self.resource_handle, name=name)

  def lookup(self, keys, name=None):
    """Looks up `keys` in a table, outputs the corresponding values.

    The `default_value` is used for keys not present in the table.

    Args:
      keys: Keys to look up. Can be a tensor of any shape. Must match the
        table's key_dtype.
      name: A name for the operation (optional).

    Returns:
      A tensor containing the values in the same shape as `keys` using the
        table's value type.

    Raises:
      TypeError: when `keys` do not match the table data types.
    """
    with ops.name_scope(name, "%s_lookup_table_find" % self.name,
                        [self.resource_handle, keys]) as name:
      keys = ops.convert_to_tensor(keys, dtype=self._key_dtype, name="keys")
      with ops.colocate_with(self.resource_handle):
        values = gen_lookup_ops.lookup_table_find_v2(
            self.resource_handle, keys, self._default_value, name=name)
    return values

  def insert(self, keys, values, name=None):
    """Associates `keys` with `values`.

    Args:
      keys: Keys to insert. Can be a tensor of any shape. Must match the table's
        key type.
      values: Values to be associated with keys. Must be a tensor of the same
        shape as `keys` and match the table's value type.
      name: A name for the operation (optional).

    Returns:
      The created Operation.

    Raises:
      TypeError: when `keys` or `values` doesn't match the table data types.
    """
    with ops.name_scope(name, "%s_lookup_table_insert" % self.name,
                        [self.resource_handle, keys, values]) as name:
      keys = ops.convert_to_tensor(keys, dtype=self._key_dtype, name="keys")
      values = ops.convert_to_tensor(
          values, dtype=self._value_dtype, name="values")
      with ops.colocate_with(self.resource_handle):
        op = gen_lookup_ops.lookup_table_insert_v2(
            self.resource_handle, keys, values, name=name)
    return op

  def export(self, name=None):
    """Returns tensors of all keys and values in the table.

    Args:
      name: A name for the operation (optional).

    Returns:
      A pair of tensors with the first tensor containing all keys and the
        second tensors containing all values in the table.
""" with ops.name_scope(name, "%s_lookup_table_export_values" % self.name, [self.resource_handle]) as name: with ops.colocate_with(self.resource_handle): exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2( self.resource_handle, self._key_dtype, self._value_dtype, name=name) return exported_keys, exported_values def _serialize_to_tensors(self): tesnors = self.export() return {"-keys": tesnors[0], "-values": tesnors[1]} def _restore_from_tensors(self, restored_tensors): with ops.colocate_with(self.resource_handle): return gen_lookup_ops.lookup_table_import_v2(self.resource_handle, restored_tensors["-keys"], restored_tensors["-values"]) class _Saveable(BaseSaverBuilder.SaveableObject): """SaveableObject implementation for _MutableDenseHashTable.""" def __init__(self, table, name): tensors = table.export() specs = [ BaseSaverBuilder.SaveSpec(tensors[0], "", name + "-keys"), BaseSaverBuilder.SaveSpec(tensors[1], "", name + "-values") ] # pylint: disable=protected-access super(_MutableDenseHashTable._Saveable, self).__init__(table, specs, name) def restore(self, restored_tensors, restored_shapes): del restored_shapes # unused # pylint: disable=protected-access with ops.colocate_with(self.op.resource_handle): return gen_lookup_ops.lookup_table_import_v2(self.op.resource_handle, restored_tensors[0], restored_tensors[1]) # TODO(rohanj): This should subclass Checkpointable and implement # _gather_saveables_for_checkpoint. class _ShardedMutableDenseHashTable(object): """A sharded version of _MutableDenseHashTable. It is designed to be interface compatible with LookupInterface and MutableDenseHashTable, with the exception of the export method, which is replaced by an export_sharded method. The _ShardedMutableDenseHashTable keeps `num_shards` _MutableDenseHashTable internally. The shard is computed via the modulo operation on the key. 
""" def __init__(self, key_dtype, value_dtype, default_value, empty_key, deleted_key, num_shards=1, checkpoint=True, name="ShardedMutableHashTable"): self._key_dtype = key_dtype self._value_dtype = value_dtype with ops.name_scope(name, "sharded_mutable_hash_table") as scope: table_shards = [] for i in range(num_shards): self._table_name = scope table_shards.append( _MutableDenseHashTable( key_dtype=key_dtype, value_dtype=value_dtype, default_value=default_value, empty_key=empty_key, deleted_key=deleted_key, checkpoint=checkpoint, name="%s-%d-of-%d" % (name, i + 1, num_shards))) self._table_shards = table_shards # TODO(andreasst): add a value_shape() method to LookupInterface # pylint: disable=protected-access self._value_shape = self._table_shards[0]._value_shape # pylint: enable=protected-access @property def name(self): return self._table_name @property def _num_shards(self): return len(self._table_shards) @property def table_shards(self): return self._table_shards def size(self, name=None): with ops.name_scope(name, "sharded_mutable_hash_table_size"): sizes = [self._table_shards[i].size() for i in range(self._num_shards)] return tf.math.add_n(sizes) def _shard_indices(self, keys): key_shape = keys.get_shape() if key_shape.ndims > 1: # If keys are a matrix (i.e. a single key is a vector), we use the first # element of each key vector to determine the shard. keys = tf.reshape(tf.slice(keys, [0, 0], [-1, 1]), [-1]) indices = tf.math.floormod(tf.math.abs(keys), self._num_shards) return tf.cast(indices, tf.dtypes.int32) def _check_keys(self, keys): if keys.get_shape().ndims != 1 and keys.get_shape().ndims != 2: raise ValueError("Expected a vector or matrix for keys, got %s." % keys.get_shape()) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values.""" if keys.dtype.base_dtype != self._key_dtype: raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." 
% (self._key_dtype, keys.dtype)) self._check_keys(keys) num_shards = self._num_shards if num_shards == 1: return self._table_shards[0].lookup(keys, name=name) shard_indices = self._shard_indices(keys) key_shards = tf.dynamic_partition(keys, shard_indices, num_shards) value_shards = [ self._table_shards[i].lookup(key_shards[i], name=name) for i in range(num_shards) ] num_keys = tf.compat.v1.shape(keys)[0] original_indices = tf.range(num_keys) partitioned_indices = tf.dynamic_partition(original_indices, shard_indices, num_shards) return tf.dynamic_stitch(partitioned_indices, value_shards) def insert(self, keys, values, name=None): """Inserts `keys` in a table.""" self._check_keys(keys) num_shards = self._num_shards if num_shards == 1: return self._table_shards[0].insert(keys, values, name=name) shard_indices = self._shard_indices(keys) key_shards = tf.dynamic_partition(keys, shard_indices, num_shards) value_shards = tf.dynamic_partition(values, shard_indices, num_shards) return_values = [ self._table_shards[i].insert(key_shards[i], value_shards[i], name=name) for i in range(num_shards) ] return tf.group(*return_values) def export_sharded(self, name=None): """Returns lists of the keys and values tensors in the sharded table. Args: name: name of the table. Returns: A pair of lists with the first list containing the key tensors and the second list containing the value tensors from each shard. """ keys_list = [] values_list = [] for table_shard in self._table_shards: exported_keys, exported_values = table_shard.export(name=name) keys_list.append(exported_keys) values_list.append(exported_values) return keys_list, values_list ================================================ FILE: tensorflow_estimator/python/estimator/canned/linear_optimizer/python/utils/sharded_mutable_dense_hashtable_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sharded_mutable_dense_hashtable.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.platform import googletest
from tensorflow_estimator.python.estimator.canned.linear_optimizer.python.utils.sharded_mutable_dense_hashtable import _ShardedMutableDenseHashTable


class _ShardedMutableDenseHashTableTest(tf.test.TestCase):
  """Tests for the ShardedMutableHashTable class."""

  def testShardedMutableHashTable(self):
    # Behavior must be identical regardless of the shard count.
    for num_shards in [1, 3, 10]:
      with self.cached_session():
        default_val = -1
        empty_key = 0
        deleted_key = -1
        keys = tf.constant([11, 12, 13], tf.dtypes.int64)
        values = tf.constant([0, 1, 2], tf.dtypes.int64)
        table = _ShardedMutableDenseHashTable(
            tf.dtypes.int64,
            tf.dtypes.int64,
            default_val,
            empty_key,
            deleted_key,
            num_shards=num_shards)
        self.assertAllEqual(0, self.evaluate(table.size()))
        self.evaluate(table.insert(keys, values))
        self.assertAllEqual(3, self.evaluate(table.size()))

        # Key 14 is absent, so its lookup yields the default value.
        input_string = tf.constant([11, 12, 14], tf.dtypes.int64)
        output = table.lookup(input_string)
        self.assertAllEqual([3], output.get_shape())
        self.assertAllEqual([0, 1, -1], self.evaluate(output))

  def testShardedMutableHashTableVectors(self):
    # Same as above, but with vector keys and vector values.
    for num_shards in [1, 3, 10]:
      with self.cached_session():
        default_val = [-0.1, 0.2]
        empty_key = [0, 1]
        deleted_key = [1, 0]
        keys = tf.constant([[11, 12], [13, 14], [15, 16]], tf.dtypes.int64)
        values = tf.constant([[0.5, 0.6], [1.5, 1.6], [2.5, 2.6]],
                             tf.dtypes.float32)
        table = _ShardedMutableDenseHashTable(
            tf.dtypes.int64,
            tf.dtypes.float32,
            default_val,
            empty_key,
            deleted_key,
            num_shards=num_shards)
        self.assertAllEqual(0, self.evaluate(table.size()))
        self.evaluate(table.insert(keys, values))
        self.assertAllEqual(3, self.evaluate(table.size()))

        input_string = tf.constant([[11, 12], [13, 14], [11, 14]],
                                   tf.dtypes.int64)
        output = table.lookup(input_string)
        self.assertAllEqual([3, 2], output.get_shape())
        self.assertAllClose([[0.5, 0.6], [1.5, 1.6], [-0.1, 0.2]],
                            self.evaluate(output))

  def testExportSharded(self):
    with self.cached_session():
      empty_key = -2
      deleted_key = -3
      default_val = -1
      num_shards = 2
      keys = tf.constant([10, 11, 12], tf.dtypes.int64)
      values = tf.constant([2, 3, 4], tf.dtypes.int64)
      table = _ShardedMutableDenseHashTable(
          tf.dtypes.int64,
          tf.dtypes.int64,
          default_val,
          empty_key,
          deleted_key,
          num_shards=num_shards)
      self.assertAllEqual(0, self.evaluate(table.size()))
      self.evaluate(table.insert(keys, values))
      self.assertAllEqual(3, self.evaluate(table.size()))
      keys_list, values_list = table.export_sharded()
      self.assertAllEqual(num_shards, len(keys_list))
      self.assertAllEqual(num_shards, len(values_list))

      # Exported keys include empty key buckets set to the empty_key
      self.assertAllEqual(
          set([-2, 10, 12]), set(self.evaluate(keys_list[0]).flatten()))
      self.assertAllEqual(
          set([-2, 11]), set(self.evaluate(keys_list[1]).flatten()))

      # Exported values include empty value buckets set to 0
      self.assertAllEqual(
          set([0, 2, 4]), set(self.evaluate(values_list[0]).flatten()))
      self.assertAllEqual(
          set([0, 3]), set(self.evaluate(values_list[1]).flatten()))


if __name__ == '__main__':
  googletest.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/linear_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for linear.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.framework import ops
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import linear_testing_utils


# Factory passed into the shared test bases so they construct the V2 regressor.
def _linear_regressor_fn(*args, **kwargs):
  return linear.LinearRegressorV2(*args, **kwargs)


# Factory passed into the shared test bases so they construct the V2 classifier.
def _linear_classifier_fn(*args, **kwargs):
  return linear.LinearClassifierV2(*args, **kwargs)


# Tests for Linear Regressor.
class LinearRegressorEvaluationV2Test( linear_testing_utils.BaseLinearRegressorEvaluationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__( self, _linear_regressor_fn, fc_lib=feature_column_v2) class LinearRegressorPredictV2Test( linear_testing_utils.BaseLinearRegressorPredictTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearRegressorPredictTest.__init__( self, _linear_regressor_fn, fc_lib=feature_column_v2) class LinearRegressorIntegrationV2Test( linear_testing_utils.BaseLinearRegressorIntegrationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__( self, _linear_regressor_fn, fc_lib=feature_column_v2) class LinearRegressorTrainingV2Test( linear_testing_utils.BaseLinearRegressorTrainingTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearRegressorTrainingTest.__init__( self, _linear_regressor_fn, fc_lib=feature_column_v2) # Tests for Linear Classifier. 
class LinearClassifierTrainingV2Test( linear_testing_utils.BaseLinearClassifierTrainingTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearClassifierTrainingTest.__init__( self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column_v2) class LinearClassifierEvaluationV2Test( linear_testing_utils.BaseLinearClassifierEvaluationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__( self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column_v2) class LinearClassifierPredictV2Test( linear_testing_utils.BaseLinearClassifierPredictTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearClassifierPredictTest.__init__( self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column_v2) class LinearClassifierIntegrationV2Test( linear_testing_utils.BaseLinearClassifierIntegrationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__( self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column_v2) # Tests for Linear logit_fn. class LinearLogitFnV2Test(linear_testing_utils.BaseLinearLogitFnTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) linear_testing_utils.BaseLinearLogitFnTest.__init__( self, fc_lib=feature_column_v2) # Tests for warm-starting with Linear logit_fn. 
class LinearWarmStartingV2Test(linear_testing_utils.BaseLinearWarmStartingTest,
                               tf.test.TestCase):
  """Runs the shared warm-starting suite against the V2 linear estimators."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
        self,
        _linear_classifier_fn,
        _linear_regressor_fn,
        fc_lib=feature_column_v2)


class ComputeFractionOfZeroTest(tf.test.TestCase):
  """Tests for linear._compute_fraction_of_zero."""

  def _assertSparsity(self, expected_sparsity, tensor):
    sparsity = linear._compute_fraction_of_zero([tensor])
    self.assertAllClose(expected_sparsity, sparsity)

  def test_small_float32(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.float32))
    self._assertSparsity(
        0.5, ops.convert_to_tensor([0, 1, 0, 1], dtype=tf.dtypes.float32))

  def test_small_int32(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.int32))

  def test_small_float64(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.float64))

  def test_small_int64(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.int64))

  def test_nested(self):
    self._assertSparsity(
        0.75, [ops.convert_to_tensor([0, 0]),
               ops.convert_to_tensor([0, 1])])

  def test_none(self):
    # An empty variable list is invalid and must raise.
    with self.assertRaises(ValueError):
      linear._compute_fraction_of_zero([])

  def test_empty(self):
    # A tensor with zero elements yields NaN sparsity (0/0).
    sparsity = linear._compute_fraction_of_zero([ops.convert_to_tensor([])])
    self.assertTrue(
        self.evaluate(tf.math.is_nan(sparsity)),
        'Expected sparsity=nan, got %s' % sparsity)

  def test_multiple_empty(self):
    sparsity = linear._compute_fraction_of_zero([
        ops.convert_to_tensor([]),
        ops.convert_to_tensor([]),
    ])
    self.assertTrue(
        self.evaluate(tf.math.is_nan(sparsity)),
        'Expected sparsity=nan, got %s' % sparsity)

  def test_some_empty(self):
    with self.test_session():
      self._assertSparsity(0.5, [
          ops.convert_to_tensor([]),
          ops.convert_to_tensor([0.]),
          ops.convert_to_tensor([1.]),
      ])

  def test_mixed_types(self):
    with self.test_session():
      self._assertSparsity(0.6, [
          ops.convert_to_tensor([0, 0, 1, 1, 1], dtype=tf.dtypes.float32),
          ops.convert_to_tensor([0, 0, 0, 0, 1], dtype=tf.dtypes.int32),
      ])

  def test_2_27_zeros__using_512_MiB_of_ram(self):
    # Large-tensor test: guards against int32 element-count overflow paths.
    self._assertSparsity(1.,
                         tf.zeros([int(2**27 * 1.01)], dtype=tf.dtypes.int8))

  def test_2_27_ones__using_512_MiB_of_ram(self):
    self._assertSparsity(0.,
                         tf.ones([int(2**27 * 1.01)], dtype=tf.dtypes.int8))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/linear_testing_utils.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Utils for testing linear estimators.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import math import os import shutil import tempfile import numpy as np import six import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.feature_column import feature_column_v2 from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import linear from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.inputs import pandas_io try: # pylint: disable=g-import-not-at-top import pandas as pd HAS_PANDAS = True except IOError: # Pandas writes a temporary file during import. If it fails, don't use pandas. HAS_PANDAS = False except ImportError: HAS_PANDAS = False # pylint rules which are disabled by default for test files. # pylint: disable=invalid-name,protected-access,missing-docstring # Names of variables created by model. 
# Names of the variables created by the canned linear models under test.
# The checkpoint helpers below create/read variables under these exact names,
# so they must stay in sync with the linear model implementation.
AGE_WEIGHT_NAME = 'linear/linear_model/age/weights'
HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights'
OCCUPATION_WEIGHT_NAME = 'linear/linear_model/occupation/weights'
BIAS_NAME = 'linear/linear_model/bias_weights'
LANGUAGE_WEIGHT_NAME = 'linear/linear_model/language/weights'


def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
  """Returns an op asserting `actual` is relatively close to `expected`.

  The check is element-wise: |expected - actual| / |expected| < rtol.
  Note the denominator is `expected`, so expected values of 0 will produce
  inf/nan relative differences.

  Args:
    expected: Tensor (or value convertible to one) of expected values.
    actual: Tensor (or value convertible to one) of actual values.
    rtol: Relative tolerance.
    name: Name scope for the returned op.

  Returns:
    A `tf.compat.v1.debugging.assert_less` op that fails when the relative
    difference is not below `rtol`.
  """
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    rdiff = tf.math.abs(expected - actual, 'diff') / tf.math.abs(expected)
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return tf.compat.v1.debugging.assert_less(
        rdiff,
        rtol,
        data=('Condition expected =~ actual did not hold element-wise:'
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        name=scope)


def save_variables_to_ckpt(model_dir):
  """Initializes all variables in the default graph and checkpoints them.

  Writes a checkpoint named 'model.ckpt' into `model_dir` so an estimator
  pointed at that directory warm-starts from the constructed variables.
  """
  init_all_op = [tf.compat.v1.initializers.global_variables()]
  with tf.compat.v1.Session() as sess:
    sess.run(init_all_op)
    tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))


def queue_parsed_features(feature_map):
  """Rewires a dict of feature tensors through a FIFO queue.

  Enqueues all feature tensors into a single `FIFOQueue` (registering a
  `QueueRunner` so a queue thread keeps it fed) and returns the dequeued
  tensors keyed exactly like the input.

  Args:
    feature_map: Dict of feature name to tensor.

  Returns:
    Dict with the same keys whose values are the dequeued tensors.
  """
  tensors_to_enqueue = []
  keys = []
  # Iterate once so `keys` and `tensors_to_enqueue` stay index-aligned.
  for key, tensor in feature_map.items():
    keys.append(key)
    tensors_to_enqueue.append(tensor)
  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
  input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes)
  tf.compat.v1.train.queue_runner.add_queue_runner(
      tf.compat.v1.train.queue_runner.QueueRunner(
          input_queue, [input_queue.enqueue(tensors_to_enqueue)]))
  dequeued_tensors = input_queue.dequeue()
  # dequeue() returns the tensors in enqueue order, matching `keys`.
  return dict(zip(keys, dequeued_tensors))


def sorted_key_dict(unsorted_dict):
  """Returns a copy of `unsorted_dict` with keys inserted in sorted order."""
  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}


def sigmoid(x):
  """Numpy logistic sigmoid: 1 / (1 + exp(-x))."""
  return 1 / (1 + np.exp(-1.0 * x))


def mock_optimizer(testcase, expected_loss=None):
  """Creates a mock optimizer to test the train method.

  The returned optimizer asserts (via `testcase`) that it is applied to
  exactly the age-weight and bias variables, that the loss is a scalar, and —
  when `expected_loss` is given — that the loss tensor is close to it
  (enforced by an in-graph `assert_close` op, since the value is not
  available at graph-construction time).

  Args:
    testcase: A TestCase instance used for the in-Python assertions.
    expected_loss: The expected loss value, or None to skip the loss check.

  Returns:
    A legacy Keras optimizer whose updates only increment `iterations`.
  """
  expected_var_names = ['%s:0' % AGE_WEIGHT_NAME, '%s:0' % BIAS_NAME]

  class _Optimizer(tf_keras.optimizers.legacy.Optimizer):

    def get_updates(self, loss, params):
      trainable_vars = params
      testcase.assertItemsEqual(expected_var_names,
                                [var.name for var in trainable_vars])

      # Verify loss. We can't check the value directly, so we add an assert op.
      # (assertEqual: the `assertEquals` alias is deprecated and removed in
      # Python 3.12.)
      testcase.assertEqual(0, loss.shape.ndims)
      if expected_loss is None:
        if self.iterations is not None:
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]
      assert_loss = assert_close(
          tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32),
          loss,
          name='assert_loss')
      with tf.control_dependencies((assert_loss,)):
        if self.iterations is not None:
          return [self.iterations.assign_add(1).op]
        return [tf.no_op()]

    def get_config(self):
      config = super(_Optimizer, self).get_config()
      return config

  optimizer = _Optimizer(name='my_optimizer')

  return optimizer


# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
class BaseLinearRegressorEvaluationTest(object):
  """Tests `evaluate` of a linear regressor against hand-built checkpoints.

  Each test writes known weights to a checkpoint, points the regressor at it,
  and checks the evaluation metrics against hand-computed values.
  Mixed into a concrete TestCase with a regressor constructor and a
  feature-column library.
  """

  def __init__(self, linear_regressor_fn, fc_lib=feature_column_v2):
    # Constructor of the regressor under test and the feature-column module
    # (v1 or v2) to build columns with.
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_evaluation_for_simple_data(self):
    """Single example, single feature: metrics match closed-form values."""
    # Build a checkpoint with weight 11.0, bias 2.0, global_step 100.
    with tf.Graph().as_default():
      tf.Variable([[11.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([2.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir)
    eval_metrics = linear_regressor.evaluate(
        input_fn=lambda: ({
            'age': ((1,),)
        }, ((10.,),)), steps=1)

    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10. Loss is 3**2 = 9.
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 9.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_batch(self):
    """Tests evaluation for batch_size==2."""
    with tf.Graph().as_default():
      tf.Variable([[11.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([2.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir)
    eval_metrics = linear_regressor.evaluate(
        input_fn=lambda: ({
            'age': ((1,), (1,))
        }, ((10.,), (10.,))), steps=1)

    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
    # Loss per example is 3**2 = 9.
    # Training loss is the sum over batch size = (9 + 9) / 2 = 9
    # Average loss is the average over batch = 9
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 9.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_weights(self):
    """Tests evaluation with weights."""
    with tf.Graph().as_default():
      tf.Variable([[11.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([2.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    def _input_fn():
      # Second example carries weight 2, so it counts double in the loss.
      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
      labels = ((10.,), (10.,))
      return features, labels

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        weight_column='weights',
        model_dir=self._model_dir)
    eval_metrics = linear_regressor.evaluate(input_fn=_input_fn, steps=1)

    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
    # Loss per example is 3**2 = 9.
    # Training loss is the weighted sum over batch / batch size =
    #     (9 + 2*9) / 2 = 13.5
    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 13.5,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_for_multi_dimensions(self):
    """Multi-dimensional input (x_dim=3) and label (label_dim=2)."""
    x_dim = 3
    label_dim = 2
    with tf.Graph().as_default():
      tf.Variable([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([7.0, 8.0], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age', shape=(x_dim,)),),
        label_dimension=label_dim,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={
            'age': np.array([[2., 4., 5.]]),
        },
        y=np.array([[46., 58.]]),
        batch_size=1,
        num_epochs=None,
        shuffle=False)
    eval_metrics = linear_regressor.evaluate(input_fn=input_fn, steps=1)

    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is
    #   [2., 4., 5.] * [1.0, 2.0] + [7.0, 8.0] = [39, 50] + [7.0, 8.0]
    #                  [3.0, 4.0]
    #                  [5.0, 6.0]
    # which is [46, 58], exactly matching the labels, so loss is 0.
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])

  def test_evaluation_for_multiple_feature_columns(self):
    """Two numeric columns fed through numpy_input_fn; loss should be 0."""
    with tf.Graph().as_default():
      tf.Variable([[10.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
      tf.Variable([5.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    batch_size = 2
    feature_columns = [
        self._fc_lib.numeric_column('age'),
        self._fc_lib.numeric_column('height')
    ]
    input_fn = numpy_io.numpy_input_fn(
        x={
            'age': np.array([20, 40]),
            'height': np.array([4, 8])
        },
        y=np.array([[213.], [421.]]),
        batch_size=batch_size,
        num_epochs=None,
        shuffle=False)

    est = self._linear_regressor_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)

    eval_metrics = est.evaluate(input_fn=input_fn, steps=1)
    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])

  def test_evaluation_for_multiple_feature_columns_mix(self):
    """Same as above but with core tf.feature_column and a Dataset input_fn."""
    with tf.Graph().as_default():
      tf.Variable([[10.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
      tf.Variable([5.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    batch_size = 2
    feature_columns = [
        tf.feature_column.numeric_column('age'),
        tf.feature_column.numeric_column('height')
    ]

    def _input_fn():
      features_ds = tf.compat.v1.data.Dataset.from_tensor_slices({
          'age': np.array([20, 40]),
          'height': np.array([4, 8])
      })
      labels_ds = tf.compat.v1.data.Dataset.from_tensor_slices(
          np.array([[213.], [421.]]))
      return (tf.compat.v1.data.Dataset.zip(
          (features_ds, labels_ds)).batch(batch_size).repeat(None))

    est = self._linear_regressor_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)

    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
class BaseLinearRegressorPredictTest(object):
  """Tests `predict` of a linear regressor against hand-built checkpoints.

  Mixed into a concrete TestCase with a regressor constructor and a
  feature-column library.
  """

  def __init__(self, linear_regressor_fn, fc_lib=feature_column_v2):
    # Constructor of the regressor under test and the feature-column module
    # (v1 or v2) to build columns with.
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_1d(self):
    """Tests predict when all variables are one-dimensional."""
    with tf.Graph().as_default():
      tf.Variable([[10.]], name='linear/linear_model/x/weights')
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('x'),),
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[2.]])},
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # x * weight + bias = 2. * 10. + .2 = 20.2
    self.assertAllClose([[20.2]], predicted_scores)

  def testMultiDim(self):
    """Tests predict when all variables are multi-dimensional."""
    batch_size = 2
    label_dimension = 3
    x_dim = 4
    feature_columns = (self._fc_lib.numeric_column('x', shape=(x_dim,)),)
    with tf.Graph().as_default():
      tf.Variable(  # shape=[x_dim, label_dimension]
          [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
          name='linear/linear_model/x/weights')
      tf.Variable(  # shape=[label_dimension]
          [.2, .4, .6], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        # x shape=[batch_size, x_dim]
        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # score = x * weight + bias, shape=[batch_size, label_dimension]
    self.assertAllClose([[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]],
                        predicted_scores)

  def testTwoFeatureColumns(self):
    """Tests predict with two feature columns."""
    with tf.Graph().as_default():
      tf.Variable([[10.]], name='linear/linear_model/x0/weights')
      tf.Variable([[20.]], name='linear/linear_model/x1/weights')
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('x0'),
                         self._fc_lib.numeric_column('x1')),
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={
            'x0': np.array([[2.]]),
            'x1': np.array([[3.]])
        },
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
    self.assertAllClose([[80.2]], predicted_scores)

  def testTwoFeatureColumnsMix(self):
    """Tests predict with two feature columns."""
    # Same as testTwoFeatureColumns but with core tf.feature_column and a
    # Dataset-based input_fn.
    with tf.Graph().as_default():
      tf.Variable([[10.]], name='linear/linear_model/x0/weights')
      tf.Variable([[20.]], name='linear/linear_model/x1/weights')
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(tf.feature_column.numeric_column('x0'),
                         tf.feature_column.numeric_column('x1')),
        model_dir=self._model_dir)

    def _predict_input_fn():
      return tf.compat.v1.data.Dataset.from_tensor_slices({
          'x0': np.array([[2.]]),
          'x1': np.array([[3.]])
      }).batch(1)

    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
    self.assertAllClose([[80.2]], predicted_scores)

  def testSparseCombiner(self):
    """Tests predictions under the three sparse_combiner modes."""
    w_a = 2.0
    w_b = 3.0
    w_c = 5.0
    bias = 5.0
    with tf.Graph().as_default():
      tf.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          1, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    def _input_fn():
      # Example 0 contains categories {a, c}; example 1 contains {b, c}.
      return tf.compat.v1.data.Dataset.from_tensors({
          'language':
              tf.sparse.SparseTensor(
                  values=['a', 'c', 'b', 'c'],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      })

    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
        'language', vocabulary_list=['a', 'b', 'c']),)

    # Check prediction for each sparse_combiner.
    # With sparse_combiner = 'sum', we have
    # logits_1 = w_a + w_c + bias
    #          = 2.0 + 5.0 + 5.0 = 12.0
    # logits_2 = w_b + w_c + bias
    #          = 3.0 + 5.0 + 5.0 = 13.0
    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)
    predictions = linear_regressor.predict(input_fn=_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    self.assertAllClose([[12.0], [13.0]], predicted_scores)

    # With sparse_combiner = 'mean', we have
    # logits_1 = 1/2 * (w_a + w_c) + bias
    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
    # logits_2 = 1/2 * (w_b + w_c) + bias
    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        sparse_combiner='mean')
    predictions = linear_regressor.predict(input_fn=_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    self.assertAllClose([[8.5], [9.0]], predicted_scores)

    # With sparse_combiner = 'sqrtn', we have
    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        sparse_combiner='sqrtn')
    predictions = linear_regressor.predict(input_fn=_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)
class BaseLinearRegressorIntegrationTest(object):
  """End-to-end train/evaluate/predict/export flow for a linear regressor.

  Mixed into a concrete TestCase with a regressor constructor and a
  feature-column library.
  """

  def __init__(self, linear_regressor_fn, fc_lib=feature_column_v2):
    # Constructor of the regressor under test and the feature-column module
    # (v1 or v2) to build columns with.
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, prediction_length):
    """Runs train -> evaluate -> predict -> export and sanity-checks each step.

    Args:
      train_input_fn: input_fn for `train`.
      eval_input_fn: input_fn for `evaluate`.
      predict_input_fn: input_fn for `predict`.
      input_dimension: Dimension of the 'x' feature column.
      label_dimension: Dimension of the labels/predictions.
      prediction_length: Expected number of predictions.
    """
    feature_columns = [
        self._fc_lib.numeric_column('x', shape=(input_dimension,))
    ]
    est = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['predictions'] for x in est.predict(predict_input_fn)])
    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return

    # Pandas DataFrame naturally supports 1 dim data only.
    label_dimension = 1
    input_dimension = label_dimension
    batch_size = 10
    data = np.array([1., 2., 3., 4.], dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    prediction_length = 4

    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # Serialize each row as a tf.Example with features 'x' and 'y'.
    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(
                              value=datum[:label_dimension])),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)
class BaseLinearRegressorTrainingTest(object):
  """Tests `train` of a linear regressor, checking checkpoints and optimizer.

  Mixed into a concrete TestCase with a regressor constructor and a
  feature-column library.
  """

  def __init__(self, linear_regressor_fn, fc_lib=feature_column_v2):
    # Constructor of the regressor under test and the feature-column module
    # (v1 or v2) to build columns with.
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _assert_checkpoint(self,
                         expected_global_step,
                         expected_age_weight=None,
                         expected_bias=None):
    """Checks variable shapes/values in the latest checkpoint in model_dir.

    Weight/bias values are only checked when the corresponding `expected_*`
    argument is not None.
    """
    shapes = {
        name: shape
        for (name, shape) in tf.train.list_variables(self._model_dir)
    }

    self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertEqual(
        expected_global_step,
        tf.train.load_variable(self._model_dir,
                               tf.compat.v1.GraphKeys.GLOBAL_STEP))

    self.assertEqual([1, 1], shapes[AGE_WEIGHT_NAME])
    if expected_age_weight is not None:
      self.assertEqual(expected_age_weight,
                       tf.train.load_variable(self._model_dir, AGE_WEIGHT_NAME))

    self.assertEqual([1], shapes[BIAS_NAME])
    if expected_bias is not None:
      self.assertEqual(expected_bias,
                       tf.train.load_variable(self._model_dir, BIAS_NAME))

  def testFromScratchWithDefaultOptimizer(self):
    """Trains from scratch with the canned default optimizer."""
    # Create LinearRegressor.
    label = 5.
    age = 17
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, and validate final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((age,),)
        }, ((label,),)), steps=num_steps)
    self._assert_checkpoint(num_steps)

  def testTrainWithOneDimLabel(self):
    """Trains with rank-1 labels (no trailing label dimension)."""
    label_dimension = 1
    batch_size = 20
    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
    est = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)
    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    self.assertEqual((batch_size,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'age': data_rank_1},
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(200)

  def testTrainWithOneDimWeight(self):
    """Trains with a rank-1 weight column."""
    label_dimension = 1
    batch_size = 20
    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
    est = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        weight_column='w',
        model_dir=self._model_dir)

    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    self.assertEqual((batch_size,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={
            'age': data_rank_1,
            'w': data_rank_1
        },
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(200)

  def testFromScratch(self):
    """Trains from scratch with a mock optimizer asserting the initial loss."""
    # Create LinearRegressor.
    label = 5.
    age = 17
    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
    mock_opt = mock_optimizer(self, expected_loss=25.)
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir,
        optimizer=mock_opt)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((age,),)
        }, ((label,),)), steps=num_steps)
    self.assertEqual(
        num_steps, linear_regressor.get_variable_value(mock_opt.iterations.name))
    self._assert_checkpoint(
        expected_global_step=num_steps,
        expected_age_weight=0.,
        expected_bias=0.)

  def testFromCheckpoint(self):
    """Resumes training from a checkpoint with known weights."""
    # Create initial checkpoint.
    age_weight = 10.0
    bias = 5.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = age * age_weight + bias = 17 * 10. + 5. = 175
    # loss = (logits - label)^2 = (175 - 5)^2 = 28900
    mock_opt = mock_optimizer(self, expected_loss=28900.)
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir,
        optimizer=mock_opt)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((17,),)
        }, ((5.,),)), steps=num_steps)
    self.assertEqual(
        initial_global_step + num_steps,
        linear_regressor.get_variable_value(mock_opt.iterations.name))
    self._assert_checkpoint(
        expected_global_step=initial_global_step + num_steps,
        expected_age_weight=age_weight,
        expected_bias=bias)

  def testFromCheckpointMultiBatch(self):
    """Resumes training from a checkpoint with a batch of two examples."""
    # Create initial checkpoint.
    age_weight = 10.0
    bias = 5.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = age * age_weight + bias
    # logits[0] = 17 * 10. + 5. = 175
    # logits[1] = 15 * 10. + 5. = 155
    # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
    # expected_loss = loss / 2 = 26002
    mock_opt = mock_optimizer(self, expected_loss=26002.)
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir,
        optimizer=mock_opt)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((17,), (15,))
        }, ((5.,), (3.,))),
        steps=num_steps)
    self.assertEqual(
        initial_global_step + num_steps,
        linear_regressor.get_variable_value(mock_opt.iterations.name))
    self._assert_checkpoint(
        expected_global_step=initial_global_step + num_steps,
        expected_age_weight=age_weight,
        expected_bias=bias)
feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) # Train for a few steps, and validate final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self._assert_checkpoint(n_classes, num_steps) def testBinaryClassesFromScratchWithDefaultOptimizer(self): self._testFromScratchWithDefaultOptimizer(n_classes=2) def testMultiClassesFromScratchWithDefaultOptimizer(self): self._testFromScratchWithDefaultOptimizer(n_classes=4) def _testTrainWithTwoDimsLabel(self, n_classes): batch_size = 20 est = linear.LinearClassifierV2( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) data_rank_2 = np.array([[0], [1]]) self.assertEqual((2,), data_rank_1.shape) self.assertEqual((2, 1), data_rank_2.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_2, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithTwoDimsLabel(self): self._testTrainWithTwoDimsLabel(n_classes=2) def testMultiClassesTrainWithTwoDimsLabel(self): self._testTrainWithTwoDimsLabel(n_classes=4) def _testTrainWithOneDimLabel(self, n_classes): batch_size = 20 est = linear.LinearClassifierV2( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) self.assertEqual((2,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithOneDimLabel(self): self._testTrainWithOneDimLabel(n_classes=2) def testMultiClassesTrainWithOneDimLabel(self): self._testTrainWithOneDimLabel(n_classes=4) def _testTrainWithTwoDimsWeight(self, 
n_classes): batch_size = 20 est = linear.LinearClassifierV2( feature_columns=(self._fc_lib.numeric_column('age'),), weight_column='w', n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) data_rank_2 = np.array([[0], [1]]) self.assertEqual((2,), data_rank_1.shape) self.assertEqual((2, 1), data_rank_2.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_2 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithTwoDimsWeight(self): self._testTrainWithTwoDimsWeight(n_classes=2) def testMultiClassesTrainWithTwoDimsWeight(self): self._testTrainWithTwoDimsWeight(n_classes=4) def _testTrainWithOneDimWeight(self, n_classes): batch_size = 20 est = linear.LinearClassifierV2( feature_columns=(self._fc_lib.numeric_column('age'),), weight_column='w', n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) self.assertEqual((2,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_1 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithOneDimWeight(self): self._testTrainWithOneDimWeight(n_classes=2) def testMultiClassesTrainWithOneDimWeight(self): self._testTrainWithOneDimWeight(n_classes=4) def _testFromScratch(self, n_classes): label = 1 age = 17 # For binary classifier: # loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are # all zero initially) and label = 1 so, # loss = 1 * -log ( sigmoid(logits) ) = 0.69315 # For multi class classifier: # loss = cross_entropy(logits, label) where logits are all 0s (weights are # all zero initially) and label = 1 so, # loss = 1 * -log ( 1.0 / n_classes ) # For this particular test case, as logits are same, the formular # 1 * -log ( 1.0 / n_classes ) 
covers both binary and multi class cases. mock_opt = mock_optimizer( self, expected_loss=-1 * math.log(1.0 / n_classes)) est = linear.LinearClassifierV2( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, optimizer=mock_opt, model_dir=self._model_dir) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self.assertEqual(num_steps, est.get_variable_value(mock_opt.iterations.name)) self._assert_checkpoint( n_classes, expected_global_step=num_steps, expected_age_weight=[[0.]] if n_classes == 2 else [[0.] * n_classes], expected_bias=[0.] if n_classes == 2 else [.0] * n_classes) def testBinaryClassesFromScratch(self): self._testFromScratch(n_classes=2) def testMultiClassesFromScratch(self): self._testFromScratch(n_classes=4) def _testFromCheckpoint(self, n_classes): # Create initial checkpoint. label = 1 age = 17 # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). age_weight = [[2.0]] if n_classes == 2 else (np.reshape( 2.0 * np.array(list(range(n_classes)), dtype=np.float32), (1, n_classes))) bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # For binary classifier: # logits = age * age_weight + bias = 17 * 2. - 35. = -1. 
# loss = sigmoid_cross_entropy(logits, label) # so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133 # For multi class classifier: # loss = cross_entropy(logits, label) # where logits = 17 * age_weight + bias and label = 1 # so, loss = 1 * -log ( soft_max(logits)[1] ) if n_classes == 2: expected_loss = 1.3133 else: logits = age_weight * age + bias logits_exp = np.exp(logits) softmax = logits_exp / logits_exp.sum() expected_loss = -1 * math.log(softmax[0, label]) mock_opt = mock_optimizer(self, expected_loss=expected_loss) est = linear.LinearClassifierV2( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, optimizer=mock_opt, model_dir=self._model_dir) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self.assertEqual(initial_global_step + num_steps, est.get_variable_value(mock_opt.iterations.name)) self._assert_checkpoint( n_classes, expected_global_step=initial_global_step + num_steps, expected_age_weight=age_weight, expected_bias=bias) def testBinaryClassesFromCheckpoint(self): self._testFromCheckpoint(n_classes=2) def testMultiClassesFromCheckpoint(self): self._testFromCheckpoint(n_classes=4) def _testFromCheckpointFloatLabels(self, n_classes): """Tests float labels for binary classification.""" # Create initial checkpoint. if n_classes > 2: return label = 0.8 age = 17 age_weight = [[2.0]] bias = [-35.0] initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # logits = age * age_weight + bias = 17 * 2. - 35. = -1. 
  def _testFromCheckpointMultiBatch(self, n_classes):
    """Trains from an existing checkpoint on a batch of 2 examples.

    Writes a checkpoint with known weights, trains with a mock optimizer that
    asserts the per-step loss, and verifies the global step advanced while the
    model variables were left untouched (learning happens only through the
    mock optimizer, which applies no real updates).

    Args:
      n_classes: Number of classes; 2 exercises the binary head, >2 the
        multi-class head.
    """
    # Create initial checkpoint.
    label = [1, 0]
    age = [17.0, 18.5]
    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes) so each class column is distinct.
    age_weight = [[2.0]] if n_classes == 2 else (np.reshape(
        2.0 * np.array(list(range(n_classes)), dtype=np.float32),
        (1, n_classes)))
    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable(age_weight, name=AGE_WEIGHT_NAME)
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # For binary classifier:
    #   logits = age * age_weight + bias
    #   logits[0] = 17 * 2. - 35. = -1.
    #   logits[1] = 18.5 * 2. - 35. = 2.
    #   loss = sigmoid_cross_entropy(logits, label)
    #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
    #       loss[1] = (1 - 0) * -log ( 1- sigmoid(2) ) = 2.1269
    #   expected_loss = (loss[0] + loss[1]) / batch size (2)
    # For multi class classifier:
    #   loss = cross_entropy(logits, label)
    #   where logits = [17, 18.5] * age_weight + bias and label = [1, 0]
    #   so, loss = 1 * -log ( soft_max(logits)[label] )
    #   expected_loss = (loss[0] + loss[1]) / batch size (2)
    if n_classes == 2:
      expected_loss = (1.3133 + 2.1269) / 2
    else:
      # Recompute the softmax cross-entropy in numpy, row by row.
      logits = age_weight * np.reshape(age, (2, 1)) + bias
      logits_exp = np.exp(logits)
      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
      expected_loss = (expected_loss_0 + expected_loss_1) / 2

    # mock_optimizer asserts that each training step sees `expected_loss`.
    mock_opt = mock_optimizer(self, expected_loss=expected_loss)
    est = linear.LinearClassifierV2(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        n_classes=n_classes,
        optimizer=mock_opt,
        model_dir=self._model_dir)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    est.train(input_fn=lambda: ({'age': (age)}, (label)), steps=num_steps)
    # The optimizer's iteration counter resumes from the checkpointed step.
    self.assertEqual(initial_global_step + num_steps,
                     est.get_variable_value(mock_opt.iterations.name))
    # Weights/bias must be unchanged: the mock optimizer applies no updates.
    self._assert_checkpoint(
        n_classes,
        expected_global_step=initial_global_step + num_steps,
        expected_age_weight=age_weight,
        expected_bias=bias)
expected_metrics = { metric_keys.MetricKeys.LOSS: 41., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: 41., metric_keys.MetricKeys.ACCURACY: 0., metric_keys.MetricKeys.PRECISION: 0., metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0., metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, metric_keys.MetricKeys.AUC: 0., metric_keys.MetricKeys.AUC_PR: 1., } else: # Multi classes: loss = 1 * -log ( soft_max(logits)[label] ) logits = age_weight * age + bias logits_exp = np.exp(logits) softmax = logits_exp / logits_exp.sum() expected_loss = -1 * math.log(softmax[0, label]) expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.ACCURACY: 0., } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics), rtol=1e-3) def test_binary_classes_evaluation_for_simple_data(self): self._test_evaluation_for_simple_data(n_classes=2) def test_multi_classes_evaluation_for_simple_data(self): self._test_evaluation_for_simple_data(n_classes=4) def _test_evaluation_batch(self, n_classes): """Tests evaluation for batch_size==2.""" label = [1, 0] age = [17., 18.] # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). 
age_weight = [[2.0]] if n_classes == 2 else (np.reshape( 2.0 * np.array(list(range(n_classes)), dtype=np.float32), (1, n_classes))) bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = self._linear_classifier_fn( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) eval_metrics = est.evaluate( input_fn=lambda: ({ 'age': (age) }, (label)), steps=1) if n_classes == 2: # Logits are (-1., 1.) labels are (1, 0). # Loss is # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 # loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133 expected_loss = (1.3133 * 2) / 2 # Divided by batch size expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: expected_loss, metric_keys.MetricKeys.ACCURACY: 0., metric_keys.MetricKeys.PRECISION: 0., metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0.5, metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, metric_keys.MetricKeys.AUC: 0., metric_keys.MetricKeys.AUC_PR: 0.3068, } else: # Multi classes: loss = 1 * -log ( soft_max(logits)[label] ) logits = age_weight * np.reshape(age, (2, 1)) + bias logits_exp = np.exp(logits) softmax_row_0 = logits_exp[0] / logits_exp[0].sum() softmax_row_1 = logits_exp[1] / logits_exp[1].sum() expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) expected_loss = (expected_loss_0 + expected_loss_1) / 2 # batch size expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, 
metric_keys.MetricKeys.ACCURACY: 0., } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics), rtol=1e-3) def test_binary_classes_evaluation_batch(self): self._test_evaluation_batch(n_classes=2) def test_multi_classes_evaluation_batch(self): self._test_evaluation_batch(n_classes=4) def _test_evaluation_weights(self, n_classes): """Tests evaluation with weights.""" label = [1, 0] age = [17., 18.] weights = [1., 2.] # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). age_weight = [[2.0]] if n_classes == 2 else (np.reshape( 2.0 * np.array(list(range(n_classes)), dtype=np.float32), (1, n_classes))) bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = self._linear_classifier_fn( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, weight_column='w', model_dir=self._model_dir) eval_metrics = est.evaluate( input_fn=lambda: ({ 'age': (age), 'w': (weights) }, (label)), steps=1) if n_classes == 2: # Logits are (-1., 1.) labels are (1, 0). # Loss is # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 # loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133 # weights = [1., 2.] expected_loss = (1.3133 * (1. + 2.)) / 2 # Divided by batch size loss_mean = (1.3133 * (1. 
+ 2.)) / (1.0 + 2.0) label_mean = np.average(label, weights=weights) logits = [-1, 1] logistics = sigmoid(np.array(logits)) predictions_mean = np.average(logistics, weights=weights) expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: loss_mean, metric_keys.MetricKeys.ACCURACY: 0., metric_keys.MetricKeys.PRECISION: 0., metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, metric_keys.MetricKeys.LABEL_MEAN: label_mean, metric_keys.MetricKeys.ACCURACY_BASELINE: (max(label_mean, 1 - label_mean)), metric_keys.MetricKeys.AUC: 0., metric_keys.MetricKeys.AUC_PR: 0.1891, } else: # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) logits = age_weight * np.reshape(age, (2, 1)) + bias logits_exp = np.exp(logits) softmax_row_0 = logits_exp[0] / logits_exp[0].sum() softmax_row_1 = logits_exp[1] / logits_exp[1].sum() expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) loss_mean = np.average([expected_loss_0, expected_loss_1], weights=weights) expected_loss = (loss_mean * np.sum(weights)) / 2 # batch size expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: loss_mean, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.ACCURACY: 0., } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics), rtol=1e-3) def test_binary_classes_evaluation_weights(self): self._test_evaluation_weights(n_classes=2) def test_multi_classes_evaluation_weights(self): self._test_evaluation_weights(n_classes=4) class BaseLinearClassifierPredictTest(object): def __init__(self, linear_classifier_fn, fc_lib=feature_column_v2): self._linear_classifier_fn = linear_classifier_fn self._fc_lib = fc_lib def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) 
  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
    """Tests predict when all variables are one-dimensional.

    Writes a checkpoint with fixed weights, runs predict on a single example,
    and checks every prediction key (class ids, classes, logits,
    probabilities, and — binary only — logistic) against values recomputed
    in numpy.

    Args:
      n_classes: Number of classes; 2 exercises the binary head.
      label_vocabulary: Optional list of string labels, or None for integer
        class labels.
      label_output_fn: Maps a class id to the expected bytes value of the
        'classes'/'all_classes' prediction entries.
    """
    age = 1.

    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as -11.0 * range(n_classes) so each class column is distinct.
    age_weight = [[-11.0]] if n_classes == 2 else (np.reshape(
        -11.0 * np.array(list(range(n_classes)), dtype=np.float32),
        (1, n_classes)))
    bias = [10.0] if n_classes == 2 else [10.0] * n_classes

    with tf.Graph().as_default():
      tf.Variable(age_weight, name=AGE_WEIGHT_NAME)
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = self._linear_classifier_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        label_vocabulary=label_vocabulary,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # Single example, single epoch: exactly one prediction comes back.
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'age': np.array([[age]])},
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = list(est.predict(input_fn=predict_input_fn))

    if n_classes == 2:
      # Binary head emits one logit; probabilities are the softmax of
      # [0, logit] (equivalent to [1 - sigmoid(logit), sigmoid(logit)]).
      scalar_logits = np.reshape(np.array(age_weight) * age + bias, (1,)).item()
      two_classes_logits = [0, scalar_logits]
      two_classes_logits_exp = np.exp(two_classes_logits)
      softmax = two_classes_logits_exp / two_classes_logits_exp.sum()

      expected_predictions = {
          'class_ids': [0],
          'all_class_ids': [0, 1],
          'classes': [label_output_fn(0)],
          'all_classes': [label_output_fn(0), label_output_fn(1)],
          'logistic': [sigmoid(np.array(scalar_logits))],
          'logits': [scalar_logits],
          'probabilities': softmax,
      }
    else:
      # Multi-class head emits one logit per class; the predicted class is
      # the argmax and probabilities are the softmax over all logits.
      onedim_logits = np.reshape(np.array(age_weight) * age + bias, (-1,))
      class_ids = onedim_logits.argmax()
      all_class_ids = list(range(len(onedim_logits)))
      logits_exp = np.exp(onedim_logits)
      softmax = logits_exp / logits_exp.sum()
      expected_predictions = {
          'class_ids': [class_ids],
          'all_class_ids': all_class_ids,
          'classes': [label_output_fn(class_ids)],
          'all_classes': [label_output_fn(i) for i in all_class_ids],
          'logits': onedim_logits,
          'probabilities': softmax,
      }

    self.assertEqual(1, len(predictions))
    # assertAllClose cannot handle byte type, so the string-valued entries
    # are compared with assertEqual/assertAllEqual and removed first.
    self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
    expected_predictions.pop('classes')
    predictions[0].pop('classes')
    self.assertAllEqual(expected_predictions['all_classes'],
                        predictions[0]['all_classes'])
    expected_predictions.pop('all_classes')
    predictions[0].pop('all_classes')
    self.assertAllClose(
        sorted_key_dict(expected_predictions), sorted_key_dict(predictions[0]))
each sparse_combiner. # With sparse_combiner = 'sum', we have # logits_1 = w_a + w_c + bias # = 2.0 + 5.0 + 5.0 = 12.0 # logits_2 = w_b + w_c + bias # = 3.0 + 5.0 + 5.0 = 13.0 linear_classifier = self._linear_classifier_fn( feature_columns=feature_columns, model_dir=self._model_dir) predictions = linear_classifier.predict(input_fn=_input_fn) predicted_scores = list([x['logits'] for x in predictions]) self.assertAllClose([[12.0], [13.0]], predicted_scores) # With sparse_combiner = 'mean', we have # logits_1 = 1/2 * (w_a + w_c) + bias # = 1/2 * (2.0 + 5.0) + 5.0 = 8.5 # logits_2 = 1/2 * (w_b + w_c) + bias # = 1/2 * (3.0 + 5.0) + 5.0 = 9.0 linear_classifier = self._linear_classifier_fn( feature_columns=feature_columns, model_dir=self._model_dir, sparse_combiner='mean') predictions = linear_classifier.predict(input_fn=_input_fn) predicted_scores = list([x['logits'] for x in predictions]) self.assertAllClose([[8.5], [9.0]], predicted_scores) # With sparse_combiner = 'sqrtn', we have # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias # = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974 # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias # = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685 linear_classifier = self._linear_classifier_fn( feature_columns=feature_columns, model_dir=self._model_dir, sparse_combiner='sqrtn') predictions = linear_classifier.predict(input_fn=_input_fn) predicted_scores = list([x['logits'] for x in predictions]) self.assertAllClose([[9.94974], [10.65685]], predicted_scores) class BaseLinearClassifierIntegrationTest(object): def __init__(self, linear_classifier_fn, fc_lib=feature_column_v2): self._linear_classifier_fn = linear_classifier_fn self._fc_lib = fc_lib def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, prediction_length): feature_columns = [ self._fc_lib.numeric_column('x', 
shape=(input_dimension,)) ] est = self._linear_classifier_fn( feature_columns=feature_columns, n_classes=n_classes, model_dir=self._model_dir) # TRAIN # learn y = x est.train(train_input_fn, steps=200) # EVALUTE scores = est.evaluate(eval_input_fn) self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) # PREDICT predictions = np.array( [x['classes'] for x in est.predict(predict_input_fn)]) self.assertAllEqual((prediction_length, 1), predictions.shape) # EXPORT feature_spec = tf.feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def _test_numpy_input_fn(self, n_classes): """Tests complete flow with numpy_input_fn.""" input_dimension = 4 batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) target = np.array([1] * batch_size) train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=target, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=target, batch_size=batch_size, num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) self._test_complete_flow( n_classes=n_classes, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_numpy_input_fn(self): self._test_numpy_input_fn(n_classes=2) def test_multi_classes_numpy_input_fn(self): self._test_numpy_input_fn(n_classes=4) def _test_pandas_input_fn(self, n_classes): """Tests complete flow with pandas_input_fn.""" if not 
HAS_PANDAS: return # Pandas DataFrame natually supports 1 dim data only. input_dimension = 1 batch_size = 10 data = np.array([1., 2., 3., 4.], dtype=np.float32) target = np.array([1, 0, 1, 0], dtype=np.int32) x = pd.DataFrame({'x': data}) y = pd.Series(target) prediction_length = 4 train_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, shuffle=False) predict_input_fn = pandas_io.pandas_input_fn( x=x, batch_size=batch_size, shuffle=False) self._test_complete_flow( n_classes=n_classes, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_pandas_input_fn(self): self._test_pandas_input_fn(n_classes=2) def test_multi_classes_pandas_input_fn(self): self._test_pandas_input_fn(n_classes=4) def _test_input_fn_from_parse_example(self, n_classes): """Tests complete flow with input_fn constructed from parse_example.""" input_dimension = 2 batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) target = np.array([1] * batch_size, dtype=np.int64) serialized_examples = [] for x, y in zip(data, target): example = example_pb2.Example( features=feature_pb2.Features( feature={ 'x': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=x)), 'y': feature_pb2.Feature( int64_list=feature_pb2.Int64List(value=[y])), })) serialized_examples.append(example.SerializeToString()) feature_spec = { 'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32), 'y': tf.io.FixedLenFeature([1], tf.dtypes.int64), } def _train_input_fn(): feature_map = tf.compat.v1.io.parse_example(serialized_examples, feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def 
class BaseLinearLogitFnTest(object):
  """Tests for linear.linear_logit_fn_builder and its sparsity helper."""

  def __init__(self, fc_lib=feature_column_v2):
    # Feature-column library under test (v1 or v2 API surface).
    self._fc_lib = fc_lib

  def test_basic_logit_correctness(self):
    """linear_logit_fn simply wraps feature_column_lib.linear_model."""
    age = self._fc_lib.numeric_column('age')
    with tf.Graph().as_default():
      logit_fn = linear.linear_logit_fn_builder(units=2, feature_columns=[age])
      logits = logit_fn(features={'age': [[23.], [31.]]})
      # The underlying linear model registers its variables in the global
      # collection under 'linear_model/...'.
      bias_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
          'linear_model/bias_weights')[0]
      age_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, 'linear_model/age')[0]
      with tf.compat.v1.Session() as sess:
        sess.run([tf.compat.v1.initializers.global_variables()])
        # Freshly initialized weights and bias are zero, so logits are zero.
        self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
        # With zero weights, logits equal the bias for every example.
        sess.run(bias_var.assign([10., 5.]))
        self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
        sess.run(age_var.assign([[2.0, 3.0]]))
        # [2 * 23 + 10, 3 * 23 + 5] = [56, 74].
        # [2 * 31 + 10, 3 * 31 + 5] = [72, 98]
        self.assertAllClose([[56., 74.], [72., 98.]], logits.eval())

  def test_compute_fraction_of_zero_v2(self):
    """Tests the calculation of sparsity."""
    # _compute_fraction_of_zero is only exercised against the v2 library.
    if self._fc_lib != feature_column_v2:
      return
    age = tf.feature_column.numeric_column('age')
    occupation = tf.feature_column.categorical_column_with_hash_bucket(
        'occupation', hash_bucket_size=5)
    with tf.Graph().as_default():
      model = linear.LinearModel(
          feature_columns=[age, occupation], units=3, name='linear_model')
      features = {
          'age': [[23.], [31.]],
          'occupation': [['doctor'], ['engineer']]
      }
      # Calling the model builds its variables.
      model(features)
      variables = model.variables
      # Sparsity is measured over the weights only, excluding the bias.
      variables.remove(model.bias)
      fraction_zero = linear._compute_fraction_of_zero(variables)
      age_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, 'linear_model/age')[0]
      with tf.compat.v1.Session() as sess:
        sess.run([tf.compat.v1.initializers.global_variables()])
        # Upon initialization, all variables will be zero.
        self.assertAllClose(1, fraction_zero.eval())

        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
        # x 3-dim output) are zero: 16 zeros out of 18 weights.
        self.assertAllClose(16. / 18., fraction_zero.eval())
tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._ckpt_and_vocab_dir) def test_classifier_basic_warm_starting(self): """Tests correctness of LinearClassifier default warm-start.""" age = self._fc_lib.numeric_column('age') # Create a LinearClassifier and train to save a checkpoint. linear_classifier = self._linear_classifier_fn( feature_columns=[age], model_dir=self._ckpt_and_vocab_dir, n_classes=4, optimizer='SGD') linear_classifier.train(input_fn=self._input_fn, max_steps=1) # Create a second LinearClassifier, warm-started from the first. Use a # learning_rate = 0.0 optimizer to check values (use SGD so we don't have # accumulator values that change). warm_started_linear_classifier = self._linear_classifier_fn( feature_columns=[age], n_classes=4, optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0), warm_start_from=linear_classifier.model_dir) warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_linear_classifier.get_variable_names(): # Learning rate is also checkpointed in V2 optimizer. So we need to make # sure it uses the new value after warm started. if 'learning_rate' in variable_name: self.assertAllClose( 0.0, warm_started_linear_classifier.get_variable_value(variable_name)) else: self.assertAllClose( linear_classifier.get_variable_value(variable_name), warm_started_linear_classifier.get_variable_value(variable_name)) def test_regressor_basic_warm_starting(self): """Tests correctness of LinearRegressor default warm-start.""" age = self._fc_lib.numeric_column('age') # Create a LinearRegressor and train to save a checkpoint. linear_regressor = self._linear_regressor_fn( feature_columns=[age], model_dir=self._ckpt_and_vocab_dir, optimizer='SGD') linear_regressor.train(input_fn=self._input_fn, max_steps=1) # Create a second LinearRegressor, warm-started from the first. Use a # learning_rate = 0.0 optimizer to check values (use SGD so we don't have # accumulator values that change). 
warm_started_linear_regressor = self._linear_regressor_fn( feature_columns=[age], optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0), warm_start_from=linear_regressor.model_dir) warm_started_linear_regressor.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_linear_regressor.get_variable_names(): # Learning rate is also checkpointed in V2 optimizer. So we need to make # sure it uses the new value after warm started. if 'learning_rate' in variable_name: self.assertAllClose( 0.0, warm_started_linear_regressor.get_variable_value(variable_name)) else: self.assertAllClose( linear_regressor.get_variable_value(variable_name), warm_started_linear_regressor.get_variable_value(variable_name)) def test_warm_starting_selective_variables(self): """Tests selecting variables to warm-start.""" age = self._fc_lib.numeric_column('age') # Create a LinearClassifier and train to save a checkpoint. linear_classifier = self._linear_classifier_fn( feature_columns=[age], model_dir=self._ckpt_and_vocab_dir, n_classes=4, optimizer='SGD') linear_classifier.train(input_fn=self._input_fn, max_steps=1) # Create a second LinearClassifier, warm-started from the first. Use a # learning_rate = 0.0 optimizer to check values (use SGD so we don't have # accumulator values that change). warm_started_linear_classifier = self._linear_classifier_fn( feature_columns=[age], n_classes=4, optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0), # The provided regular expression will only warm-start the age variable # and not the bias. warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=linear_classifier.model_dir, vars_to_warm_start='.*(age).*')) warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1) self.assertAllClose( linear_classifier.get_variable_value(AGE_WEIGHT_NAME), warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME)) # Bias should still be zero from initialization. 
    self.assertAllClose(
        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))

  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
    """Tests warm-starting with vocab remapping and partitioning."""
    # Write a small occupation vocabulary file for the "old" model.
    vocab_list = ['doctor', 'lawyer', 'consultant']
    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
    with open(vocab_file, 'w') as f:
      f.write('\n'.join(vocab_list))
    occupation = self._fc_lib.categorical_column_with_vocabulary_file(
        'occupation',
        vocabulary_file=vocab_file,
        vocabulary_size=len(vocab_list))
    # Create a LinearClassifier and train to save a checkpoint.
    linear_classifier = self._linear_classifier_fn(
        feature_columns=[occupation],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change). Use a new FeatureColumn with a
    # different vocabulary for occupation.
    new_vocab_list = ['doctor', 'consultant', 'engineer']
    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
                                  'new_occupation_vocab')
    with open(new_vocab_file, 'w') as f:
      f.write('\n'.join(new_vocab_list))
    new_occupation = self._fc_lib.categorical_column_with_vocabulary_file(
        'occupation',
        vocabulary_file=new_vocab_file,
        vocabulary_size=len(new_vocab_list))
    # We can create our VocabInfo object from the new and old occupation
    # FeatureColumn's.
    occupation_vocab_info = estimator.VocabInfo(
        new_vocab=new_occupation.vocabulary_file,
        new_vocab_size=new_occupation.vocabulary_size,
        num_oov_buckets=new_occupation.num_oov_buckets,
        old_vocab=occupation.vocabulary_file,
        old_vocab_size=occupation.vocabulary_size,
        # Can't use constant_initializer with load_and_remap. In practice,
        # use a truncated normal initializer.
        backup_initializer=tf.compat.v1.initializers.random_uniform(
            # min == max == 0.39 makes "new" rows deterministically 0.39, so
            # the backup-initialized row can be asserted on exactly below.
            minval=0.39, maxval=0.39))
    warm_started_linear_classifier = self._linear_classifier_fn(
        feature_columns=[occupation],
        n_classes=4,
        optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=linear_classifier.model_dir,
            var_name_to_vocab_info={
                OCCUPATION_WEIGHT_NAME: occupation_vocab_info
            },
            # Explicitly providing None here will only warm-start variables
            # referenced in var_name_to_vocab_info (the bias will not be
            # warm-started).
            vars_to_warm_start=None))
    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # 'doctor' was ID-0 and still ID-0.
    self.assertAllClose(
        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[0, :],
        warm_started_linear_classifier.get_variable_value(
            OCCUPATION_WEIGHT_NAME)[0, :])
    # 'consultant' was ID-2 and now ID-1.
    self.assertAllClose(
        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[2, :],
        warm_started_linear_classifier.get_variable_value(
            OCCUPATION_WEIGHT_NAME)[1, :])
    # 'engineer' is a new entry and should be initialized with the
    # backup_initializer in VocabInfo.
    self.assertAllClose([0.39] * 4,
                        warm_started_linear_classifier.get_variable_value(
                            OCCUPATION_WEIGHT_NAME)[2, :])
    # Bias should still be zero (from initialization logic).
    self.assertAllClose(
        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))

  def test_warm_starting_with_naming_change(self):
    """Tests warm-starting with a Tensor name remapping."""
    age_in_years = self._fc_lib.numeric_column('age_in_years')

    # Create a LinearClassifier and train to save a checkpoint.
    linear_classifier = self._linear_classifier_fn(
        feature_columns=[age_in_years],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_linear_classifier = self._linear_classifier_fn(
        feature_columns=[self._fc_lib.numeric_column('age')],
        n_classes=4,
        optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.0),
        # The 'age' variable correspond to the 'age_in_years' variable in the
        # previous model.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=linear_classifier.model_dir,
            var_name_to_prev_var_name={
                AGE_WEIGHT_NAME: AGE_WEIGHT_NAME.replace('age', 'age_in_years')
            }))
    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # The remapped weight must equal the old model's 'age_in_years' weight.
    self.assertAllClose(
        linear_classifier.get_variable_value(
            AGE_WEIGHT_NAME.replace('age', 'age_in_years')),
        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
    # The bias is also warm-started (with no name remapping).
    self.assertAllClose(
        linear_classifier.get_variable_value(BIAS_NAME),
        warm_started_linear_classifier.get_variable_value(BIAS_NAME))


================================================
FILE: tensorflow_estimator/python/estimator/canned/metric_keys.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Enum for model prediction keys.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow_estimator.python.estimator import model_fn class MetricKeys(object): """Metric key strings.""" LOSS = model_fn.LOSS_METRIC_KEY LOSS_MEAN = model_fn.AVERAGE_LOSS_METRIC_KEY LOSS_REGULARIZATION = 'regularization_loss' ACCURACY = 'accuracy' PRECISION = 'precision' RECALL = 'recall' # This is the best the model could do by always predicting one class. # Should be < ACCURACY in a trained model. ACCURACY_BASELINE = 'accuracy_baseline' AUC = 'auc' AUC_PR = 'auc_precision_recall' LABEL_MEAN = 'label/mean' PREDICTION_MEAN = 'prediction/mean' # The following require a threshold applied, should be float in range (0, 1). ACCURACY_AT_THRESHOLD = 'accuracy/positive_threshold_%g' PRECISION_AT_THRESHOLD = 'precision/positive_threshold_%g' RECALL_AT_THRESHOLD = 'recall/positive_threshold_%g' # The following require a constraint on a competing metric to be applied, # float in range (0, 1). PRECISION_AT_RECALL = 'precision_at_recall_%g' RECALL_AT_PRECISION = 'recall_at_precision_%g' SENSITIVITY_AT_SPECIFICITY = 'sensitivity_at_specificity_%g' SPECIFICITY_AT_SENSITIVITY = 'specificity_at_sensitivity_%g' # The following require a class id applied. PROBABILITY_MEAN_AT_CLASS = 'probability_mean/class%d' AUC_AT_CLASS = 'auc/class%d' AUC_PR_AT_CLASS = 'auc_precision_recall/class%d' # The following require a class name applied. PROBABILITY_MEAN_AT_NAME = 'probability_mean/%s' AUC_AT_NAME = 'auc/%s' AUC_PR_AT_NAME = 'auc_precision_recall/%s' ================================================ FILE: tensorflow_estimator/python/estimator/canned/optimizers.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Methods related to optimizers used in canned_estimators.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import inspect from absl import logging import six import tensorflow as tf from tensorflow_estimator.python.estimator.util import tf_keras _OPTIMIZER_CLS_NAMES = { 'Adagrad': tf.compat.v1.train.AdagradOptimizer, 'Adam': tf.compat.v1.train.AdamOptimizer, 'Ftrl': tf.compat.v1.train.FtrlOptimizer, 'RMSProp': tf.compat.v1.train.RMSPropOptimizer, 'SGD': tf.compat.v1.train.GradientDescentOptimizer, } _OPTIMIZER_CLS_NAMES_V2 = { 'Adagrad': tf_keras.optimizers.legacy.Adagrad, 'Adam': tf_keras.optimizers.legacy.Adam, 'Ftrl': tf_keras.optimizers.legacy.Ftrl, 'RMSProp': tf_keras.optimizers.legacy.RMSprop, 'SGD': tf_keras.optimizers.legacy.SGD, } # The default learning rate of 0.05 is a historical artifact of the initial # implementation, but seems a reasonable choice. _LEARNING_RATE = 0.05 def get_optimizer_instance(opt, learning_rate=None): """Returns an optimizer instance. Supports the following types for the given `opt`: * An `Optimizer` instance: Returns the given `opt`. * A string: Creates an `Optimizer` subclass with the given `learning_rate`. Supported strings: * 'Adagrad': Returns an `AdagradOptimizer`. * 'Adam': Returns an `AdamOptimizer`. * 'Ftrl': Returns an `FtrlOptimizer`. 
* 'RMSProp': Returns an `RMSPropOptimizer`. * 'SGD': Returns a `GradientDescentOptimizer`. Args: opt: An `Optimizer` instance, or string, as discussed above. learning_rate: A float. Only used if `opt` is a string. Returns: An `Optimizer` instance. Raises: ValueError: If `opt` is an unsupported string. ValueError: If `opt` is a supported string but `learning_rate` was not specified. ValueError: If `opt` is none of the above types. """ if isinstance(opt, six.string_types): if opt in six.iterkeys(_OPTIMIZER_CLS_NAMES): if not learning_rate: raise ValueError('learning_rate must be specified when opt is string.') return _OPTIMIZER_CLS_NAMES[opt](learning_rate=learning_rate) raise ValueError( 'Unsupported optimizer name: {}. Supported names are: {}'.format( opt, tuple(sorted(six.iterkeys(_OPTIMIZER_CLS_NAMES))))) if callable(opt): opt = opt() if not isinstance(opt, tf.compat.v1.train.Optimizer): raise ValueError( 'The given object is not an Optimizer instance. Given: {}'.format(opt)) return opt def _optimizer_has_default_learning_rate(opt): signature = inspect.getfullargspec(opt.__init__) default_name_to_value = dict(zip(signature.args[::-1], signature.defaults)) for name in signature.kwonlyargs: if name in signature.kwonlydefaults: default_name_to_value[name] = signature.kwonlydefaults[name] return 'learning_rate' in default_name_to_value def get_optimizer_instance_v2(opt, learning_rate=None): """Returns an optimizer_v2.OptimizerV2 instance. Supports the following types for the given `opt`: * An `optimizer_v2.OptimizerV2` instance: Returns the given `opt`. * A string: Creates an `optimizer_v2.OptimizerV2` subclass with the given `learning_rate`. Supported strings: * 'Adagrad': Returns an tf_keras.optimizers.Adagrad. * 'Adam': Returns an tf_keras.optimizers.Adam. * 'Ftrl': Returns an tf_keras.optimizers.Ftrl. * 'RMSProp': Returns an tf_keras.optimizers.RMSProp. * 'SGD': Returns a tf_keras.optimizers.SGD. 
Args: opt: An `tf_keras.optimizers.Optimizer` instance, or string, as discussed above. learning_rate: A float. Only used if `opt` is a string. If None, and opt is string, it will use the default learning_rate of the optimizer. Returns: An `tf_keras.optimizers.Optimizer` instance. Raises: ValueError: If `opt` is an unsupported string. ValueError: If `opt` is a supported string but `learning_rate` was not specified. ValueError: If `opt` is none of the above types. """ if isinstance(opt, six.string_types): if opt in six.iterkeys(_OPTIMIZER_CLS_NAMES_V2): if not learning_rate: if _optimizer_has_default_learning_rate(_OPTIMIZER_CLS_NAMES_V2[opt]): return _OPTIMIZER_CLS_NAMES_V2[opt]() else: return _OPTIMIZER_CLS_NAMES_V2[opt](learning_rate=_LEARNING_RATE) return _OPTIMIZER_CLS_NAMES_V2[opt](learning_rate=learning_rate) raise ValueError( 'Unsupported optimizer name: {}. Supported names are: {}'.format( opt, tuple(sorted(six.iterkeys(_OPTIMIZER_CLS_NAMES_V2))))) if callable(opt): opt = opt() if isinstance(opt, tf_keras.optimizers.experimental.Optimizer): if tf.executing_eagerly(): logging.warning( 'You are using `tf_keras.optimizers.experimental.Optimizer` in TF ' 'estimator, which only supports ' '`tf_keras.optimizers.legacy.Optimizer`. Automatically converting ' 'your optimizer to `tf_keras.optimizers.legacy.Optimizer`.') opt = tf_keras.__internal__.optimizers.convert_to_legacy_optimizer(opt) else: raise ValueError('Please set your optimizer as an instance of ' '`tf_keras.optimizers.legacy.Optimizer`, e.g., ' f'`tf_keras.optimizers.legacy.{opt.__class__.__name__}`.' f'Received optimizer type: {type(opt)}.') if not isinstance( opt, (tf_keras.optimizers.legacy.Optimizer, tf_keras.optimizers.Optimizer)): raise ValueError( 'The given object is not a tf_keras.optimizers.Optimizer instance.' 
' Given: {}'.format(opt)) return opt ================================================ FILE: tensorflow_estimator/python/estimator/canned/optimizers_test.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for optimizers.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import optimizers class _TestOptimizer(tf.compat.v1.train.Optimizer): def __init__(self): super(_TestOptimizer, self).__init__( use_locking=False, name='TestOptimizer') class GetOptimizerInstance(tf.test.TestCase): def test_unsupported_name(self): with self.assertRaisesRegex( ValueError, 'Unsupported optimizer name: unsupported_name'): optimizers.get_optimizer_instance('unsupported_name', learning_rate=0.1) def test_supported_name_but_learning_rate_none(self): with self.assertRaisesRegex( ValueError, 'learning_rate must be specified when opt is string'): optimizers.get_optimizer_instance('Adagrad', learning_rate=None) def test_keras_optimizer_after_tf_2_11(self): new_opt = tf_keras.optimizers.Adagrad() # In eager mode it should automatically convert to legacy optimizer. 
    opt = optimizers.get_optimizer_instance_v2(new_opt, learning_rate=0.1)
    self.assertIsInstance(opt, tf_keras.optimizers.legacy.Adagrad)

    # In graph mode errors should be thrown.
    @tf.function
    def foo():
      with self.assertRaisesRegex(
          ValueError,
          r'Please set your.*tf_keras\.optimizers\.legacy\.Adagrad.*'):
        optimizers.get_optimizer_instance_v2(new_opt, learning_rate=0.1)

    foo()

  def test_adagrad(self):
    opt = optimizers.get_optimizer_instance('Adagrad', learning_rate=0.1)
    self.assertIsInstance(opt, tf.compat.v1.train.AdagradOptimizer)
    self.assertAlmostEqual(0.1, opt._learning_rate)

  def test_adam(self):
    opt = optimizers.get_optimizer_instance('Adam', learning_rate=0.1)
    self.assertIsInstance(opt, tf.compat.v1.train.AdamOptimizer)
    # AdamOptimizer stores the rate in `_lr`, unlike the other v1 optimizers.
    self.assertAlmostEqual(0.1, opt._lr)

  def test_ftrl(self):
    opt = optimizers.get_optimizer_instance('Ftrl', learning_rate=0.1)
    self.assertIsInstance(opt, tf.compat.v1.train.FtrlOptimizer)
    self.assertAlmostEqual(0.1, opt._learning_rate)

  def test_rmsprop(self):
    opt = optimizers.get_optimizer_instance('RMSProp', learning_rate=0.1)
    self.assertIsInstance(opt, tf.compat.v1.train.RMSPropOptimizer)
    self.assertAlmostEqual(0.1, opt._learning_rate)

  def test_sgd(self):
    opt = optimizers.get_optimizer_instance('SGD', learning_rate=0.1)
    self.assertIsInstance(opt, tf.compat.v1.train.GradientDescentOptimizer)
    self.assertAlmostEqual(0.1, opt._learning_rate)

  def test_object(self):
    # An Optimizer instance is passed through unchanged.
    opt = optimizers.get_optimizer_instance(_TestOptimizer())
    self.assertIsInstance(opt, _TestOptimizer)

  def test_object_invalid(self):
    with self.assertRaisesRegex(
        ValueError, 'The given object is not an Optimizer instance'):
      optimizers.get_optimizer_instance((1, 2, 3))

  def test_callable(self):
    # A zero-arg factory callable is invoked to produce the optimizer.
    def _optimizer_fn():
      return _TestOptimizer()

    opt = optimizers.get_optimizer_instance(_optimizer_fn)
    self.assertIsInstance(opt, _TestOptimizer)

  def test_lambda(self):
    opt = optimizers.get_optimizer_instance(lambda: _TestOptimizer())  # pylint: disable=unnecessary-lambda
    self.assertIsInstance(opt,
                          _TestOptimizer)

  def test_callable_returns_invalid(self):
    def _optimizer_fn():
      return (1, 2, 3)

    with self.assertRaisesRegex(
        ValueError, 'The given object is not an Optimizer instance'):
      optimizers.get_optimizer_instance(_optimizer_fn)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/optimizers_test_v2.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests for optimizers.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import optimizers class _TestOptimizerV2(tf_keras.optimizers.legacy.Optimizer): def __init__(self): super(_TestOptimizerV2, self).__init__(name='TestOptimizer') def get_config(self): pass class GetOptimizerInstanceV2(tf.test.TestCase): """Tests for Optimizer V2.""" def test_unsupported_name(self): with self.assertRaisesRegexp( ValueError, 'Unsupported optimizer name: unsupported_name'): optimizers.get_optimizer_instance_v2( 'unsupported_name', learning_rate=0.1) def test_adagrad_but_no_learning_rate(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('Adagrad') # The creation of variables in optimizer_v2 is deferred to when it's # called, so we need to manually create it here. Same for all other tests. 
self.assertIsInstance(opt.learning_rate, tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.Adagrad, tf_keras.optimizers.legacy.Adagrad)) self.assertAlmostEqual(0.001, self.evaluate(opt.learning_rate)) def test_adam_but_no_learning_rate(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('Adam') self.assertIsInstance(opt.learning_rate, tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.Adam, tf_keras.optimizers.legacy.Adam)) self.assertAlmostEqual(0.001, self.evaluate(opt.learning_rate)) def test_adagrad(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('Adagrad', learning_rate=0.1) self.assertIsInstance(opt.learning_rate, tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.Adagrad, tf_keras.optimizers.legacy.Adagrad)) self.assertAlmostEqual(0.1, self.evaluate(opt.learning_rate)) def test_adam(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('Adam', learning_rate=0.1) self.assertIsInstance(opt.learning_rate, tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.Adam, tf_keras.optimizers.legacy.Adam)) self.assertAlmostEqual(0.1, self.evaluate(opt.learning_rate)) def test_ftrl(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('Ftrl', learning_rate=0.1) self.assertIsInstance(opt.learning_rate, tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.Ftrl, tf_keras.optimizers.legacy.Ftrl)) self.assertAlmostEqual(0.1, self.evaluate(opt.learning_rate)) def test_rmsprop(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('RMSProp', learning_rate=0.1) self.assertIsInstance(opt.learning_rate, 
tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.RMSprop, tf_keras.optimizers.legacy.RMSprop)) self.assertAlmostEqual(0.1, self.evaluate(opt.learning_rate)) def test_sgd(self): with self.cached_session(): opt = optimizers.get_optimizer_instance_v2('SGD', learning_rate=0.1) self.assertIsInstance(opt.learning_rate, tf.Variable) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertIsInstance( opt, (tf_keras.optimizers.SGD, tf_keras.optimizers.legacy.SGD)) self.assertAlmostEqual(0.1, self.evaluate(opt.learning_rate)) def test_object(self): opt = optimizers.get_optimizer_instance_v2(_TestOptimizerV2()) self.assertIsInstance(opt, _TestOptimizerV2) def test_object_invalid(self): with self.assertRaisesRegexp( ValueError, 'The given object is not a tf_keras.optimizers.Optimizer instance'): optimizers.get_optimizer_instance_v2((1, 2, 3)) def test_callable(self): def _optimizer_fn(): return _TestOptimizerV2() opt = optimizers.get_optimizer_instance_v2(_optimizer_fn) self.assertIsInstance(opt, _TestOptimizerV2) def test_lambda(self): opt = optimizers.get_optimizer_instance_v2(lambda: _TestOptimizerV2()) # pylint: disable=unnecessary-lambda self.assertIsInstance(opt, _TestOptimizerV2) def test_callable_returns_invalid(self): def _optimizer_fn(): return (1, 2, 3) with self.assertRaisesRegexp( ValueError, 'The given object is not a tf_keras.optimizers.Optimizer instance'): optimizers.get_optimizer_instance_v2(_optimizer_fn) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/parsing_utils.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Parsing related helper function to be used in `input_fn`."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import tensorflow as tf
from tensorflow.python.feature_column import feature_column_lib as fc
from tensorflow_estimator.python.estimator.estimator_export import estimator_export


@estimator_export('estimator.classifier_parse_example_spec', v1=[])
def classifier_parse_example_spec_v2(feature_columns,
                                     label_key,
                                     label_dtype=tf.dtypes.int64,
                                     label_default=None,
                                     weight_column=None):
  """Generates parsing spec for tf.parse_example to be used with classifiers.

  If users keep data in tf.Example format, they need to call tf.parse_example
  with a proper feature spec. There are two main things that this utility
  helps:

  * Users need to combine parsing spec of features with labels and weights (if
    any) since they are all parsed from same tf.Example instance. This utility
    combines these specs.
  * It is difficult to map expected label by a classifier such as
    `DNNClassifier` to corresponding tf.parse_example spec. This utility
    encodes it by getting related information from users (key, dtype).

  Example output of parsing spec:

  ```python
  # Define features and transformations
  feature_b = tf.feature_column.numeric_column(...)
  feature_c_bucketized = tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column("feature_c"), ...)
  feature_a_x_feature_c = tf.feature_column.crossed_column(
      columns=["feature_a", feature_c_bucketized], ...)

  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
  parsing_spec = tf.estimator.classifier_parse_example_spec(
      feature_columns, label_key='my-label', label_dtype=tf.string)

  # For the above example, classifier_parse_example_spec would return the dict:
  assert parsing_spec == {
      "feature_a": parsing_ops.VarLenFeature(tf.string),
      "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
      "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
      "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.string)
  }
  ```

  Example usage with a classifier:

  ```python
  feature_columns = # define features via tf.feature_column
  estimator = DNNClassifier(
      n_classes=1000,
      feature_columns=feature_columns,
      weight_column='example-weight',
      label_vocabulary=['photos', 'keep', ...],
      hidden_units=[256, 64, 16])
  # This label configuration tells the classifier the following:
  # * weights are retrieved with key 'example-weight'
  # * label is string and can be one of the following ['photos', 'keep', ...]
  # * integer id for label 'photos' is 0, 'keep' is 1, ...

  # Input builders
  def input_fn_train():  # Returns a tuple of features and labels.
    features = tf.contrib.learn.read_keyed_batch_features(
        file_pattern=train_files,
        batch_size=batch_size,
        # creates parsing configuration for tf.parse_example
        features=tf.estimator.classifier_parse_example_spec(
            feature_columns,
            label_key='my-label',
            label_dtype=tf.string,
            weight_column='example-weight'),
        reader=tf.RecordIOReader)
    labels = features.pop('my-label')
    return features, labels

  estimator.train(input_fn=input_fn_train)
  ```

  Args:
    feature_columns: An iterable containing all feature columns. All items
      should be instances of classes derived from `FeatureColumn`.
    label_key: A string identifying the label. It means tf.Example stores
      labels with this key.
    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
      `tf.int64`. If user defines a `label_vocabulary`, this should be set as
      `tf.string`. `tf.float32` labels are only supported for binary
      classification.
    label_default: used as label if label_key does not exist in given
      tf.Example. An example usage: let's say `label_key` is 'clicked' and
      tf.Example contains clicked data only for positive examples in following
      format `key:clicked, value:1`. This means that if there is no data with
      key 'clicked' it should count as negative example by setting
      `label_default=0`. Type of this value should be compatible with
      `label_dtype`.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
    value.

  Raises:
    ValueError: If label is used in `feature_columns`.
    ValueError: If weight_column is used in `feature_columns`.
    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
      instance.
    ValueError: If `weight_column` is not a `NumericColumn` instance.
    ValueError: if label_key is None.
  """
  parsing_spec = tf.compat.v2.feature_column.make_parse_example_spec(
      feature_columns)
  # Labels for classifiers are a single value per example.
  label_spec = tf.io.FixedLenFeature((1,), label_dtype, label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column)


@estimator_export('estimator.regressor_parse_example_spec', v1=[])
def regressor_parse_example_spec_v2(feature_columns,
                                    label_key,
                                    label_dtype=tf.dtypes.float32,
                                    label_default=None,
                                    label_dimension=1,
                                    weight_column=None):
  """Generates parsing spec for tf.parse_example to be used with regressors.

  If users keep data in tf.Example format, they need to call tf.parse_example
  with a proper feature spec. There are two main things that this utility
  helps:

  * Users need to combine parsing spec of features with labels and weights (if
    any) since they are all parsed from same tf.Example instance. This utility
    combines these specs.
  * It is difficult to map expected label by a regressor such as `DNNRegressor`
    to corresponding tf.parse_example spec. This utility encodes it by getting
    related information from users (key, dtype).

  Example output of parsing spec:

  ```python
  # Define features and transformations
  feature_b = tf.feature_column.numeric_column(...)
  feature_c_bucketized = tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column("feature_c"), ...)
  feature_a_x_feature_c = tf.feature_column.crossed_column(
      columns=["feature_a", feature_c_bucketized], ...)
  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]

  parsing_spec = tf.estimator.regressor_parse_example_spec(
      feature_columns, label_key='my-label')

  # For the above example, regressor_parse_example_spec would return the dict:
  assert parsing_spec == {
      "feature_a": parsing_ops.VarLenFeature(tf.string),
      "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
      "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
      "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.float32)
  }
  ```

  Example usage with a regressor:

  ```python
  feature_columns = # define features via tf.feature_column
  estimator = DNNRegressor(
      hidden_units=[256, 64, 16],
      feature_columns=feature_columns,
      weight_column='example-weight',
      label_dimension=3)
  # This label configuration tells the regressor the following:
  # * weights are retrieved with key 'example-weight'
  # * label is a 3 dimension tensor with float32 dtype.

  # Input builders
  def input_fn_train():  # Returns a tuple of features and labels.
    features = tf.contrib.learn.read_keyed_batch_features(
        file_pattern=train_files,
        batch_size=batch_size,
        # creates parsing configuration for tf.parse_example
        features=tf.estimator.regressor_parse_example_spec(
            feature_columns,
            label_key='my-label',
            label_dimension=3,
            weight_column='example-weight'),
        reader=tf.RecordIOReader)
    labels = features.pop('my-label')
    return features, labels

  estimator.train(input_fn=input_fn_train)
  ```

  Args:
    feature_columns: An iterable containing all feature columns. All items
      should be instances of classes derived from `_FeatureColumn`.
    label_key: A string identifying the label. It means tf.Example stores
      labels with this key.
    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
      `tf.float32`.
    label_default: used as label if label_key does not exist in given
      tf.Example. By default default_value is none, which means
      `tf.parse_example` will error out if there is any missing label.
    label_dimension: Number of regression targets per example. This is the
      size of the last dimension of the labels and logits `Tensor` objects
      (typically, these have shape `[batch_size, label_dimension]`).
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
    value.

  Raises:
    ValueError: If label is used in `feature_columns`.
    ValueError: If weight_column is used in `feature_columns`.
    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
      instance.
    ValueError: If `weight_column` is not a `NumericColumn` instance.
    ValueError: if label_key is None.
  """
  parsing_spec = tf.compat.v2.feature_column.make_parse_example_spec(
      feature_columns)
  # Regression labels are a `label_dimension`-sized vector per example.
  label_spec = tf.io.FixedLenFeature((label_dimension,), label_dtype,
                                     label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column)


def _add_label_and_weight_to_parsing_spec(parsing_spec,
                                          label_key,
                                          label_spec,
                                          weight_column=None):
  """Adds label and weight spec to given parsing spec.

  Note: mutates `parsing_spec` in place and returns it.

  Args:
    parsing_spec: A dict mapping each feature key to a `FixedLenFeature` or
      `VarLenFeature` to which label and weight spec are added.
    label_key: A string identifying the label. It means tf.Example stores
      labels with this key.
    label_spec: A `FixedLenFeature`.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.

  Returns:
    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
    value.
  """
  if label_key in parsing_spec:
    raise ValueError('label should not be used as feature. '
                     'label_key: {}, features: {}'.format(
                         label_key, parsing_spec.keys()))
  parsing_spec[label_key] = label_spec
  if weight_column is None:
    return parsing_spec

  # A bare string is promoted to a NumericColumn before validation.
  if isinstance(weight_column, six.string_types):
    weight_column = tf.feature_column.numeric_column(weight_column)
  if not isinstance(weight_column, fc.NumericColumn):
    raise ValueError('weight_column should be an instance of '
                     'tf.feature_column.numeric_column. '
                     'Given type: {} value: {}'.format(
                         type(weight_column), weight_column))
  if weight_column.key in parsing_spec:
    raise ValueError('weight_column should not be used as feature. '
                     'weight_column: {}, features: {}'.format(
                         weight_column.key, parsing_spec.keys()))
  parsing_spec.update(weight_column.parse_example_spec)
  return parsing_spec


@estimator_export(v1=['estimator.classifier_parse_example_spec'])
def classifier_parse_example_spec(feature_columns,
                                  label_key,
                                  label_dtype=tf.dtypes.int64,
                                  label_default=None,
                                  weight_column=None):
  # v1 variant; docstring is copied from the v2 function below.
  parsing_spec = tf.compat.v1.feature_column.make_parse_example_spec(
      feature_columns)
  label_spec = tf.io.FixedLenFeature((1,), label_dtype, label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column)


classifier_parse_example_spec.__doc__ = classifier_parse_example_spec_v2.__doc__


@estimator_export(v1=['estimator.regressor_parse_example_spec'])
def regressor_parse_example_spec(
    feature_columns,  # pylint: disable=missing-docstring
    label_key,
    label_dtype=tf.dtypes.float32,
    label_default=None,
    label_dimension=1,
    weight_column=None):
  # v1 variant; docstring is copied from the v2 function below.
  parsing_spec = tf.compat.v1.feature_column.make_parse_example_spec(
      feature_columns)
  label_spec = tf.io.FixedLenFeature((label_dimension,), label_dtype,
                                     label_default)
  return _add_label_and_weight_to_parsing_spec(
      parsing_spec=parsing_spec,
      label_key=label_key,
      label_spec=label_spec,
      weight_column=weight_column)


regressor_parse_example_spec.__doc__ = regressor_parse_example_spec_v2.__doc__


================================================
FILE: tensorflow_estimator/python/estimator/canned/parsing_utils_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for parsing_utils.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow_estimator.python.estimator.canned import parsing_utils


class BaseClassifierParseExampleSpec(object):
  """Tests tf.estimator.classifier_parse_example_spec."""

  def __init__(self, parse_example_fn):
    # The concrete subclass supplies either the V1 or V2 implementation, so
    # the same assertions run against both API versions.
    self._parse_example_fn = parse_example_fn

  def test_defaults(self):
    # Default label dtype for classification is int64, shape (1,).
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')], label_key='b')
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.int64),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_string(self):
    # String labels are supported (e.g. with a label vocabulary).
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        label_dtype=tf.dtypes.string)
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.string),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  # TODO(ispir): test label_default_value compatibility with label_dtype
  def test_label_default_value(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        label_default=0)
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.int64,
                                   default_value=0),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_weight_column_as_string(self):
    # A string weight_column is converted to a numeric_column internally.
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        weight_column='c')
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.int64),
        'c': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_weight_column_as_numeric_column(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        weight_column=tf.feature_column.numeric_column('c'))
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.int64),
        'c': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_label_key_should_not_be_used_as_feature(self):
    # A key cannot be both a feature and the label.
    with self.assertRaisesRegexp(ValueError,
                                 'label should not be used as feature'):
      self._parse_example_fn(
          feature_columns=[tf.feature_column.numeric_column('a')],
          label_key='a')

  def test_weight_column_should_not_be_used_as_feature(self):
    # A key cannot be both a feature and the weight column.
    with self.assertRaisesRegexp(ValueError,
                                 'weight_column should not be used as feature'):
      self._parse_example_fn(
          feature_columns=[tf.feature_column.numeric_column('a')],
          label_key='b',
          weight_column=tf.feature_column.numeric_column('a'))

  def test_weight_column_should_be_a_numeric_column(self):
    with self.assertRaisesRegexp(ValueError,
                                 'tf.feature_column.numeric_column'):
      not_a_numeric_column = 3
      self._parse_example_fn(
          feature_columns=[tf.feature_column.numeric_column('a')],
          label_key='b',
          weight_column=not_a_numeric_column)


class ClassifierParseExampleSpecV2(BaseClassifierParseExampleSpec,
                                   tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    BaseClassifierParseExampleSpec.__init__(
        self, parsing_utils.classifier_parse_example_spec_v2)

  def test_non_v1_feature_column(self):
    # V2 accepts sequence columns, which parse to a VarLenFeature.
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.sequence_numeric_column('a')],
        label_key='b')
    expected_spec = {
        'a': tf.io.VarLenFeature(dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.int64),
    }
    self.assertDictEqual(expected_spec, parsing_spec)


class ClassifierParseExampleSpecV1(BaseClassifierParseExampleSpec,
                                   tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    BaseClassifierParseExampleSpec.__init__(
        self, parsing_utils.classifier_parse_example_spec)


class BaseRegressorParseExampleSpec(object):
  """Tests tf.estimator.regressor_parse_example_spec."""

  def __init__(self, parse_example_fn):
    # The concrete subclass supplies either the V1 or V2 implementation.
    self._parse_example_fn = parse_example_fn

  def test_defaults(self):
    # Default label dtype for regression is float32, shape (1,).
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')], label_key='b')
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_int64(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        label_dtype=tf.dtypes.int64)
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.int64),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_label_default_value(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        label_default=0.)
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32,
                                   default_value=0.),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_label_dimension(self):
    # Regression labels may be multi-dimensional via label_dimension.
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        label_dimension=3)
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((3,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_weight_column_as_string(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        weight_column='c')
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'c': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_weight_column_as_numeric_column(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.numeric_column('a')],
        label_key='b',
        weight_column=tf.feature_column.numeric_column('c'))
    expected_spec = {
        'a': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
        'c': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)

  def test_label_key_should_not_be_used_as_feature(self):
    with self.assertRaisesRegexp(ValueError,
                                 'label should not be used as feature'):
      self._parse_example_fn(
          feature_columns=[tf.feature_column.numeric_column('a')],
          label_key='a')

  def test_weight_column_should_not_be_used_as_feature(self):
    with self.assertRaisesRegexp(ValueError,
                                 'weight_column should not be used as feature'):
      self._parse_example_fn(
          feature_columns=[tf.feature_column.numeric_column('a')],
          label_key='b',
          weight_column=tf.feature_column.numeric_column('a'))

  def test_weight_column_should_be_a_numeric_column(self):
    with self.assertRaisesRegexp(ValueError,
                                 'tf.feature_column.numeric_column'):
      not_a_numeric_column = 3
      self._parse_example_fn(
          feature_columns=[tf.feature_column.numeric_column('a')],
          label_key='b',
          weight_column=not_a_numeric_column)


class RegressorParseExampleSpecV2(BaseRegressorParseExampleSpec,
                                  tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    BaseRegressorParseExampleSpec.__init__(
        self, parsing_utils.regressor_parse_example_spec_v2)

  def test_non_v1_feature_column(self):
    # V2 accepts sequence columns, which parse to a VarLenFeature.
    parsing_spec = self._parse_example_fn(
        feature_columns=[tf.feature_column.sequence_numeric_column('a')],
        label_key='b')
    expected_spec = {
        'a': tf.io.VarLenFeature(dtype=tf.dtypes.float32),
        'b': tf.io.FixedLenFeature((1,), dtype=tf.dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)


class RegressorParseExampleSpecV1(BaseRegressorParseExampleSpec,
                                  tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    BaseRegressorParseExampleSpec.__init__(
        self, parsing_utils.regressor_parse_example_spec)


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/canned/prediction_keys.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Enum for model prediction keys.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class PredictionKeys(object): """Enum for canonical model prediction keys. The following values are defined: PREDICTIONS: Used by models that predict values, such as regressor models. """ CLASSES = 'classes' CLASS_IDS = 'class_ids' ALL_CLASSES = 'all_classes' ALL_CLASS_IDS = 'all_class_ids' LOGISTIC = 'logistic' LOGITS = 'logits' PREDICTIONS = 'predictions' PROBABILITIES = 'probabilities' TOP_K = 'top_k' ================================================ FILE: tensorflow_estimator/python/estimator/canned/rnn.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Recurrent Neural Network model and estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import tensorflow as tf

from tensorflow.python.feature_column import feature_column_lib as fc
from tensorflow.python.framework import ops
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator import model_fn
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.head import binary_class_head as binary_head_lib
from tensorflow_estimator.python.estimator.head import multi_class_head as multi_head_lib
from tensorflow_estimator.python.estimator.head import sequential_head as seq_head_lib

# The defaults are historical artifacts of the initial implementation, but seem
# reasonable choices.
# TODO(aarg): Also apply default learning rate and clipping to Keras model so
# they apply when the optimizer is set via `compile` and the model trained via
# the `fit` method.
_DEFAULT_LEARNING_RATE = 0.05
_DEFAULT_CLIP_NORM = 5.0

# Canonical string keys accepted for `cell_type`.
_SIMPLE_RNN_KEY = 'simple_rnn'
_LSTM_KEY = 'lstm'
_GRU_KEY = 'gru'

# Maps a cell-type string to a full Keras RNN *layer* class (used by
# `_make_rnn_layer` when `units` is a single int).
_CELL_TYPE_TO_LAYER_MAPPING = {
    _LSTM_KEY: tf_keras.layers.LSTM,
    _GRU_KEY: tf_keras.layers.GRU,
    _SIMPLE_RNN_KEY: tf_keras.layers.SimpleRNN
}
# Maps a cell-type string to a Keras RNN *cell* class (used when building a
# multi-layer RNN from per-layer cells).
_CELL_TYPES = {
    _LSTM_KEY: tf_keras.layers.LSTMCell,
    _GRU_KEY: tf_keras.layers.GRUCell,
    _SIMPLE_RNN_KEY: tf_keras.layers.SimpleRNNCell
}

# Indicates no value was provided by the user to a kwarg.
USE_DEFAULT = object()


def _single_rnn_cell(units, cell_type):
  """Initializes a RNN cell."""
  # `cell_type` may be a known string key or already a cell class; unknown
  # strings fall through the .get() and fail the callable check below.
  cell_type = _CELL_TYPES.get(cell_type, cell_type)
  if not callable(cell_type):
    raise ValueError(
        '`cell_type` should be a class producing a RNN cell, or a string '
        'specifying the cell type. Supported strings are: {}.'.format(
            [_SIMPLE_RNN_KEY, _LSTM_KEY, _GRU_KEY]))
  cell = cell_type(units=units)
  if hasattr(cell, '_enable_caching_device'):
    # Enable the caching_device to speed up the repetitive variable reads in
    # tf.while. This should work only with tf.session.
    cell._enable_caching_device = True  # pylint: disable=protected-access
  # Duck-type check that the object actually behaves like a RNN cell.
  if not hasattr(cell, 'call') or not hasattr(cell, 'state_size'):
    raise ValueError('RNN cell should have a `call` and `state_size` method.')
  return cell


def _make_rnn_cell_fn(units, cell_type=_SIMPLE_RNN_KEY):
  """Convenience function to create `rnn_cell_fn` for canned RNN Estimators.

  Args:
    units: Iterable of integer number of hidden units per RNN layer.
    cell_type: A class producing a RNN cell or a string specifying the cell
      type. Supported strings are: `'simple_rnn'`, `'lstm'`, and `'gru'`.

  Returns:
    A function that returns a RNN cell.

  Raises:
    ValueError: If cell_type is not supported.
  """

  def rnn_cell_fn():
    cells = [_single_rnn_cell(n, cell_type) for n in units]
    # A single cell is returned bare; multiple cells are returned as a list
    # for the Keras RNN layer to stack.
    if len(cells) == 1:
      return cells[0]
    return cells

  return rnn_cell_fn


class RNNModel(tf_keras.models.Model):
  """A Keras RNN model.

  Composition of layers to compute logits from RNN model, along with training
  and inference features. See `tf_keras.models.Model` for more details on
  Keras models.

  Example of usage:

  ```python
  rating = tf.feature_column.embedding_column(
      tf.feature_column.sequence_categorical_column_with_identity('rating', 5),
      10)
  rnn_layer = tf_keras.layers.SimpleRNN(20)
  rnn_model = RNNModel(rnn_layer, units=1, sequence_feature_columns=[rating])

  rnn_model.compile(
      tf_keras.optimizers.Adam(), loss=tf_keras.losses.MeanSquaredError())
  rnn_model.fit(generator(), epochs=10, steps_per_epoch=100)
  rnn_model.predict({'rating': np.array([[0, 1], [2, 3]])}, steps=1)
  ```
  """

  # TODO(aarg): Update arguments to support multiple rnn layers.
  def __init__(self,
               rnn_layer,
               units,
               sequence_feature_columns,
               context_feature_columns=None,
               activation=None,
               return_sequences=False,
               **kwargs):
    """Initializes a RNNModel instance.

    Args:
      rnn_layer: A Keras RNN layer.
      units: An int indicating the dimension of the logit layer, and of the
        model output.
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s for
        contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `DenseColumn` such as
        `numeric_column`, not the sequential variants.
      activation: Activation function to apply to the logit layer (for instance
        `tf_keras.activations.sigmoid`). If you don't specify anything, no
        activation is applied.
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence.
      **kwargs: Additional arguments.

    Raises:
      ValueError: If `units` is not an int.
    """
    super(RNNModel, self).__init__(**kwargs)
    if not isinstance(units, int):
      raise ValueError('units must be an int.  Given type: {}'.format(
          type(units)))
    self._return_sequences = return_sequences
    self._sequence_feature_columns = sequence_feature_columns
    self._context_feature_columns = context_feature_columns
    self._sequence_features_layer = tf_keras.experimental.SequenceFeatures(
        sequence_feature_columns)
    self._dense_features_layer = None
    if context_feature_columns:
      self._dense_features_layer = tf_keras_v1.layers.DenseFeatures(
          context_feature_columns)
    self._rnn_layer = rnn_layer
    self._logits_layer = tf_keras.layers.Dense(
        units=units, activation=activation, name='logits')

  def call(self, inputs, training=None):
    """Computes the RNN output.

    By default no activation is applied and the logits are returned. To output
    probabilities an activation needs to be specified such as sigmoid or
    softmax.

    Args:
      inputs: A dict mapping keys to input tensors.
      training: Python boolean indicating whether the layers should behave in
        training mode or in inference mode. This argument is passed to the
        model's layers. This is for instance used with cells that use dropout.

    Returns:
      A `Tensor` with logits from RNN model. It has shape
      (batch_size, time_step, logits_size) if `return_sequence` is `True`,
      (batch_size, logits_size) otherwise.
    """
    if not isinstance(inputs, dict):
      raise ValueError('inputs should be a dictionary of `Tensor`s. '
                       'Given type: {}'.format(type(inputs)))
    with ops.name_scope('sequence_input_layer'):
      # The try/except handles layer versions whose __call__ does not accept
      # a `training` kwarg.
      try:
        sequence_input, sequence_length = self._sequence_features_layer(
            inputs, training=training)
      except TypeError:
        sequence_input, sequence_length = self._sequence_features_layer(inputs)
      tf.compat.v1.summary.histogram('sequence_length', sequence_length)

      if self._context_feature_columns:
        try:
          context_input = self._dense_features_layer(inputs, training=training)
        except TypeError:
          context_input = self._dense_features_layer(inputs)
        # Context features are replicated across timesteps and concatenated
        # with the sequential input.
        sequence_input = fc.concatenate_context_input(
            context_input, sequence_input=sequence_input)

    sequence_length_mask = tf.sequence_mask(sequence_length)
    rnn_outputs = self._rnn_layer(
        sequence_input, mask=sequence_length_mask, training=training)

    logits = self._logits_layer(rnn_outputs)
    if self._return_sequences:
      # Passes sequence mask as `_keras_mask` to be used in Keras model for
      # loss and metrics aggregation to exclude padding in the sequential case.
      logits._keras_mask = sequence_length_mask  # pylint: disable=protected-access
    return logits

  def get_config(self):
    """Returns a dictionary with the config of the model."""
    config = {'name': self.name}
    config['rnn_layer'] = {
        'class_name': self._rnn_layer.__class__.__name__,
        'config': self._rnn_layer.get_config()
    }
    config['units'] = self._logits_layer.units
    config['return_sequences'] = self._return_sequences
    config['activation'] = tf_keras.activations.serialize(
        self._logits_layer.activation)
    config['sequence_feature_columns'] = fc.serialize_feature_columns(
        self._sequence_feature_columns)
    config['context_feature_columns'] = (
        fc.serialize_feature_columns(self._context_feature_columns)
        if self._context_feature_columns else None)
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Creates a RNNModel from its config.

    Args:
      config: A Python dictionary, typically the output of `get_config`.
      custom_objects: Optional dictionary mapping names (strings) to custom
        classes or functions to be considered during deserialization.

    Returns:
      A RNNModel.
    """
    rnn_layer = tf_keras.layers.deserialize(
        config.pop('rnn_layer'), custom_objects=custom_objects)
    sequence_feature_columns = fc.deserialize_feature_columns(
        config.pop('sequence_feature_columns'), custom_objects=custom_objects)
    context_feature_columns = config.pop('context_feature_columns', None)
    if context_feature_columns:
      context_feature_columns = fc.deserialize_feature_columns(
          context_feature_columns, custom_objects=custom_objects)
    activation = tf_keras.activations.deserialize(
        config.pop('activation', None), custom_objects=custom_objects)
    # Remaining `config` entries (e.g. name, units, return_sequences) are
    # forwarded to the constructor unchanged.
    return cls(
        rnn_layer=rnn_layer,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        activation=activation,
        **config)


def _get_rnn_estimator_spec(features, labels, mode, head, rnn_model, optimizer,
                            return_sequences):
  """Computes `EstimatorSpec` from logits to use in estimator model function.

  Args:
    features: dict of `Tensor` and `SparseTensor` objects returned from
      `input_fn`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`.
    head: A `Head` instance.
    rnn_model: A Keras model that computes RNN logits from features.
    optimizer: String, `tf_keras.optimizers.Optimizer` object, or callable that
      creates the optimizer to use for training. If not specified, will use the
      Adagrad optimizer with a default learning rate of 0.05 and gradient clip
      norm of 5.0.
    return_sequences: A boolean indicating whether to return the last output
      in the output sequence, or the full sequence.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or optimizer is invalid, or features has the wrong
      type.
  """
  training = (mode == model_fn.ModeKeys.TRAIN)
  # In TRAIN mode, create optimizer and assign global_step variable to
  # optimizer.iterations to make global_step increased correctly, as Hooks
  # relies on global step as step counter - otherwise skip optimizer
  # initialization and set it to None.
  if training:
    # If user does not provide an optimizer instance, use the optimizer
    # specified by the string with default learning rate and gradient clipping.
    if isinstance(optimizer, six.string_types):
      optimizer = optimizers.get_optimizer_instance_v2(
          optimizer, learning_rate=_DEFAULT_LEARNING_RATE)
      optimizer.clipnorm = _DEFAULT_CLIP_NORM
    else:
      optimizer = optimizers.get_optimizer_instance_v2(optimizer)
    optimizer.iterations = tf.compat.v1.train.get_or_create_global_step()
  else:
    optimizer = None

  logits = rnn_model(features, training)

  if return_sequences and head.input_sequence_mask_key not in features:
    # Propagate the Keras sequence mask so the sequential head can exclude
    # padding steps from loss/metric aggregation.
    features[head.input_sequence_mask_key] = logits._keras_mask  # pylint: disable=protected-access

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      optimizer=optimizer,
      logits=logits,
      update_ops=rnn_model.updates,
      trainable_variables=rnn_model.trainable_variables)


def _verify_rnn_cell_input(rnn_cell_fn, units, cell_type):
  # `rnn_cell_fn` is mutually exclusive with the (units, cell_type) pair.
  if rnn_cell_fn and (units or cell_type != USE_DEFAULT):
    raise ValueError(
        'units and cell_type must not be specified when using rnn_cell_fn')


def _make_rnn_layer(rnn_cell_fn, units, cell_type, return_sequences):
  """Assert arguments are valid and return rnn_layer_fn.

  Args:
    rnn_cell_fn: A function that returns a RNN cell instance that will be used
      to construct the RNN.
    units: Iterable of integer number of hidden units per RNN layer.
    cell_type: A class producing a RNN cell or a string specifying the cell
      type.
    return_sequences: A boolean indicating whether to return the last output
      in the output sequence, or the full sequence.

  Returns:
    A tf_keras.layers.RNN layer.
""" _verify_rnn_cell_input(rnn_cell_fn, units, cell_type) if cell_type in _CELL_TYPE_TO_LAYER_MAPPING and isinstance(units, int): return _CELL_TYPE_TO_LAYER_MAPPING[cell_type]( units=units, return_sequences=return_sequences) if not rnn_cell_fn: if cell_type == USE_DEFAULT: cell_type = _SIMPLE_RNN_KEY rnn_cell_fn = _make_rnn_cell_fn(units, cell_type) return tf_keras.layers.RNN(cell=rnn_cell_fn(), return_sequences=return_sequences) @estimator_export('estimator.experimental.RNNEstimator', v1=[]) class RNNEstimator(estimator.Estimator): """An Estimator for TensorFlow RNN models with user-specified head. Example: ```python token_sequence = sequence_categorical_column_with_hash_bucket(...) token_emb = embedding_column(categorical_column=token_sequence, ...) estimator = RNNEstimator( head=tf.estimator.RegressionHead(), sequence_feature_columns=[token_emb], units=[32, 16], cell_type='lstm') # Or with custom RNN cell: def rnn_cell_fn(_): cells = [ tf_keras.layers.LSTMCell(size) for size in [32, 16] ] return tf_keras.layers.StackedRNNCells(cells) estimator = RNNEstimator( head=tf.estimator.RegressionHead(), sequence_feature_columns=[token_emb], rnn_cell_fn=rnn_cell_fn) # Input builders def input_fn_train: # returns x, y pass estimator.train(input_fn=input_fn_train, steps=100) def input_fn_eval: # returns x, y pass metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) def input_fn_predict: # returns x, None pass predictions = estimator.predict(input_fn=input_fn_predict) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * if the head's `weight_column` is not `None`, a feature with `key=weight_column` whose value is a `Tensor`. * for each `column` in `sequence_feature_columns`: - a feature with `key=column.name` whose `value` is a `SparseTensor`. * for each `column` in `context_feature_columns`: - if `column` is a `CategoricalColumn`, a feature with `key=column.name` whose `value` is a `SparseTensor`. 
- if `column` is a `WeightedCategoricalColumn`, two features: the first with `key` the id column name, the second with `key` the weight column name. Both features' `value` must be a `SparseTensor`. - if `column` is a `DenseColumn`, a feature with `key=column.name` whose `value` is a `Tensor`. Loss and predicted output are determined by the specified head. @compatibility(eager) Estimators are not compatible with eager execution. @end_compatibility """ def __init__(self, head, sequence_feature_columns, context_feature_columns=None, units=None, cell_type=USE_DEFAULT, rnn_cell_fn=None, return_sequences=False, model_dir=None, optimizer='Adagrad', config=None): """Initializes a `RNNEstimator` instance. Args: head: A `Head` instance. This specifies the model's output and loss function to be optimized. sequence_feature_columns: An iterable containing the `FeatureColumn`s that represent sequential input. All items in the set should either be sequence columns (e.g. `sequence_numeric_column`) or constructed from one (e.g. `embedding_column` with `sequence_categorical_column_*` as input). context_feature_columns: An iterable containing the `FeatureColumn`s for contextual input. The data represented by these columns will be replicated and given to the RNN at each timestep. These columns must be instances of classes derived from `DenseColumn` such as `numeric_column`, not the sequential variants. units: Iterable of integer number of hidden units per RNN layer. If set, `cell_type` must also be specified and `rnn_cell_fn` must be `None`. cell_type: A class producing a RNN cell or a string specifying the cell type. Supported strings are: `'simple_rnn'`, `'lstm'`, and `'gru'`. If set, `units` must also be specified and `rnn_cell_fn` must be `None`. rnn_cell_fn: A function that returns a RNN cell instance that will be used to construct the RNN. If set, `units` and `cell_type` cannot be set. This is for advanced users who need additional customization beyond `units` and `cell_type`. 
Note that `tf_keras.layers.StackedRNNCells` is needed for stacked RNNs. return_sequences: A boolean indicating whether to return the last output in the output sequence, or the full sequence. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. optimizer: An instance of `tf.Optimizer` or string specifying optimizer type. Defaults to Adagrad optimizer. config: `RunConfig` object to configure the runtime settings. Note that a RNN cell has: - a `call` method. - a `state_size` attribute. - a `output_size` attribute. - a `get_initial_state` method. See the documentation on `tf_keras.layers.RNN` for more details. Raises: ValueError: If `units`, `cell_type`, and `rnn_cell_fn` are not compatible. """ # TODO(aarg): Instead of raising an error convert head to sequential head. if return_sequences and not isinstance(head, seq_head_lib._SequentialHead): # pylint: disable=protected-access raise ValueError('Provided head must be a `_SequentialHead` object when ' '`return_sequences` is set to True.') _verify_rnn_cell_input(rnn_cell_fn, units, cell_type) def _model_fn(features, labels, mode, config): """RNNEstimator model function.""" del config # Unused. 
rnn_layer = _make_rnn_layer( rnn_cell_fn=rnn_cell_fn, units=units, cell_type=cell_type, return_sequences=return_sequences) rnn_model = RNNModel( rnn_layer=rnn_layer, units=head.logits_dimension, sequence_feature_columns=sequence_feature_columns, context_feature_columns=context_feature_columns, return_sequences=return_sequences, name='rnn_model') return _get_rnn_estimator_spec( features, labels, mode, head=head, rnn_model=rnn_model, optimizer=optimizer, return_sequences=return_sequences) super(RNNEstimator, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) @estimator_export('estimator.experimental.RNNClassifier', v1=[]) class RNNClassifier(RNNEstimator): """A classifier for TensorFlow RNN models. Trains a recurrent neural network model to classify instances into one of multiple classes. Example: ```python token_sequence = sequence_categorical_column_with_hash_bucket(...) token_emb = embedding_column(categorical_column=token_sequence, ...) estimator = RNNClassifier( sequence_feature_columns=[token_emb], units=[32, 16], cell_type='lstm') # Input builders def input_fn_train: # returns x, y pass estimator.train(input_fn=input_fn_train, steps=100) def input_fn_eval: # returns x, y pass metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) def input_fn_predict: # returns x, None pass predictions = estimator.predict(input_fn=input_fn_predict) ``` Input of `train` and `evaluate` should have following features, otherwise there will be a `KeyError`: * if `weight_column` is not `None`, a feature with `key=weight_column` whose value is a `Tensor`. * for each `column` in `sequence_feature_columns`: - a feature with `key=column.name` whose `value` is a `SparseTensor`. * for each `column` in `context_feature_columns`: - if `column` is a `CategoricalColumn`, a feature with `key=column.name` whose `value` is a `SparseTensor`. 
    - if `column` is a `WeightedCategoricalColumn`, two features: the first
      with `key` the id column name, the second with `key` the weight column
      name. Both features' `value` must be a `SparseTensor`.
    - if `column` is a `DenseColumn`, a feature with `key=column.name` whose
      `value` is a `Tensor`.

  Loss is calculated by using softmax cross entropy.

  @compatibility(eager)
  Estimators are not compatible with eager execution.
  @end_compatibility
  """

  def __init__(self,
               sequence_feature_columns,
               context_feature_columns=None,
               units=None,
               cell_type=USE_DEFAULT,
               rnn_cell_fn=None,
               return_sequences=False,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Adagrad',
               loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
               sequence_mask='sequence_mask',
               config=None):
    """Initializes a `RNNClassifier` instance.

    Builds the appropriate classification head for `n_classes` and delegates
    the rest of the construction to `RNNEstimator`.

    Args:
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s for
        contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must
        be instances of classes derived from `DenseColumn` such as
        `numeric_column`, not the sequential variants.
      units: Iterable of integer number of hidden units per RNN layer. If set,
        `cell_type` must also be specified and `rnn_cell_fn` must be `None`.
      cell_type: A class producing a RNN cell or a string specifying the cell
        type. Supported strings are: `'simple_rnn'`, `'lstm'`, and `'gru'`. If
        set, `units` must also be specified and `rnn_cell_fn` must be `None`.
      rnn_cell_fn: A function that returns a RNN cell instance that will be
        used to construct the RNN. If set, `units` and `cell_type` cannot be
        set. This is for advanced users who need additional customization
        beyond `units` and `cell_type`. Note that
        `tf_keras.layers.StackedRNNCells` is needed for stacked RNNs.
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence. Note that if True,
        `weight_column` must be None or a string.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column
        representing weights. It is used to down weight or boost examples
        during training. It will be multiplied by the loss of the example. If
        it is a string, it is used as a key to fetch weight tensor from the
        `features`. If it is a `NumericColumn`, raw tensor is fetched by key
        `weight_column.key`, then weight_column.normalizer_fn is applied on it
        to get weight tensor.
      label_vocabulary: A list of strings represents possible label values. If
        given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as integer or float within [0, 1] for `n_classes=2` and
        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2
        . Also there will be errors if vocabulary is not provided and labels
        are string.
      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
        type. Defaults to Adagrad optimizer.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
        how to reduce training loss over batch. Defaults to
        `SUM_OVER_BATCH_SIZE`.
      sequence_mask: A string with the name of the sequence mask tensor. If
        `sequence_mask` is in the features dictionary, the provided tensor is
        used, otherwise the sequence mask is computed from the length of
        sequential features. The sequence mask is used in evaluation and
        training mode to aggregate loss and metrics computation while
        excluding padding steps. It is also added to the predictions
        dictionary in prediction mode to indicate which steps are padding.
      config: `RunConfig` object to configure the runtime settings.

    Note that a RNN cell has:
      - a `call` method.
      - a `state_size` attribute.
      - a `output_size` attribute.
      - a `get_initial_state` method.
    See the documentation on `tf_keras.layers.RNN` for more details.

    Raises:
      ValueError: If `units`, `cell_type`, and `rnn_cell_fn` are not
        compatible.
    """
    # Binary classification gets the dedicated binary head; any other
    # `n_classes` value is handled by the multi-class head (which is
    # responsible for validating `n_classes` -- not checked here).
    if n_classes == 2:
      head = binary_head_lib.BinaryClassHead(
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    else:
      head = multi_head_lib.MultiClassHead(
          n_classes=n_classes,
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    if return_sequences:
      # Per-step predictions need a sequential head so that padding steps are
      # excluded from loss/metric aggregation. The weight column is forwarded
      # as `feature_columns`, presumably so the wrapper masks/flattens the
      # step weights alongside the labels -- verify in `SequentialHeadWrapper`.
      tf.compat.v1.logging.info(
          'Converting head to sequential head with '
          '`SequentialHeadWrapper` to allow sequential predictions.')
      head = seq_head_lib.SequentialHeadWrapper(
          head,
          sequence_length_mask=sequence_mask,
          feature_columns=weight_column)
    # `RNNEstimator.__init__` validates the units/cell_type/rnn_cell_fn
    # combination and raises the documented ValueError.
    super(RNNClassifier, self).__init__(
        head=head,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        units=units,
        cell_type=cell_type,
        rnn_cell_fn=rnn_cell_fn,
        return_sequences=return_sequences,
        model_dir=model_dir,
        optimizer=optimizer,
        config=config)



================================================
FILE: tensorflow_estimator/python/estimator/canned/rnn_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for rnn.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import random import tempfile from absl.testing import parameterized import numpy as np import six import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import parsing_utils from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.canned import rnn from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.head import multi_class_head as multi_head_lib from tensorflow_estimator.python.estimator.head import sequential_head as seq_head_lib from tensorflow_estimator.python.estimator.inputs import numpy_io # Names of variables created by BasicRNNCell model. 
# Variable names created by the Keras SimpleRNN cell and logits Dense layer;
# used both to build test checkpoints and to inspect trained checkpoints.
CELL_KERNEL_NAME = 'rnn_model/rnn/kernel'
CELL_RECURRENT_KERNEL_NAME = 'rnn_model/rnn/recurrent_kernel'
CELL_BIAS_NAME = 'rnn_model/rnn/bias'
LOGITS_WEIGHTS_NAME = 'rnn_model/logits/kernel'
LOGITS_BIAS_NAME = 'rnn_model/logits/bias'


def _assert_close(expected, actual, rtol=1e-04, name='assert_close'):
  """Returns an assert op checking `actual` is within `rtol` of `expected`.

  The comparison is relative: |expected - actual| / |expected| < rtol, so
  `expected` must be non-zero element-wise (a zero entry would divide by
  zero).
  """
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    rdiff = tf.math.abs(expected - actual, 'diff') / tf.math.abs(expected)
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return tf.compat.v1.debugging.assert_less(
        rdiff,
        rtol,
        data=('Condition expected =~ actual did not hold element-wise:'
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        name=scope)


def create_checkpoint(kernel, recurrent, bias, dense_kernel, dense_bias,
                      global_step, model_dir):
  """Create checkpoint file with provided model weights.

  Args:
    kernel: Iterable of values of input weights for the RNN cell.
    recurrent: Iterable of values of recurrent weights for the RNN cell.
    bias: Iterable of values of biases for the RNN cell.
    dense_kernel: Iterable of values for matrix connecting RNN output to
      logits.
    dense_bias: Iterable of values for logits bias term.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
  # Map each weight value to the variable name the RNN model will look up.
  model_weights = {}
  model_weights[CELL_KERNEL_NAME] = kernel
  model_weights[CELL_RECURRENT_KERNEL_NAME] = recurrent
  model_weights[CELL_BIAS_NAME] = bias
  model_weights[LOGITS_WEIGHTS_NAME] = dense_kernel
  model_weights[LOGITS_BIAS_NAME] = dense_bias

  with tf.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      tf.Variable(v, name=k, dtype=tf.dtypes.float32)

    # Create non-model variables.
    global_step_var = tf.compat.v1.train.create_global_step()
    assign_op = global_step_var.assign(global_step)

    # Initialize vars and save checkpoint.
    # MonitoredTrainingSession initializes the variables and writes the
    # checkpoint to `model_dir` when the session closes.
    with tf.compat.v1.train.MonitoredTrainingSession(
        checkpoint_dir=model_dir) as sess:
      sess.run(assign_op)


def _make_rnn_layer(rnn_cell_fn=None,
                    units=None,
                    cell_type=rnn.USE_DEFAULT,
                    return_sequences=False):
  # Thin wrapper around the private factory under test, supplying
  # test-friendly defaults for all arguments.
  return rnn._make_rnn_layer(
      rnn_cell_fn=rnn_cell_fn,
      units=units,
      cell_type=cell_type,
      return_sequences=return_sequences)


@test_util.run_all_in_graph_and_eager_modes
class RNNLayerFnTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for rnn layer function."""

  def testWrongClassProvided(self):
    """Tests that an error is raised if the class doesn't have a call method."""
    with self.assertRaisesRegexp(
        ValueError,
        'RNN cell should have a `call` and `state_size` method.'):
      _make_rnn_layer(units=[10], cell_type=lambda units: object())

  def testWrongStringProvided(self):
    """Tests that an error is raised if cell type is unknown."""
    with self.assertRaisesRegexp(
        ValueError,
        'cell_type` should be a class producing a RNN cell, or a string .*.'):
      _make_rnn_layer(units=[10], cell_type='unknown-cell-name')

  @parameterized.parameters(['simple_rnn', rnn.USE_DEFAULT])
  def testDefaultCellProvided(self, cell_type):
    """Tests behavior when the default cell type is provided."""
    # USE_DEFAULT and the explicit 'simple_rnn' string should both produce a
    # SimpleRNNCell wrapped in a generic RNN layer.
    layer = _make_rnn_layer(cell_type=cell_type, units=[1])
    self.assertIsInstance(layer, tf_keras.layers.RNN)
    self.assertIsInstance(layer.cell, tf_keras.layers.SimpleRNNCell)

  @parameterized.parameters([('gru', tf_keras.layers.GRU),
                             ('lstm', tf_keras.layers.LSTM),
                             ('simple_rnn', tf_keras.layers.SimpleRNN)])
  def testSpecificLayerTypeProvided(self, cell_type, layer_type):
    """Tests specific layer type for GRU and LSTM."""
    # Known string cell types map to their dedicated (fused) layer classes
    # rather than a generic RNN wrapper.
    layer = _make_rnn_layer(cell_type=cell_type, units=1)
    self.assertIsInstance(layer, layer_type)

  def testSpecificLayerTypeArguments(self):
    """Tests arguments for specific layer types (GRU and LSTM)."""
    mock_layer_type = tf.compat.v1.test.mock.Mock()
    # Patch the type mapping so the factory resolves 'custom-type' to our
    # mock, letting us inspect the arguments it is called with.
    with tf.compat.v1.test.mock.patch.object(rnn,
                                             '_CELL_TYPE_TO_LAYER_MAPPING',
                                             {'custom-type': mock_layer_type}):
      _make_rnn_layer(
cell_type='custom-type', units=11, return_sequences='return-seq-value') mock_layer_type.assert_called_once_with( units=11, return_sequences='return-seq-value') @tf.compat.v1.test.mock.patch.object(tf_keras.layers, 'RNN') def testCustomCellProvided(self, mock_rnn_layer_type): """Tests behavior when a custom cell type is provided.""" mock_custom_cell = tf.compat.v1.test.mock.Mock() _make_rnn_layer( units=[10], cell_type=lambda units: mock_custom_cell, return_sequences='return-seq-value') mock_rnn_layer_type.assert_called_once_with( cell=mock_custom_cell, return_sequences='return-seq-value') def testMultipleCellsProvided(self): """Tests behavior when multiple cells are provided.""" layer = _make_rnn_layer(cell_type='simple_rnn', units=[1, 2]) self.assertIsInstance(layer, tf_keras.layers.RNN) self.assertIsInstance(layer.cell, tf_keras.layers.StackedRNNCells) self.assertLen(layer.cell.cells, 2) self.assertIsInstance(layer.cell.cells[0], tf_keras.layers.SimpleRNNCell) @tf.compat.v1.test.mock.patch.object(tf_keras.layers, 'RNN') def testCustomCellFnProvided(self, mock_rnn_layer_type): """Tests behavior when a custom cell function is provided.""" mock_cell_fn = tf.compat.v1.test.mock.Mock(return_value='custom-cell') _make_rnn_layer( rnn_cell_fn=mock_cell_fn, return_sequences='return-seq-value') mock_rnn_layer_type.assert_called_once_with( cell='custom-cell', return_sequences='return-seq-value') def _mock_logits_layer(kernel, bias): """Sets initialization values to dense `logits` layers used in context.""" class _MockDenseLayer(tf_keras.layers.Dense): def __init__(self, units, activation, name): kwargs = {} if name == 'logits': kwargs = { 'kernel_initializer': tf.compat.v1.initializers.constant(kernel), 'bias_initializer': tf.compat.v1.initializers.constant(bias) } super(_MockDenseLayer, self).__init__( units=units, name=name, activation=activation, **kwargs) return tf.compat.v1.test.mock.patch.object(tf_keras.layers, 'Dense', _MockDenseLayer) def _default_features_fn(): 
return { 'price': tf.sparse.SparseTensor( values=[10., 5.], indices=[[0, 0], [0, 1]], dense_shape=[1, 2]), } def _get_mock_head(): mock_head = multi_head_lib.MultiClassHead(3) mock_head.create_estimator_spec = tf.compat.v1.test.mock.Mock( return_value=model_fn.EstimatorSpec(None)) return mock_head @test_util.run_all_in_graph_and_eager_modes class RNNLogitFnTest(tf.test.TestCase, parameterized.TestCase): """Tests correctness of logits calculated from RNNModel.""" def setUp(self): # Sets layers default weights for testing purpose. self.kernel = [[.1, -.2]] self.recurrent = [[.2, -.3], [.3, -.4]] self.bias = [.2, .5] self.dense_kernel = [[-1.], [1.]] self.dense_bias = [0.3] self.sequence_feature_columns = [ tf.feature_column.sequence_numeric_column('price', shape=(1,)) ] self.context_feature_columns = [] super(RNNLogitFnTest, self).setUp() def _mock_logits_layer(self): return _mock_logits_layer(self.dense_kernel, bias=self.dense_bias) def _test_logits(self, logits_dimension, features_fn, expected_logits, expected_mask, return_sequences=False, training=False): """Tests that the expected logits are calculated.""" rnn_layer = tf_keras.layers.SimpleRNN( 2, return_sequences=return_sequences, kernel_initializer=tf.compat.v1.initializers.constant(self.kernel), recurrent_initializer=tf.compat.v1.initializers.constant( self.recurrent), bias_initializer=tf.compat.v1.initializers.constant(self.bias)) with self._mock_logits_layer(): logit_layer = rnn.RNNModel( rnn_layer=rnn_layer, units=logits_dimension, sequence_feature_columns=self.sequence_feature_columns, context_feature_columns=self.context_feature_columns, return_sequences=return_sequences) logits = logit_layer(features_fn(), training=training) if return_sequences: logits = (logits, logits._keras_mask) expected_logits = (expected_logits, expected_mask) self.evaluate(tf.compat.v1.initializers.global_variables()) self.assertAllClose(expected_logits, self.evaluate(logits), atol=1e-4) @parameterized.named_parameters( { 
'testcase_name': 'Static', 'return_sequences': False, 'expected_logits': [[-0.6033]] }, { 'testcase_name': 'Sequential', 'return_sequences': True, 'expected_logits': [[[-1.4388], [-0.6033]]] }, { 'testcase_name': 'SequentialTrain', 'return_sequences': True, 'expected_logits': [[[-1.4388], [-0.6033]]], 'training': True }, { 'testcase_name': 'SequentialInfer', 'return_sequences': True, 'expected_logits': [[[-1.4388], [-0.6033]]], 'training': False }) def testOneDimLogits(self, return_sequences, expected_logits, training=False): """Tests one-dimensional logits. Intermediate values are rounded for ease in reading. input_layer = [[[10]], [[5]]] sequence_mask = [[1, 1]] initial_state = [0, 0] rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), tanh(-.2*10 - .3*0 - .4*0 +.5)]] = [[0.83, -0.91]] rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), tanh(-.2*5 - .3*.83 + .4*.91 +.5)]] = [[0.53, -0.37]] logits_timestep_1 = [[-1*0.83 - 1*0.91 + 0.3]] = [[-1.4388]] logits_timestep_2 = [[-1*0.53 - 1*0.37 + 0.3]] = [[-0.6033]] Args: return_sequences: A boolean indicating whether to return the last output in the output sequence, or the full sequence. expected_logits: An array with expected logits result. training: Specifies if this training or evaluation / prediction mode. """ expected_mask = [[1, 1]] self._test_logits( logits_dimension=1, features_fn=_default_features_fn, expected_mask=expected_mask, expected_logits=expected_logits, return_sequences=return_sequences, training=training) @parameterized.named_parameters( { 'testcase_name': 'Static', 'return_sequences': False, 'expected_logits': [[-0.6033, 0.7777, 0.5698]] }, { 'testcase_name': 'Sequential', 'return_sequences': True, 'expected_logits': [[[-1.4388, 1.0884, 0.5762], [-0.6033, 0.7777, 0.5698]]] }) def testMultiDimLogits(self, return_sequences, expected_logits): """Tests multi-dimensional logits. Intermediate values are rounded for ease in reading. 
input_layer = [[[10]], [[5]]] sequence_mask = [[1, 1]] initial_state = [0, 0] rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), tanh(-.2*10 - .3*0 - .4*0 +.5)]] = [[0.83, -0.91]] rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), tanh(-.2*5 - .3*.83 + .4*.91 +.5)]] = [[0.53, -0.37]] logits_timestep_1 = [[-1*0.83 - 1*0.91 + 0.3], [0.5*0.83 + 0.3*0.91 + 0.4], [0.2*0.83 - 0.1*0.91 + 0.5]] = [[-1.4388, 1.0884, 0.5762]] logits_timestep_2 = [[-1*0.53 - 1*0.37 + 0.3], [0.5*0.53 + 0.3*0.37 + 0.4], [0.2*0.53 - 0.1*0.37 + 0.5]] = [[-0.6033, 0.7777, 0.5698]] Args: return_sequences: A boolean indicating whether to return the last output in the output sequence, or the full sequence. expected_logits: An array with expected logits result. """ expected_mask = [[1, 1]] self.dense_kernel = [[-1., 0.5, 0.2], [1., -0.3, 0.1]] self.dense_bias = [0.3, 0.4, 0.5] self._test_logits( logits_dimension=3, features_fn=_default_features_fn, expected_mask=expected_mask, expected_logits=expected_logits, return_sequences=return_sequences) @parameterized.named_parameters( { 'testcase_name': 'Static', 'return_sequences': False, 'expected_logits': [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]] }, { 'testcase_name': 'Sequential', 'return_sequences': True, 'expected_logits': [[ [-1.4388, 1.0884, 0.5762], [-0.6033, 0.7777, 0.5698] ], [[0.0197, 0.5601, 0.5860], [-1.2473, 1.0170, 0.5745]]] }) def testMultiExampleMultiDim(self, return_sequences, expected_logits): """Tests multiple examples and multi-dimensional logits. Intermediate values are rounded for ease in reading. 
input_layer = [[[10], [5]], [[2], [7]]] sequence_mask = [[1, 1], [1, 1]] initial_state = [[0, 0], [0, 0]] rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2), tanh(-.2*10 - .3*0 - .4*0 +.5)], [tanh(.1*2 + .2*0 + .3*0 +.2), tanh(-.2*2 - .3*0 - .4*0 +.5)]] = [[0.83, -0.91], [0.38, 0.10]] rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2), tanh(-.2*5 - .3*.83 + .4*.91 +.5)], [tanh(.1*7 + .2*.38 + .3*.10 +.2), tanh(-.2*7 - .3*.38 - .4*.10 +.5)]] = [[0.53, -0.37], [0.76, -0.78] logits_timestep_1 = [[-1*0.83 - 1*0.91 + 0.3, 0.5*0.83 + 0.3*0.91 + 0.4, 0.2*0.83 - 0.1*0.91 + 0.5], [-1*0.38 + 1*0.10 + 0.3, 0.5*0.38 - 0.3*0.10 + 0.4, 0.2*0.38 + 0.1*0.10 + 0.5]] = [[-1.4388, 1.0884, 0.5762], [0.0197, 0.5601, 0.5860]] logits_timestep_2 = [[-1*0.53 - 1*0.37 + 0.3, 0.5*0.53 + 0.3*0.37 + 0.4, 0.2*0.53 - 0.1*0.37 + 0.5], [-1*0.76 - 1*0.78 + 0.3, 0.5*0.76 +0.3*0.78 + 0.4, 0.2*0.76 -0.1*0.78 + 0.5]] = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]] Args: return_sequences: A boolean indicating whether to return the last output in the output sequence, or the full sequence. expected_logits: An array with expected logits result. """ expected_mask = [[1, 1], [1, 1]] def features_fn(): return { 'price': tf.sparse.SparseTensor( values=[10., 5., 2., 7.], indices=[[0, 0], [0, 1], [1, 0], [1, 1]], dense_shape=[2, 2]), } self.dense_kernel = [[-1., 0.5, 0.2], [1., -0.3, 0.1]] self.dense_bias = [0.3, 0.4, 0.5] self._test_logits( logits_dimension=3, features_fn=features_fn, expected_mask=expected_mask, expected_logits=expected_logits, return_sequences=return_sequences) @parameterized.named_parameters( { 'testcase_name': 'Static', 'return_sequences': False, 'expected_logits': [[-0.6033], [0.0197]] }, { 'testcase_name': 'Sequential', 'return_sequences': True, 'expected_logits': [[[-1.4388], [-0.6033]], [[0.0197], [0.0197]]] }) def testMultiExamplesDifferentLength(self, return_sequences, expected_logits): """Tests multiple examples with different lengths. 
    Intermediate values are rounded for ease in reading.
    input_layer = [[[10], [5]], [[2], [0]]]
    sequence_mask = [[1, 1], [1, 0]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.1*2 + .2*0 + .3*0 +.2),
                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.83, -0.91], [0.38, 0.10]]
    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                             [_]]
                          = [[0.53, -0.37], [_, _]]
    logits_timestep_1 = [[-1*0.83 - 1*0.91 + 0.3],
                         [-1*0.38 + 1*0.10 + 0.3]]
                      = [[-1.4388], [0.0197]]
    logits_timestep_2 = [[-1*0.53 - 1*0.37 + 0.3], [_]]
                      = [[-0.6033], [_]]

    Args:
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence.
      expected_logits: An array with expected logits result.
    """
    # Second example has only one real step; the second step is padding.
    expected_mask = [[1, 1], [1, 0]]

    def features_fn():
      return {
          'price':
              tf.sparse.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }

    self._test_logits(
        logits_dimension=1,
        features_fn=features_fn,
        expected_mask=expected_mask,
        expected_logits=expected_logits,
        return_sequences=return_sequences)

  def testMultiExamplesWithContext(self):
    """Tests multiple examples with context features.

    Intermediate values are rounded for ease in reading.
input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]] sequence_mask = [[1, 1], [1, 0]] initial_state = [[0, 0], [0, 0]] rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2), tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)], [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2), tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]] = [[0.60, -0.96], [0.83, 0.68]] rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2), tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)], []] = [[0.03, -0.63], []] logits = [[-1*0.03 - 1*0.63 + 0.3], [-1*0.83 + 1*0.68 + 0.3]] = [[-0.3662], [0.1414]] """ expected_mask = [[1, 1], [1, 0]] def features_fn(): return { 'price': tf.sparse.SparseTensor( values=[10., 5., 2.], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]), 'context': [[-0.5], [0.8]], } self.context_feature_columns = [ tf.feature_column.numeric_column('context', shape=(1,)) ] self.kernel = [[.1, -.2], [1., 0.9]] self._test_logits( logits_dimension=1, features_fn=features_fn, expected_mask=expected_mask, expected_logits=[[-0.3662], [0.1414]]) def testMultiExamplesMultiFeatures(self): """Tests examples with multiple sequential feature columns. Intermediate values are rounded for ease in reading. 
input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]] sequence_mask = [[1, 1], [1, 0]] initial_state = [[0, 0], [0, 0]] rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2), tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)], [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2), tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]] = [[0.94, -0.96], [0.72, -0.38]] rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2), tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)], []] = [[0.92, -0.88], []] logits = [[-1*0.92 - 1*0.88 + 0.3], [-1*0.72 - 1*0.38 + 0.3]] = [[-1.5056], [-0.7962]] """ expected_mask = [[1, 1], [1, 0]] def features_fn(): return { 'price': tf.sparse.SparseTensor( values=[10., 5., 2.], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]), 'on_sale': tf.sparse.SparseTensor( values=[0, 1, 0], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]), } price_column = tf.feature_column.sequence_numeric_column( 'price', shape=(1,)) on_sale_column = tf.feature_column.indicator_column( tf.feature_column.sequence_categorical_column_with_identity( 'on_sale', num_buckets=2)) self.sequence_feature_columns = [price_column, on_sale_column] self.kernel = [[.5, -.5], [1., -1.], [.1, -.2]] self._test_logits( logits_dimension=1, features_fn=features_fn, expected_mask=expected_mask, expected_logits=[[-1.5056], [-0.7962]]) @parameterized.parameters([(model_fn.ModeKeys.TRAIN, True), (model_fn.ModeKeys.EVAL, False), (model_fn.ModeKeys.PREDICT, False)]) def testTrainingMode(self, mode, expected_training_mode): """Tests that `training` argument is properly used.""" class _MockRNNCell(tf_keras.layers.SimpleRNNCell): """Used to test that `training` argument is properly used.""" def __init__(self, test_case): self._test_case = test_case super(_MockRNNCell, self).__init__(units=10) def call(self, inputs, states, training=None): self._test_case.assertEqual(training, expected_training_mode) return super(_MockRNNCell, self).call( inputs=inputs, states=states, 
training=training) estimator = rnn.RNNEstimator( head=_get_mock_head(), rnn_cell_fn=lambda: _MockRNNCell(self), sequence_feature_columns=self.sequence_feature_columns) features = { 'price': tf.sparse.SparseTensor( values=[ 10., ], indices=[[0, 0]], dense_shape=[1, 1]), } estimator.model_fn(features=features, labels=None, mode=mode, config=None) class RNNModelTest(tf.test.TestCase, parameterized.TestCase): """Tests for RNNModel.""" def setUp(self): super(RNNModelTest, self).setUp() self.kernel = [[.1, -.2]] self.recurrent = [[.2, -.3], [.3, -.4]] self.bias = [.2, .5] self.dense_kernel = [[-1.], [1.]] self.dense_bias = [0.3] self.sequence_feature_columns = [ tf.feature_column.sequence_numeric_column('price', shape=(1,)) ] self.x = { 'price': tf.sparse.SparseTensor( values=[10., 5., 2.], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]), } self.y = ops.convert_to_tensor([[[0], [1]], [[0], [1]]]) def _get_compiled_model(self, return_sequences=False, optimizer='Adam', **kwargs): """Initializes and compiles a RNN model with specific weights.""" rnn_layer = tf_keras.layers.SimpleRNN( 2, return_sequences=return_sequences, kernel_initializer=tf.compat.v1.initializers.constant(self.kernel), recurrent_initializer=tf.compat.v1.initializers.constant( self.recurrent), bias_initializer=tf.compat.v1.initializers.constant(self.bias)) with _mock_logits_layer(self.dense_kernel, bias=self.dense_bias): model = rnn.RNNModel( units=1, rnn_layer=rnn_layer, sequence_feature_columns=self.sequence_feature_columns, activation=tf_keras.activations.sigmoid, return_sequences=return_sequences, **kwargs) model.compile( optimizer=optimizer, loss=tf_keras.losses.BinaryCrossentropy(reduction='sum'), metrics=['accuracy']) return model def testModelWeights(self): """Tests that the layers weights are properly added to the model weights.""" col = tf.feature_column.categorical_column_with_hash_bucket( 'tokens', hash_bucket_size=1) context_feature_columns = [ tf.feature_column.embedding_column(col, 
dimension=1) ] seq_col = tf.feature_column.sequence_categorical_column_with_hash_bucket( 'seq-tokens', hash_bucket_size=1) sequence_feature_columns = [ tf.feature_column.embedding_column(seq_col, dimension=1) ] model = rnn.RNNModel( units=1, rnn_layer=tf_keras.layers.SimpleRNN(2), sequence_feature_columns=sequence_feature_columns, activation=tf_keras.activations.sigmoid, context_feature_columns=context_feature_columns) model.compile( optimizer='Adam', loss=tf_keras.losses.BinaryCrossentropy(reduction='sum'), metrics=['accuracy']) model.predict( x={ 'tokens': ops.convert_to_tensor([['a']]), 'seq-tokens': ops.convert_to_tensor([[['a']]]) }, steps=1) # Weights included are: # - recurrent, kernel and bias from RNN layer # - kernel and bias from logits layer # - sequential feature column embedding # - context feature column embedding. self.assertLen(model.get_weights(), 7) def _testModelConfig(self, **kwargs): """Tests the parameters of a RNNModel stored to and restored from config. Args: **kwargs: Additional keyword arguments to initialize the RNNModel before calling `get_config`. Returns: A dictionary with RNNModel initialization arguments from the `from_config` call. 
""" seq_col = tf.feature_column.sequence_categorical_column_with_hash_bucket( 'seq-tokens', hash_bucket_size=1) sequence_feature_columns = [ tf.feature_column.embedding_column( seq_col, dimension=1, initializer=tf.compat.v1.initializers.zeros()) ] model = rnn.RNNModel( units=11, rnn_layer=tf_keras.layers.SimpleRNN(3), sequence_feature_columns=sequence_feature_columns, return_sequences=True, name='rnn-model', **kwargs) with tf.compat.v1.test.mock.patch.object( rnn.RNNModel, '__init__', return_value=None) as init: rnn.RNNModel.from_config( model.get_config(), custom_objects={'Zeros': tf.compat.v1.initializers.zeros}) return list(init.call_args_list[0])[1] def testModelConfig(self): """Tests that a RNNModel can be stored to and restored from config.""" init_kwargs = self._testModelConfig() self.assertEqual(init_kwargs['name'], 'rnn-model') self.assertEqual(init_kwargs['units'], 11) self.assertEqual(init_kwargs['return_sequences'], True) self.assertEqual( init_kwargs['sequence_feature_columns'][0].categorical_column.name, 'seq-tokens') self.assertEqual(init_kwargs['context_feature_columns'], None) self.assertEqual(init_kwargs['activation'].__name__, 'linear') self.assertEqual(init_kwargs['rnn_layer'].cell.units, 3) def testModelConfigWithActivation(self): """Tests store / restore from config with logits activation.""" init_kwargs = self._testModelConfig(activation=tf_keras.activations.sigmoid) self.assertEqual(init_kwargs['activation'].__name__, 'sigmoid') def testModelConfigWithContextFeatures(self): """Tests store / restore from config with context features.""" init_kwargs = self._testModelConfig(context_feature_columns=[ tf.feature_column.numeric_column('context', shape=(1,)) ]) self.assertEqual(init_kwargs['context_feature_columns'][0].name, 'context') def DISABLED_testSaveModelWeights(self): # See b/129842600. 
"""Tests that model weights can be saved and restored.""" model = self._get_compiled_model(return_sequences=True) model.fit(x=self.x, y=self.y, batch_size=1, steps_per_epoch=1, epochs=1) y1 = model.predict(x=self.x, steps=1) model.save_weights(self.get_temp_dir() + 'model') model = self._get_compiled_model(return_sequences=True, name='model-2') model.load_weights(self.get_temp_dir() + 'model') y2 = model.predict(x=self.x, steps=1) self.assertAllClose(y1, y2) def DISABLED_testEvaluationMetrics(self): # See b/129842600. """Tests evaluation metrics computation in non-sequential case.""" model = self._get_compiled_model() metrics = model.evaluate( x=self.x, y=ops.convert_to_tensor([[0], [1]]), steps=1) # See `RNNClassifierEvaluationTest` for details on computation. self.assertAllClose(metrics, (1.1196611, 1.), atol=1e-4) def DISABLED_testEvaluationSequential(self): # See b/129842600. """Tests that the sequence mask is properly used to aggregate loss.""" model = self._get_compiled_model(return_sequences=True) metrics = model.evaluate(x=self.x, y=self.y, steps=1) # See `RNNClassifierEvaluationTest` for details on computation. self.assertAllClose(metrics, (1.9556, 1. / 3.), atol=1e-4) def DISABLED_testPredictions(self): # See b/129842600. """Tests predictions with RNN model.""" model = self._get_compiled_model() # See `RNNClassifierPredictionTest` for details on computation. self.assertAllClose( model.predict(x=self.x, steps=1), [[0.353593], [0.5049296]], atol=1e-4) def DISABLED_testPredictionsSequential(self): # See b/129842600. """Tests sequential predictions with RNN model.""" model = self._get_compiled_model(return_sequences=True) # See `RNNClassifierPredictionTest` for details on computation. 
self.assertAllClose( model.predict(x=self.x, steps=1), [[[0.191731], [0.353593]], [[0.5049296], [0.5049296]]], atol=1e-4) @parameterized.named_parameters( ('StringOptimizer', 'Adam'), ('OptimizerInstance', tf_keras.optimizers.Adam())) def DISABLED_testTraining(self, optimizer): # See b/129842600. """Tests the loss computed in training step.""" model = self._get_compiled_model(optimizer=optimizer) history = model.fit( x=self.x, y=ops.convert_to_tensor([[0], [1]]), batch_size=1, steps_per_epoch=1) # See `RNNClassifierTrainingTest` for details on computation. self.assertAllClose(history.history['loss'], [1.1196611], atol=1e-4) def DISABLED_testTrainingSequential(self): # See b/129842600. """Tests the loss computed in training step in sequential case.""" model = self._get_compiled_model(return_sequences=True) history = model.fit(x=self.x, y=self.y, batch_size=1, steps_per_epoch=1) # See `RNNClassifierTrainingTest` for details on computation. self.assertAllClose(history.history['loss'], [1.9556], atol=1e-4) @test_util.run_all_in_graph_and_eager_modes class RNNEstimatorInitTest(tf.test.TestCase): def setUp(self): col = tf.feature_column.sequence_categorical_column_with_hash_bucket( 'tokens', hash_bucket_size=10) self.feature_columns = [ tf.feature_column.embedding_column(col, dimension=2) ] self.cell_units = [4, 2] super(RNNEstimatorInitTest, self).setUp() def testConflictingRNNCellFn(self): with self.assertRaisesRegexp( ValueError, 'units and cell_type must not be specified when using rnn_cell_fn'): rnn.RNNClassifier( sequence_feature_columns=self.feature_columns, rnn_cell_fn=lambda: 'mock-cell', units=self.cell_units) with self.assertRaisesRegexp( ValueError, 'units and cell_type must not be specified when using rnn_cell_fn'): rnn.RNNClassifier( sequence_feature_columns=self.feature_columns, rnn_cell_fn=lambda: 'mock-cell', cell_type='lstm') def testNonSequentialHeadProvided(self): with self.assertRaisesRegexp( ValueError, 'Provided head must be a `_SequentialHead` 
object when ' '`return_sequences` is set to True.'): rnn.RNNEstimator( head=multi_head_lib.MultiClassHead(n_classes=3), sequence_feature_columns=self.feature_columns, return_sequences=True) def testWrongOptimizerTypeProvided(self): classifier = rnn.RNNClassifier( self.feature_columns, units=[1], optimizer=object()) with self.assertRaisesRegexp( ValueError, 'The given object is not a tf_keras.optimizers.Optimizer instance.'): classifier.model_fn( features=None, labels=None, mode=model_fn.ModeKeys.TRAIN, config=None) @test_util.run_all_in_graph_and_eager_modes class RNNClassifierTrainingTest(tf.test.TestCase): def setUp(self): self.kernel = [[.1, -.2]] self.recurrent = [[.2, -.3], [.3, -.4]] self.bias = [.2, .5] self.dense_kernel = [[-1.], [1.]] self.dense_bias = [0.3] self.sequence_feature_columns = [ tf.feature_column.sequence_numeric_column('price', shape=(1,)) ] super(RNNClassifierTrainingTest, self).setUp() def _assert_checkpoint(self, n_classes, input_units, cell_units, expected_global_step): shapes = { name: shape for (name, shape) in tf.train.list_variables(self.get_temp_dir()) } self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertEqual( expected_global_step, tf.train.load_variable(self.get_temp_dir(), tf.compat.v1.GraphKeys.GLOBAL_STEP)) # RNN Cell variables. for i, cell_unit in enumerate(cell_units): name_suffix = '_%d' % i if i else '' self.assertEqual([input_units, cell_unit], shapes[CELL_KERNEL_NAME + name_suffix]) self.assertEqual([cell_unit, cell_unit], shapes[CELL_RECURRENT_KERNEL_NAME + name_suffix]) self.assertEqual([cell_unit], shapes[CELL_BIAS_NAME + name_suffix]) input_units = cell_unit # Logits variables. 
logits_dimension = n_classes if n_classes > 2 else 1 self.assertEqual([cell_units[-1], logits_dimension], shapes[LOGITS_WEIGHTS_NAME]) self.assertEqual([logits_dimension], shapes[LOGITS_BIAS_NAME]) def _mock_optimizer(self, expected_loss=None): var_names = (CELL_BIAS_NAME, CELL_KERNEL_NAME, CELL_RECURRENT_KERNEL_NAME, LOGITS_BIAS_NAME, LOGITS_WEIGHTS_NAME) expected_var_names = ['%s:0' % name for name in var_names] class _Optimizer(tf_keras.optimizers.Optimizer): """Mock optimizer checking that loss has the proper value.""" def __init__(self, test_case): super(_Optimizer, self).__init__(name='my-optimizer') self.call_count = 0 self._test_case = test_case def get_updates(self, loss, params): self.call_count += 1 trainable_vars = tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self._test_case.assertItemsEqual(expected_var_names, [var.name for var in trainable_vars]) # Verify loss. We can't check the value directly so we add an assert op. self._test_case.assertEquals(0, loss.shape.ndims) if expected_loss is None: return [self.iterations.assign_add(1).op] assert_loss = _assert_close( tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32), loss, name='assert_loss') with tf.control_dependencies((assert_loss,)): return [self.iterations.assign_add(1).op] def get_config(self): pass return _Optimizer(test_case=self) def _testFromScratchWithDefaultOptimizer(self, n_classes): def train_input_fn(): return { 'tokens': tf.sparse.SparseTensor( values=['the', 'cat', 'sat'], indices=[[0, 0], [0, 1], [0, 2]], dense_shape=[1, 3]), }, [[1]] col = tf.feature_column.sequence_categorical_column_with_hash_bucket( 'tokens', hash_bucket_size=10) embed = tf.feature_column.embedding_column(col, dimension=2) input_units = 2 cell_units = [4, 2] est = rnn.RNNClassifier( sequence_feature_columns=[embed], units=cell_units, n_classes=n_classes, model_dir=self.get_temp_dir()) # Train for a few steps, and validate final checkpoint. 
    # Train briefly, then verify variable names/shapes and the global step
    # recorded in the resulting checkpoint.
    num_steps = 10
    est.train(input_fn=train_input_fn, steps=num_steps)
    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)

  def testBinaryClassFromScratchWithDefaultOptimizer(self):
    self._testFromScratchWithDefaultOptimizer(n_classes=2)

  def testMultiClassFromScratchWithDefaultOptimizer(self):
    self._testFromScratchWithDefaultOptimizer(n_classes=4)

  def testFromScratchWithCustomRNNCellFn(self):
    """Tests training from scratch with a user-provided `rnn_cell_fn`."""

    def train_input_fn():
      return {
          'tokens':
              tf.sparse.SparseTensor(
                  values=['the', 'cat', 'sat'],
                  indices=[[0, 0], [0, 1], [0, 2]],
                  dense_shape=[1, 3]),
      }, [[1]]

    col = tf.feature_column.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = tf.feature_column.embedding_column(col, dimension=2)
    input_units = 2
    cell_units = [4, 2]
    n_classes = 2

    # Cell factory used instead of the `units`/`cell_type` arguments.
    def rnn_cell_fn():
      cells = [tf_keras.layers.SimpleRNNCell(units=n) for n in cell_units]
      return tf_keras.layers.StackedRNNCells(cells)

    est = rnn.RNNClassifier(
        sequence_feature_columns=[embed],
        rnn_cell_fn=rnn_cell_fn,
        n_classes=n_classes,
        model_dir=self.get_temp_dir())

    # Train for a few steps, and validate final checkpoint.
    num_steps = 10
    est.train(input_fn=train_input_fn, steps=num_steps)
    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)

  def _testExampleWeight(self, n_classes):
    """Trains with a per-example weight column and checks the checkpoint."""

    def train_input_fn():
      return {
          'tokens':
              tf.sparse.SparseTensor(
                  values=['the', 'cat', 'sat', 'dog', 'barked'],
                  indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                  dense_shape=[2, 3]),
          'w': [[1], [2]],
      }, [[1], [0]]

    col = tf.feature_column.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = tf.feature_column.embedding_column(col, dimension=2)
    input_units = 2
    cell_units = [4, 2]
    est = rnn.RNNClassifier(
        units=cell_units,
        sequence_feature_columns=[embed],
        n_classes=n_classes,
        weight_column='w',
        model_dir=self.get_temp_dir())

    # Train for a few steps, and validate final checkpoint.
    # Train briefly, then verify checkpoint variable shapes and global step.
    num_steps = 10
    est.train(input_fn=train_input_fn, steps=num_steps)
    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)

  def testBinaryClassWithExampleWeight(self):
    self._testExampleWeight(n_classes=2)

  def testMultiClassWithExampleWeight(self):
    self._testExampleWeight(n_classes=4)

  def _testFromCheckpoint(self, input_fn, expected_loss, **kwargs):
    """Loads classifier from checkpoint, runs training and checks loss."""
    create_checkpoint(
        kernel=self.kernel,
        recurrent=self.recurrent,
        bias=self.bias,
        dense_kernel=self.dense_kernel,
        dense_bias=self.dense_bias,
        global_step=100,
        model_dir=self.get_temp_dir())

    # The mock optimizer asserts (via a graph assert op) that the loss seen
    # at the single training step matches `expected_loss`.
    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)

    est = rnn.RNNClassifier(
        units=[2],
        sequence_feature_columns=self.sequence_feature_columns,
        optimizer=mock_optimizer,
        model_dir=self.get_temp_dir(),
        **kwargs)
    self.assertEqual(0, mock_optimizer.call_count)
    est.train(input_fn=input_fn, steps=10)
    self.assertEqual(1, mock_optimizer.call_count)

  def testBinaryClassFromCheckpoint(self):

    def train_input_fn():
      return {
          'price':
              tf.sparse.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    # Uses same checkpoint and examples as testBinaryClassEvaluationMetrics.
    # See that test for loss calculation.
    self._testFromCheckpoint(train_input_fn, expected_loss=0.559831)

  def testMultiClassFromCheckpoint(self):

    def train_input_fn():
      return {
          'price':
              tf.sparse.SparseTensor(
                  values=[10., 5., 2., 7.],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
    # See that test for loss calculation.
    self.dense_kernel = [[-1., 0.5, 0.2], [1., -0.3, 0.1]]
    self.dense_bias = [0.3, 0.4, 0.5]
    self._testFromCheckpoint(
        train_input_fn, expected_loss=1.331465, n_classes=3)

  def testBinaryClassFromCheckpointSequential(self):
    """Tests per-step (sequential) loss when training from a checkpoint."""

    def train_input_fn():
      # Labels are sparse: the second sequence has only one valid step.
      return {
          'price':
              tf.sparse.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }, tf.sparse.SparseTensor(
          values=[0, 1, 0],
          indices=[[0, 0], [0, 1], [1, 0]],
          dense_shape=[2, 2])

    # Same example as testBinaryClassEvaluationMetricsSequential.
    # logits = [[[-1.4388], [-0.6033]],
    #           [[0.0197], [_]]]
    # probability = np.exp(logits) / (1 + np.exp(logits))
    #             = [[0.1917, 0.3536],
    #                [0.5049, _]]
    # loss = -label * ln(p) - (1 - label) * ln(1 - p)
    # loss = [[0.2129, 1.0396],
    #         [0.7031, _]]
    # aggregated_loss = sum(loss) / 3
    # aggregated_loss = 0.6518
    self._testFromCheckpoint(
        train_input_fn, expected_loss=0.651841, return_sequences=True)

  def testBinaryClassFromCheckpointSequentialWithWeights(self):
    """Tests that zero-weighted steps do not contribute to the loss."""

    def train_input_fn():
      return {
          'price':
              tf.sparse.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
          'weights':
              tf.sparse.SparseTensor(
                  values=[0., 0.5, 0.5],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2])
      }, tf.sparse.SparseTensor(
          values=[0, 0, 1],
          indices=[[0, 0], [0, 1], [1, 0]],
          dense_shape=[2, 2])

    # Checkpoint and input are the same as testBinaryClassEvaluationMetrics, and
    # expected loss is the same as we use non-zero weights only for the last
    # step of each sequence.
    # loss = [[_, 0.436326],
    #         [0.6833351, _]]
    # weights = [[0, 0.5], [0.5, 0]]
    # aggregated_loss = (0.436326 + 0.6833351) / 2.
# = 0.559831 self._testFromCheckpoint( train_input_fn, expected_loss=0.559831, return_sequences=True, weight_column='weights', loss_reduction=tf_keras.losses.Reduction.SUM) def testDefaultGradientClipping(self): """Tests that optimizer applies default gradient clipping value.""" def train_input_fn(): return { 'price': tf.sparse.SparseTensor( values=[ 1., ], indices=[[0, 0]], dense_shape=[1, 1]), }, [[1]] def _wrap_create_estimator_spec(create_estimator_spec): """Wraps function and asserts that the optimizer applies clipping.""" def _wrapped_create_estimator_spec(obj, features, mode, logits, labels=None, optimizer=None, trainable_variables=None, train_op_fn=None, update_ops=None, regularization_losses=None): var = tf.Variable([1.0]) mock_loss = 10 * var gradients = optimizer.get_gradients(mock_loss, [var]) self.assertLen(gradients, 1) # Initial gradient value is 10 and expected to be clipped to 5 (default # clipping value). with tf.control_dependencies( (tf.compat.v1.debugging.assert_equal(gradients[0], 5.0),)): return create_estimator_spec(obj, features, mode, logits, labels, optimizer, trainable_variables, train_op_fn, update_ops, regularization_losses) return _wrapped_create_estimator_spec with tf.compat.v1.test.mock.patch.object( multi_head_lib.MultiClassHead, 'create_estimator_spec', _wrap_create_estimator_spec( multi_head_lib.MultiClassHead.create_estimator_spec)): est = rnn.RNNClassifier( n_classes=3, sequence_feature_columns=[ tf.feature_column.sequence_numeric_column('price') ], units=[2], model_dir=self.get_temp_dir()) est.train(input_fn=train_input_fn, steps=1) def sorted_key_dict(unsorted_dict): return {k: unsorted_dict[k] for k in sorted(unsorted_dict)} @test_util.run_all_in_graph_and_eager_modes class RNNClassifierEvaluationTest(tf.test.TestCase): def setUp(self): self.kernel = [[.1, -.2]] self.recurrent = [[.2, -.3], [.3, -.4]] self.bias = [.2, .5] self.dense_kernel = [[-1.], [1.]] self.dense_bias = [0.3] self.global_step = 100 
    self.sequence_feature_columns = [
        tf.feature_column.sequence_numeric_column('price', shape=(1,))
    ]
    super(RNNClassifierEvaluationTest, self).setUp()

  def _testFromCheckpoint(self, input_fn, **kwargs):
    """Writes a fixed checkpoint and evaluates a classifier restored from it."""
    create_checkpoint(
        kernel=self.kernel,
        recurrent=self.recurrent,
        bias=self.bias,
        dense_kernel=self.dense_kernel,
        dense_bias=self.dense_bias,
        global_step=self.global_step,
        model_dir=self.get_temp_dir())

    est = rnn.RNNClassifier(
        units=[2],
        sequence_feature_columns=self.sequence_feature_columns,
        model_dir=self.get_temp_dir(),
        **kwargs)
    return est.evaluate(input_fn, steps=1)

  def testBinaryClassEvaluationMetrics(self):
    """Tests binary-classification metric values from fixed weights."""

    def eval_input_fn():
      return {
          'price':
              tf.sparse.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    eval_metrics = self._testFromCheckpoint(eval_input_fn)

    # Uses identical numbers to testMultiExamplesWithDifferentLength.
    # See that test for logits calculation.
    # logits = [[-0.603282], [0.019719]]
    # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
    # loss = -label * ln(p) - (1 - label) * ln(1 - p)
    #      = [[0.436326], [0.683335]]
    # sum_over_batch_size = (0.436326 + 0.683335)/2
    expected_metrics = {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: self.global_step,
        metric_keys.MetricKeys.LOSS: 0.559831,
        metric_keys.MetricKeys.LOSS_MEAN: 0.559831,
        metric_keys.MetricKeys.ACCURACY: 1.0,
        metric_keys.MetricKeys.PREDICTION_MEAN: 0.429262,
        metric_keys.MetricKeys.LABEL_MEAN: 0.5,
        metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
        # With default threshold of 0.5, the model is a perfect classifier.
        metric_keys.MetricKeys.RECALL: 1.0,
        metric_keys.MetricKeys.PRECISION: 1.0,
        # Positive example is scored above negative, so AUC = 1.0.
metric_keys.MetricKeys.AUC: 1.0, metric_keys.MetricKeys.AUC_PR: 1.0, } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics)) def testBinaryClassEvaluationMetricsSequential(self): def eval_input_fn(): return { 'price': tf.sparse.SparseTensor( values=[10., 5., 2.], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]), }, tf.sparse.SparseTensor( values=[0, 1, 0], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]) eval_metrics = self._testFromCheckpoint( eval_input_fn, return_sequences=True) # logits = [[[-1.4388], [-0.6033]], # [[0.0197], [_]]] # probability = np.exp(logits) / (1 + np.exp(logits)) # = [[0.1917, 0.3536], # [0.5049, _]] # labels = [[0, 1], # [0, _]] # loss = -label * ln(p) - (1 - label) * ln(1 - p) # loss = [[0.2129, 1.0396], # [0.7031, _]] # aggregated_loss = sum(loss) / 3 # aggregated_loss = 0.6518 # accuracy = 1/3 # prediction_mean = mean(probability) = 0.3501 expected_metrics = { tf.compat.v1.GraphKeys.GLOBAL_STEP: self.global_step, metric_keys.MetricKeys.LOSS: 0.651841, metric_keys.MetricKeys.LOSS_MEAN: 0.651841, metric_keys.MetricKeys.ACCURACY: 1.0 / 3, metric_keys.MetricKeys.PREDICTION_MEAN: 0.350085, metric_keys.MetricKeys.LABEL_MEAN: 1.0 / 3, metric_keys.MetricKeys.ACCURACY_BASELINE: 2.0 / 3, metric_keys.MetricKeys.RECALL: 0.0, metric_keys.MetricKeys.PRECISION: 0.0, metric_keys.MetricKeys.AUC: 0.5, metric_keys.MetricKeys.AUC_PR: 0.30685282, } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics)) def testMultiClassEvaluationMetrics(self): def eval_input_fn(): return { 'price': tf.sparse.SparseTensor( values=[10., 5., 2., 7.], indices=[[0, 0], [0, 1], [1, 0], [1, 1]], dense_shape=[2, 2]), }, [[0], [1]] self.dense_kernel = [[-1., 0.5, 0.2], [1., -0.3, 0.1]] self.dense_bias = [0.3, 0.4, 0.5] # Uses identical numbers to testMultiExampleMultiDim. # See that test for logits calculation. 
# logits = [[-0.603282, 0.777708, 0.569756], # [-1.247356, 1.017018, 0.574481]] # logits_exp = exp(logits) / (1 + exp(logits)) # = [[0.547013, 2.176468, 1.767836], # [0.287263, 2.764937, 1.776208]] # softmax_probabilities = logits_exp / logits_exp.sum() # = [[0.121793, 0.484596, 0.393611], # [0.059494, 0.572639, 0.367866]] # loss = -1. * log(softmax[label]) # = [[2.105432], [0.557500]] # sum_over_batch_size = (2.105432 + 0.557500)/2 eval_metrics = self._testFromCheckpoint(eval_input_fn, n_classes=3) expected_metrics = { tf.compat.v1.GraphKeys.GLOBAL_STEP: self.global_step, metric_keys.MetricKeys.LOSS: 1.331465, metric_keys.MetricKeys.LOSS_MEAN: 1.331466, metric_keys.MetricKeys.ACCURACY: 0.5, } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics)) @test_util.run_all_in_graph_and_eager_modes class RNNClassifierPredictionTest(tf.test.TestCase): def setUp(self): self.kernel = [[.1, -.2]] self.recurrent = [[.2, -.3], [.3, -.4]] self.bias = [.2, .5] self.dense_kernel = [[-1.], [1.]] self.dense_bias = [0.3] self.sequence_feature_columns = [ tf.feature_column.sequence_numeric_column('price', shape=(1,)) ] super(RNNClassifierPredictionTest, self).setUp() def _testFromCheckpoint(self, input_fn, **kwargs): create_checkpoint( kernel=self.kernel, recurrent=self.recurrent, bias=self.bias, dense_kernel=self.dense_kernel, dense_bias=self.dense_bias, global_step=100, model_dir=self.get_temp_dir()) n_classes = 2 if 'n_classes' in kwargs: n_classes = kwargs['n_classes'] assert n_classes >= 2 label_vocabulary = [ 'class_{}'.format(class_idx) for class_idx in range(n_classes) ] est = rnn.RNNClassifier( units=[2], sequence_feature_columns=self.sequence_feature_columns, label_vocabulary=label_vocabulary, model_dir=self.get_temp_dir(), **kwargs) return next(est.predict(input_fn)) def testBinaryClassPredictions(self): # Uses identical numbers to testOneDimLogits. # See that test for logits calculation. 
# logits = [-0.603282] # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593] # probabilities = [0.646407, 0.353593] # class_ids = argmax(probabilities) = [0] predictions = self._testFromCheckpoint(_default_features_fn) self.assertAllClose([-0.603282], predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose([0.353593], predictions[prediction_keys.PredictionKeys.LOGISTIC]) self.assertAllClose( [0.646407, 0.353593], predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose([0], predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertEqual([b'class_0'], predictions[prediction_keys.PredictionKeys.CLASSES]) def testMultiClassPredictions(self): self.dense_kernel = [[-1., 0.5, 0.2], [1., -0.3, 0.1]] self.dense_bias = [0.3, 0.4, 0.5] # Uses identical numbers to testMultiDimLogits. # See that test for logits calculation. # logits = [-0.603282, 0.777708, 0.569756] # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836] # softmax_probabilities = logits_exp / logits_exp.sum() # = [0.121793, 0.484596, 0.393611] # class_ids = argmax(probabilities) = [1] predictions = self._testFromCheckpoint(_default_features_fn, n_classes=3) self.assertAllClose([-0.603282, 0.777708, 0.569756], predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose( [0.121793, 0.484596, 0.393611], predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose([1], predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertEqual([b'class_1'], predictions[prediction_keys.PredictionKeys.CLASSES]) def testBinaryClassPredictionsSequential(self): def predict_input_fn(): return { 'price': tf.sparse.SparseTensor( values=[10., 5.], indices=[[0, 0], [0, 1]], dense_shape=[1, 2]), } # Same as first record of testBinaryClassEvaluationMetricsSequential. # Last step values are carried over. 
# logits = [[-1.4388], [-0.6033], [_]] # probabilities = np.exp(logits) / (1 + np.exp(logits)) # = [[0.8083, 0.1917], [0.6464, 0.3536], [_, _]] # class_ids = [[0], [0], [_]] # classes = [['class_0'], ['class_0'], [_]] predictions = self._testFromCheckpoint( predict_input_fn, return_sequences=True, sequence_mask='my-mask') self.assertAllEqual([1, 1], predictions['my-mask']) self.assertAllClose([[-1.438803], [-0.603282]], predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose([[0.191731], [0.353593]], predictions[prediction_keys.PredictionKeys.LOGISTIC]) self.assertAllClose( [[0.808269, 0.191731], [0.646407, 0.353593]], predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose([[0], [0]], predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual([[b'class_0'], [b'class_0']], predictions[prediction_keys.PredictionKeys.CLASSES]) class BaseRNNClassificationIntegrationTest(object): def setUp(self): col = tf.feature_column.sequence_categorical_column_with_hash_bucket( 'tokens', hash_bucket_size=10) embed = tf.feature_column.embedding_column(col, dimension=2) self.feature_columns = [embed] super(BaseRNNClassificationIntegrationTest, self).setUp() def __init__(self, _create_estimator_fn): self._create_estimator_fn = _create_estimator_fn def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, n_classes, batch_size, optimizer='Adam'): cell_units = [4, 2] est = self._create_estimator_fn( self.feature_columns, n_classes, cell_units, self.get_temp_dir(), optimizer=optimizer) # TRAIN num_steps = 10 est.train(train_input_fn, steps=num_steps) # EVALUATE scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) # PREDICT predicted_proba = np.array([ x[prediction_keys.PredictionKeys.PROBABILITIES] for x in est.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) # EXPORT 
    feature_spec = parsing_utils.classifier_parse_example_spec(
        self.feature_columns, label_key='label', label_dtype=tf.dtypes.int64)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _testNumpyInputFn(self, optimizer):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    batch_size = 10
    words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept']
    # Numpy only supports dense input, so all examples will have same length.
    # TODO(b/73160931): Update test when support for prepadded data exists.
    sequence_length = 3

    features = []
    for _ in range(batch_size):
      sentence = random.sample(words, sequence_length)
      features.append(sentence)

    x_data = np.array(features)
    y_data = np.random.randint(n_classes, size=batch_size)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'tokens': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'tokens': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'tokens': x_data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size,
        optimizer=optimizer)

  def testNumpyInputFnStringOptimizer(self):
    self._testNumpyInputFn(optimizer='Adam')

  def testNumpyInputFnOptimizerInstance(self):
    self._testNumpyInputFn(optimizer=tf_keras.optimizers.Adam())

  def testParseExampleInputFn(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    n_classes = 3
    batch_size = 10
    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

    # Write `batch_size` serialized tf.Example protos to a temporary file.
    _, examples_file = tempfile.mkstemp()
    writer = tf.io.TFRecordWriter(examples_file)
    for _ in range(batch_size):
      sequence_length = random.randint(1, len(words))
sentence = random.sample(words, sequence_length) label = random.randint(0, n_classes - 1) example = example_pb2.Example( features=feature_pb2.Features( feature={ 'tokens': feature_pb2.Feature( bytes_list=feature_pb2.BytesList(value=sentence)), 'label': feature_pb2.Feature( int64_list=feature_pb2.Int64List(value=[label])), })) writer.write(example.SerializeToString()) writer.close() feature_spec = parsing_utils.classifier_parse_example_spec( self.feature_columns, label_key='label', label_dtype=tf.dtypes.int64) def _train_input_fn(): dataset = tf.compat.v1.data.experimental.make_batched_features_dataset( examples_file, batch_size, feature_spec) return dataset.map(lambda features: (features, features.pop('label'))) def _eval_input_fn(): dataset = tf.compat.v1.data.experimental.make_batched_features_dataset( examples_file, batch_size, feature_spec, num_epochs=1) return dataset.map(lambda features: (features, features.pop('label'))) def _predict_input_fn(): dataset = tf.compat.v1.data.experimental.make_batched_features_dataset( examples_file, batch_size, feature_spec, num_epochs=1) def features_fn(features): features.pop('label') return features return dataset.map(features_fn) self._test_complete_flow( train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, n_classes=n_classes, batch_size=batch_size) def _rnn_classifier_fn(feature_columns, n_classes, cell_units, model_dir, optimizer): return rnn.RNNClassifier( units=cell_units, sequence_feature_columns=feature_columns, n_classes=n_classes, optimizer=optimizer, model_dir=model_dir) @test_util.run_all_in_graph_and_eager_modes class RNNClassifierIntegrationTest(BaseRNNClassificationIntegrationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) BaseRNNClassificationIntegrationTest.__init__(self, _rnn_classifier_fn) def _rnn_classifier_dropout_fn(feature_columns, n_classes, cell_units, 
model_dir, optimizer): def _rnn_cell_fn(): cells = [] for units in cell_units: cells.append(tf_keras.layers.SimpleRNNCell(units, dropout=0.5)) return tf_keras.layers.StackedRNNCells(cells) return rnn.RNNClassifier( rnn_cell_fn=_rnn_cell_fn, sequence_feature_columns=feature_columns, n_classes=n_classes, optimizer=optimizer, model_dir=model_dir) @test_util.run_all_in_graph_and_eager_modes class RNNClassifierDropoutIntegrationTest(BaseRNNClassificationIntegrationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) BaseRNNClassificationIntegrationTest.__init__(self, _rnn_classifier_dropout_fn) def _rnn_estimator_fn(feature_columns, n_classes, cell_units, model_dir, optimizer): return rnn.RNNEstimator( head=multi_head_lib.MultiClassHead(n_classes=n_classes), units=cell_units, sequence_feature_columns=feature_columns, optimizer=optimizer, model_dir=model_dir) @test_util.run_all_in_graph_and_eager_modes class RNNEstimatorIntegrationTest(BaseRNNClassificationIntegrationTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) BaseRNNClassificationIntegrationTest.__init__(self, _rnn_estimator_fn) @test_util.run_all_in_graph_and_eager_modes class ModelFnTest(tf.test.TestCase): """Tests correctness of RNNEstimator's model function.""" def _test_sequential_mask_in_head(self, mask=None): features = { 'price': tf.sparse.SparseTensor( values=[10., 5., 4.], indices=[[0, 0], [0, 1], [1, 0]], dense_shape=[2, 2]) } if mask: features['sequence_mask'] = ops.convert_to_tensor(mask) expected_mask = mask or [[1, 1], [1, 0]] sequence_feature_columns = [ tf.feature_column.sequence_numeric_column('price', shape=(1,)) ] mock_head = _get_mock_head() seq_head = seq_head_lib.SequentialHeadWrapper( mock_head, sequence_length_mask='sequence_mask') estimator = rnn.RNNEstimator( head=seq_head, units=[10], 
sequence_feature_columns=sequence_feature_columns, return_sequences=True) estimator.model_fn( features=features, labels=None, mode=model_fn.ModeKeys.PREDICT, config=None) passed_features = list( mock_head.create_estimator_spec.call_args)[1]['features'] self.assertIn('sequence_mask', passed_features) sequence_mask = self.evaluate(passed_features['sequence_mask']) self.assertAllEqual(sequence_mask, expected_mask) def testSequentialMaskInHead(self): self._test_sequential_mask_in_head() def testSequentialMaskInHeadWithMasks(self): self._test_sequential_mask_in_head([[1, 1], [1, 1]]) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/saved_model_estimator.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Class that creates an Estimator from a SavedModel.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import six import tensorflow as tf from tensorflow.python.saved_model import constants from tensorflow.python.saved_model import loader_impl from tensorflow.python.saved_model import path_helpers from tensorflow.python.saved_model import signature_constants from tensorflow_estimator.python.estimator import estimator as estimator_lib from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.export import export_lib from tensorflow_estimator.python.estimator.mode_keys import ModeKeys class SavedModelEstimator(estimator_lib.EstimatorV2): """Create an Estimator from a SavedModel. Only SavedModels exported with `tf.estimator.Estimator.experimental_export_all_saved_models()` or `tf.estimator.Estimator.export_saved_model()` are supported for this class. Example with `tf.estimator.DNNClassifier`: **Step 1: Create and train DNNClassifier.** ```python feature1 = tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_vocabulary_list( key='feature1', vocabulary_list=('green', 'yellow')), dimension=1) feature2 = tf.feature_column.numeric_column(key='feature2', default_value=0.0) classifier = tf.estimator.DNNClassifier( hidden_units=[4,2], feature_columns=[feature1, feature2]) def input_fn(): features = {'feature1': tf.constant(['green', 'green', 'yellow']), 'feature2': tf.constant([3.5, 4.2, 6.1])} label = tf.constant([1., 0., 0.]) return tf.data.Dataset.from_tensors((features, label)).repeat() classifier.train(input_fn=input_fn, steps=10) ``` **Step 2: Export classifier.** First, build functions that specify the expected inputs. ```python # During train and evaluation, both the features and labels should be defined. 
supervised_input_receiver_fn = ( tf.estimator.experimental.build_raw_supervised_input_receiver_fn( {'feature1': tf.placeholder(dtype=tf.string, shape=[None]), 'feature2': tf.placeholder(dtype=tf.float32, shape=[None])}, tf.placeholder(dtype=tf.float32, shape=[None]))) # During predict mode, expect to receive a `tf.Example` proto, so a parsing # function is used. serving_input_receiver_fn = ( tf.estimator.export.build_parsing_serving_input_receiver_fn( tf.feature_column.make_parse_example_spec([feature1, feature2]))) ``` Next, export the model as a SavedModel. A timestamped directory will be created (for example `/tmp/export_all/1234567890`). ```python # Option 1: Save all modes (train, eval, predict) export_dir = classifier.experimental_export_all_saved_models( '/tmp/export_all', {tf.estimator.ModeKeys.TRAIN: supervised_input_receiver_fn, tf.estimator.ModeKeys.EVAL: supervised_input_receiver_fn, tf.estimator.ModeKeys.PREDICT: serving_input_receiver_fn}) # Option 2: Only export predict mode export_dir = classifier.export_saved_model( '/tmp/export_predict', serving_input_receiver_fn) ``` **Step 3: Create a SavedModelEstimator from the exported SavedModel.** ```python est = tf.estimator.experimental.SavedModelEstimator(export_dir) # If all modes were exported, you can immediately evaluate and predict, or # continue training. Otherwise only predict is available. eval_results = est.evaluate(input_fn=input_fn, steps=1) print(eval_results) est.train(input_fn=input_fn, steps=20) def predict_input_fn(): example = tf.train.Example() example.features.feature['feature1'].bytes_list.value.extend(['yellow']) example.features.feature['feature2'].float_list.value.extend([1.]) return {'inputs':tf.constant([example.SerializeToString()])} predictions = est.predict(predict_input_fn) print(next(predictions)) ``` """ def __init__(self, saved_model_dir, model_dir=None): """Initialize a SavedModelEstimator. 
The SavedModelEstimator loads its model function and variable values from the graphs defined in the SavedModel. There is no option to pass in `RunConfig` or `params` arguments, because the model function graph is defined statically in the SavedModel. Args: saved_model_dir: Directory containing SavedModel protobuf and subfolders. model_dir: Directory to save new checkpoints during training. Raises: NotImplementedError: If a DistributionStrategy is defined in the config. Unless the SavedModelEstimator is subclassed, this shouldn't happen. """ super(SavedModelEstimator, self).__init__( model_fn=self._model_fn_from_saved_model, model_dir=model_dir) if self._train_distribution or self._eval_distribution: raise NotImplementedError( 'SavedModelEstimator currently does not support ' 'DistributionStrategy.') self.saved_model_dir = saved_model_dir self.saved_model_loader = loader_impl.SavedModelLoader(saved_model_dir) self._available_modes = self._extract_available_modes() def _extract_available_modes(self): """Return list of modes found in SavedModel.""" available_modes = [] tf.compat.v1.logging.info( 'Checking available modes for SavedModelEstimator.') for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]: try: self._get_meta_graph_def_for_mode(mode) except RuntimeError: tf.compat.v1.logging.warn('%s mode not found in SavedModel.' % mode) continue if self._get_signature_def_for_mode(mode) is not None: available_modes.append(mode) tf.compat.v1.logging.info('Available modes for Estimator: %s' % available_modes) return available_modes def _validate_mode(self, mode): """Make sure that mode can be run using the SavedModel.""" if mode not in self._available_modes: raise RuntimeError('%s mode is not available in the SavedModel. Use ' 'saved_model_cli to check that the Metagraph for this ' 'mode has been exported.' 
                         % mode)

  def _get_meta_graph_def_for_mode(self, mode):
    """Return the MetaGraphDef saved under the export tags for `mode`."""
    tags = export_lib.EXPORT_TAG_MAP[mode]
    return self.saved_model_loader.get_meta_graph_def_from_tags(tags)

  def _get_signature_def_for_mode(self, mode):
    """Return the SignatureDef for `mode`, or None (with a warning) if absent."""
    meta_graph_def = self._get_meta_graph_def_for_mode(mode)
    # Predict-mode graphs are keyed by the default serving signature key;
    # train/eval graphs are keyed directly by the mode string.
    if mode == ModeKeys.PREDICT:
      sig_def_key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    else:
      sig_def_key = mode
    if sig_def_key not in meta_graph_def.signature_def:
      tf.compat.v1.logging.warn(
          'Metagraph for mode %s was found, but SignatureDef with'
          ' key \"%s\" is missing.' % (mode, sig_def_key))
      return None
    return meta_graph_def.signature_def[sig_def_key]

  def _get_saver_def_from_mode(self, mode):
    """Return the SaverDef stored in the MetaGraph for `mode`."""
    meta_graph_def = self._get_meta_graph_def_for_mode(mode)
    return meta_graph_def.saver_def

  def _create_and_assert_global_step(self, graph):
    # Do nothing here. The global step variable will be created/loaded from the
    # SavedModel. If a global step variable were created here, the result
    # will be two duplicate global step variables, causing issues during
    # the warm-start phase.
    # Due to the global variable being created in the model function, this may
    # cause issues when running DistributionStrategy. Thus, DistributionStrategy
    # is not yet supported with SavedModelEstimator.
    return None

  def _model_fn_from_saved_model(self, features, labels, mode):
    """Load a SavedModel graph and return an EstimatorSpec.

    This is the model function passed to the Estimator base class: each call
    re-imports the SavedModel graph for `mode` into the current default graph,
    splicing the caller-provided `features`/`labels` in place of the exported
    input placeholders.

    Args:
      features: Features produced by the input function.
      labels: Labels produced by the input function (may be None in predict).
      mode: One of `tf.estimator.ModeKeys`.

    Returns:
      `EstimatorSpec` wired to the tensors of the imported graph.

    Raises:
      RuntimeError: If `mode` is not available in the SavedModel, if the graph
        already contains a global step tensor, or if multiple train ops are
        found in the loaded collection.
    """
    # TODO(kathywu): Model function loads placeholders from the graph. Calling
    # export_all_saved_models creates another placeholder for the inputs, on top
    # of the original placeholders. There should be a way to avoid this.
    self._validate_mode(mode)

    g = tf.compat.v1.get_default_graph()
    # The SavedModel brings its own global step variable; a pre-existing one
    # (e.g. created by the input_fn) would collide with it.
    if tf.compat.v1.train.get_global_step(g) is not None:
      raise RuntimeError(
          'Graph must not contain a global step tensor before the SavedModel is'
          ' loaded. Please make sure that the input function does not create a '
          'global step.')

    # Extract SignatureDef for information about the input and output tensors.
    signature_def = self._get_signature_def_for_mode(mode)

    # Generate input map for replacing the inputs in the SavedModel graph with
    # the provided features and labels.
    input_map = _generate_input_map(signature_def, features, labels)

    # Create a list of the names of output tensors. When the graph is loaded,
    # names of the output tensors may be remapped. This ensures that the correct
    # tensors are returned in the EstimatorSpec.
    output_tensor_names = [
        value.name for value in six.itervalues(signature_def.outputs)
    ]

    # Load the graph. `output_tensors` contains output `Tensors` in the same
    # same order as the `output_tensor_names` list.
    tags = export_lib.EXPORT_TAG_MAP[mode]
    _, output_tensors = self.saved_model_loader.load_graph(
        g, tags, input_map=input_map, return_elements=output_tensor_names)

    # Create saver object, and restore from the SavedModel `variables` directory
    # if no checkpoints have been saved in the `model_dir`.
    saver_obj = tf.compat.v1.train.Saver(
        saver_def=self._get_saver_def_from_mode(mode))
    init_fn = None
    if not super(SavedModelEstimator, self).latest_checkpoint():
      init_fn = self._restore_from_saver

    # Create a scaffold from the MetaGraphDef that contains ops to initialize
    # the graph. This should mirror the steps from _add_meta_graph_for_mode(),
    # which creates a MetaGraphDef from the EstimatorSpec's scaffold.

    # Get asset tensors, if any.
    meta_graph_def = self._get_meta_graph_def_for_mode(mode)
    asset_tensors_dictionary = loader_impl.get_asset_tensors(
        self.saved_model_loader.export_dir, meta_graph_def, import_scope=None)

    # TODO(kathywu): switch to loader_impl._get_main_op
    scaffold = tf.compat.v1.train.Scaffold(
        local_init_op=loader_impl._get_main_op_tensor(  # pylint: disable=protected-access
            meta_graph_def),
        local_init_feed_dict=asset_tensors_dictionary,
        saver=saver_obj,
        init_fn=init_fn)

    # Ensure that a global step tensor has been created.
    global_step_tensor = tf.compat.v1.train.get_global_step(g)
    tf.compat.v1.train.assert_global_step(global_step_tensor)

    # Extract values to return in the EstimatorSpec. Map by the (possibly
    # remapped) tensor names recorded above.
    output_map = dict(zip(output_tensor_names, output_tensors))
    outputs = {
        key: output_map[value.name]
        for key, value in six.iteritems(signature_def.outputs)
    }

    loss, predictions, metrics = _validate_and_extract_outputs(
        mode, outputs, signature_def.method_name)

    # The exported train op was stashed in a dedicated collection at export
    # time; exactly zero or one entry is expected.
    train_op = tf.compat.v1.get_collection(constants.TRAIN_OP_KEY)
    if len(train_op) > 1:
      raise RuntimeError('Multiple ops found in the train_op collection.')
    train_op = None if not train_op else train_op[0]

    # Leave the collections empty so a later re-export of this Estimator does
    # not trip the SavedModel builder's empty-collection checks.
    _clear_saved_model_collections()
    return model_fn_lib.EstimatorSpec(
        scaffold=scaffold,
        mode=mode,
        loss=loss,
        train_op=train_op,
        predictions=predictions,
        eval_metric_ops=metrics)

  def _restore_from_saver(self, scaffold, session):
    """Scaffold init_fn: restore variables from the SavedModel checkpoint."""
    return scaffold.saver.restore(session,
                                  _get_saved_model_ckpt(self.saved_model_dir))

  def latest_checkpoint(self):
    """Returns the filename of the latest saved checkpoint.

    Returns:
      Filename of latest checkpoint in `model_dir`. If no checkpoints are found
      in `model_dir`, then the path to the SavedModel checkpoint is returned.
    """
    return (super(SavedModelEstimator, self).latest_checkpoint() or
            _get_saved_model_ckpt(self.saved_model_dir))


def _get_saved_model_ckpt(saved_model_dir):
  """Return path to variables checkpoint in a `SavedModel` directory.

  Raises:
    ValueError: If `saved_model_dir/variables/variables.index` does not exist,
      i.e. the directory is not a valid SavedModel.
  """
  if not tf.compat.v1.gfile.Exists(
      os.path.join(
          path_helpers.get_variables_dir(saved_model_dir),
          tf.compat.as_text('variables.index'))):
    raise ValueError('Directory provided has an invalid SavedModel format: %s' %
                     saved_model_dir)
  return path_helpers.get_variables_path(saved_model_dir)


def _clear_saved_model_collections():
  """Clear collections that are expected empty when exporting a SavedModel.

  The SavedModel builder uses these collections to track ops necessary to
  restore the graph state. These collections are expected to be empty before
  MetaGraphs are added to the builder.
""" del tf.compat.v1.get_collection_ref(tf.saved_model.ASSETS_KEY)[:] del tf.compat.v1.get_collection_ref( tf.compat.v1.saved_model.LEGACY_INIT_OP_KEY)[:] del tf.compat.v1.get_collection_ref(tf.compat.v1.saved_model.MAIN_OP_KEY)[:] del tf.compat.v1.get_collection_ref(constants.TRAIN_OP_KEY)[:] def _generate_input_map(signature_def, features, labels): """Return dict mapping an input tensor name to a feature or label tensor. Args: signature_def: SignatureDef loaded from SavedModel features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or `SparseTensor`, specifying the features to be passed to the model. labels: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or `SparseTensor`, specifying the labels to be passed to the model. May be `None`. Returns: dict mapping string names of inputs to features or labels tensors Raises: ValueError: if SignatureDef inputs are not completely mapped by the input features and labels. """ # Ensure that features and labels are dictionaries. If not, convert each to # a dictionary with a single item. The default keys are different for features # and labels. features = export_lib.wrap_and_check_input_tensors(features, 'feature') if labels is not None: # Unlike features, labels may be None (in prediction mode) labels = export_lib.wrap_and_check_input_tensors(labels, 'label') inputs = signature_def.inputs input_map = {} for key, tensor_info in six.iteritems(inputs): input_name = tensor_info.name if ':' in input_name: input_name = input_name[:input_name.find(':')] # When tensors are used as control inputs for operations, their names are # prepended with a '^' character in the GraphDef. To handle possible control # flow edge cases, control input names must be included in the input map. 
control_dependency_name = '^' + input_name if key in features: _check_same_dtype_and_shape(features[key], tensor_info, key) input_map[input_name] = input_map[control_dependency_name] = features[key] elif labels is not None and key in labels: _check_same_dtype_and_shape(labels[key], tensor_info, key) input_map[input_name] = input_map[control_dependency_name] = labels[key] else: raise ValueError( 'Key \"%s\" not found in features or labels passed in to the model ' 'function. All required keys: %s' % (key, inputs.keys())) return input_map def _check_same_dtype_and_shape(tensor, tensor_info, name): """Validate that tensor has the same properties as the TensorInfo proto. Args: tensor: a `Tensor` object. tensor_info: a `TensorInfo` proto. name: Name of the input (to identify Tensor if an error is raised). Raises: ValueError: If the tensor shape or dtype don't match the TensorInfo """ dtype_error = (tensor.dtype != tf.dtypes.DType(tensor_info.dtype)) shape_error = not tensor.shape.is_compatible_with(tensor_info.tensor_shape) if dtype_error or shape_error: msg = 'Tensor shape and/or dtype validation failed for input %s:' % name if dtype_error: msg += ('\n\tExpected dtype: %s, Got: %s' % (tf.dtypes.DType(tensor_info.dtype), tensor.dtype)) if shape_error: msg += ('\n\tExpected shape: %s, Got: %s' % (tf.TensorShape(tensor_info.tensor_shape), tensor.shape)) raise ValueError(msg) def _extract_eval_metrics(output_dict): """Return a eval metric dict extracted from the output_dict. Eval metrics consist of a value tensor and an update op. Both must be in the passed-in tensor dictionary for an eval metric to be added to the returned dictionary. Args: output_dict: a dict that maps strings to tensors. Returns: dict mapping strings to (value, update_op) tuples. 
""" # pylint: disable=protected-access metric_ops = {} separator_char = export_lib._SupervisedOutput._SEPARATOR_CHAR for key, tensor in six.iteritems(output_dict): split_key = key.split(separator_char) # The metric name may contain the separator character, so recreate its name. metric_name = separator_char.join(split_key[:-1]) if split_key[0] == export_lib._SupervisedOutput.METRICS_NAME: # If the key ends with the value suffix, and there is a corresponding # key ending with the update_op suffix, then add tensors to metrics dict. if split_key[-1] == export_lib._SupervisedOutput.METRIC_VALUE_SUFFIX: update_op = ''.join([ metric_name, separator_char, export_lib._SupervisedOutput.METRIC_UPDATE_SUFFIX ]) if update_op in output_dict: update_op_tensor = output_dict[update_op] metric_ops[metric_name] = (tensor, update_op_tensor) # pylint: enable=protected-access return metric_ops def _validate_and_extract_outputs(mode, output_dict, method_name): """Extract values from SignatureDef output dictionary. Args: mode: One of the modes enumerated in `tf.estimator.ModeKeys`. output_dict: dict of string SignatureDef keys to `Tensor`. method_name: Method name of the SignatureDef as a string. Returns: Tuple of ( loss: `Tensor` object, predictions: dictionary mapping string keys to `Tensor` objects, metrics: dictionary mapping string keys to a tuple of two `Tensor` objects ) Raises: RuntimeError: raised if SignatureDef has an invalid method name for the mode """ # pylint: disable=protected-access loss, predictions, metrics = None, None, None if mode == ModeKeys.PREDICT: predictions = output_dict else: # Validate that the SignatureDef's method name matches the expected name for # the given mode. 
expected_method_name = signature_constants.SUPERVISED_TRAIN_METHOD_NAME if mode == ModeKeys.EVAL: expected_method_name = signature_constants.SUPERVISED_EVAL_METHOD_NAME if method_name != expected_method_name: raise RuntimeError( 'Invalid SignatureDef method name for mode %s.\n\tExpected: %s\n\t' 'Got: %s\nPlease ensure that the SavedModel was exported with ' '`tf.estimator.experimental_export_all_saved_models()`.' % (mode, expected_method_name, method_name)) # Extract loss, metrics and predictions from the output dict. loss = output_dict[export_lib._SupervisedOutput.LOSS_NAME] metrics = _extract_eval_metrics(output_dict) predictions = { key: value for key, value in six.iteritems(output_dict) if key.split(export_lib._SupervisedOutput._SEPARATOR_CHAR)[0] == ( export_lib._SupervisedOutput.PREDICTIONS_NAME) } # pylint: enable=protected-access return loss, predictions, metrics ================================================ FILE: tensorflow_estimator/python/estimator/canned/saved_model_estimator_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for SavedModelEstimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import shutil
import tempfile

import six
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.framework.ops import add_to_collection
from tensorflow.python.framework.ops import GraphKeys
from tensorflow.python.ops import lookup_ops
from tensorflow.python.training import saver_test_utils
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.canned import saved_model_estimator
from tensorflow_estimator.python.estimator.export import export_lib
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys


def dummy_input_fn():
  """Supervised input fn: features {'x': [[1], [-2]]}, labels [[4], [-3]]."""
  return tf.compat.v1.data.Dataset.from_tensors(({
      'x': tf.constant([[1], [-2]], name='feature_x')
  }, tf.constant([[4], [-3]], name='truth'))).repeat()


def _serving_feature_dict():
  """Feature dict used both for predict input and the serving receiver."""
  return {'x': tf.constant([[5], [6]], name='feature_x')}


def dummy_input_fn_features_only():
  """Predict-mode input fn (no labels)."""
  return tf.compat.v1.data.Dataset.from_tensors(
      _serving_feature_dict()).repeat()


def dummy_supervised_receiver_fn():
  """Receiver fn for TRAIN/EVAL, derived from dummy_input_fn."""
  return export_lib.build_supervised_input_receiver_fn_from_input_fn(
      dummy_input_fn)


def dummy_serving_receiver_fn():
  """Raw serving receiver fn for PREDICT."""
  return export_lib.build_raw_serving_input_receiver_fn(_serving_feature_dict())


def model_fn_diff_modes(features, labels, mode):
  """Model fn with distinct constant losses/predictions per mode.

  TRAIN: loss 105, prediction [501], and a train op that bumps the global step
  by 1 and `some_var` (initialized to 21) by 3 each step.
  EVAL: loss 106, prediction [502]. PREDICT: loss 107, prediction [503].
  The tests below assert these exact constants.
  """
  _, _ = features, labels
  v = tf.Variable(21, name='some_var')
  train_op = None
  loss = tf.constant(104)  # Overwritten below in every branch.
  if mode == ModeKeys.TRAIN:
    loss = tf.constant(105)
    predictions = tf.constant([501])
    train_op = tf.group(
        tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(), 1),
        tf.compat.v1.assign_add(v, 3))
  elif mode == ModeKeys.EVAL:
    loss = tf.constant(106)
    predictions = tf.constant([502])
  else:
    loss = tf.constant(107)
    predictions = tf.constant([503])
  return model_fn_lib.EstimatorSpec(
      mode,
      loss=loss,
      train_op=train_op,
      eval_metric_ops={
          'abs_err':
              tf.compat.v1.metrics.mean_absolute_error(
                  tf.constant(0), predictions)
      },
      predictions=predictions)


def model_fn_with_trackable(features, labels, mode):
  """Wraps model_fn_diff_modes with a trackable (saveable) hash-table op.

  In TRAIN the table maps 'key1' -> 2.2; in other modes the prediction is
  503.0 + lookup('key1'), so a correctly restored table yields 505.2.
  """
  spec = model_fn_diff_modes(features, labels, mode)
  predictions = spec.predictions
  trackable_variable_ = saver_test_utils.CheckpointedOp(name='v2')
  if mode == ModeKeys.TRAIN:
    init_op = trackable_variable_.insert('key1', 2.2)
    add_to_collection(GraphKeys.TABLE_INITIALIZERS, init_op)
  else:
    looked_up = trackable_variable_.lookup('key1', 0.0)
    predictions = tf.constant([503.0]) + looked_up
  return model_fn_lib.EstimatorSpec(
      mode,
      loss=spec.loss,
      train_op=spec.train_op,
      eval_metric_ops=spec.eval_metric_ops,
      predictions=predictions)


@test_util.run_v1_only('b/122480158')
class SavedModelEstimatorTest(tf.test.TestCase):

  def setUp(self):
    super(SavedModelEstimatorTest, self).setUp()
    # Temp dirs created by _get_tmp_dir, removed in tearDown.
    self.tmpdirs = []

  def tearDown(self):
    for tmpdir in self.tmpdirs:
      # gfile.DeleteRecursively fails in the windows cmake test, so use shutil.
      shutil.rmtree(tmpdir, ignore_errors=True)
    self.tmpdirs = []
    super(SavedModelEstimatorTest, self).tearDown()

  def _get_tmp_dir(self):
    """Create a temp dir that will be cleaned up in tearDown."""
    tmpdir = tempfile.mkdtemp()
    self.tmpdirs.append(tmpdir)
    return tmpdir

  def _export_estimator(self,
                        train=True,
                        evaluate=True,
                        predict=True,
                        model_fn=model_fn_diff_modes):
    """Train an Estimator for 10 steps and export the requested modes.

    After training, global_step == 10 and (for model_fn_diff_modes)
    some_var == 21 + 3 * 10 == 51.

    Returns:
      Path of the exported SavedModel directory.
    """
    est = estimator.Estimator(model_fn, self._get_tmp_dir())
    est.train(input_fn=dummy_input_fn, steps=10)

    input_receiver_fn_map = {}
    if train:
      input_receiver_fn_map[ModeKeys.TRAIN] = (dummy_supervised_receiver_fn())
    if evaluate:
      input_receiver_fn_map[ModeKeys.EVAL] = (dummy_supervised_receiver_fn())
    if predict:
      input_receiver_fn_map[ModeKeys.PREDICT] = (dummy_serving_receiver_fn())

    export_base_path = self._get_tmp_dir()
    export_dir = est.experimental_export_all_saved_models(
        export_base_path, input_receiver_fn_map)
    return export_dir

  def test_load_all_modes(self):
    sme = saved_model_estimator.SavedModelEstimator(self._export_estimator(),
                                                    self._get_tmp_dir())
    # 10 exported steps + 1 + 2 = 13; some_var = 21 + 3 * 13 = 60.
    sme.train(input_fn=dummy_input_fn, steps=1)
    sme.train(input_fn=dummy_input_fn, steps=2)
    self.assertEqual(13, sme.get_variable_value('global_step'))
    self.assertEqual(60, sme.get_variable_value('some_var'))

    eval_results = sme.evaluate(dummy_input_fn, steps=5)
    self.assertEqual(13, eval_results['global_step'])
    self.assertEqual(106, eval_results['loss'])
    self.assertEqual(502, eval_results['metrics/abs_err'])

    predictions = next(sme.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'output': 503}, predictions)

  def test_load_all_modes_no_train(self):
    """Ensure that all functions can be used without requiring a ckpt."""
    sme = saved_model_estimator.SavedModelEstimator(self._export_estimator(),
                                                    self._get_tmp_dir())
    eval_results = sme.evaluate(dummy_input_fn, steps=5)
    self.assertEqual(10, eval_results['global_step'])
    self.assertEqual(106, eval_results['loss'])
    self.assertEqual(502, eval_results['metrics/abs_err'])

    predictions = next(sme.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'output': 503}, predictions)

  def test_partial_exported_estimator(self):
    """Modes missing from the SavedModel must raise a clear RuntimeError."""
    sme1 = saved_model_estimator.SavedModelEstimator(
        self._export_estimator(train=False, predict=False),
        self._get_tmp_dir())
    sme1.evaluate(dummy_input_fn, steps=5)
    with self.assertRaisesRegexp(RuntimeError, 'train mode is not available'):
      sme1.train(input_fn=dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(RuntimeError, 'infer mode is not available'):
      next(sme1.predict(dummy_input_fn_features_only))

    sme2 = saved_model_estimator.SavedModelEstimator(
        self._export_estimator(evaluate=False), self._get_tmp_dir())
    sme2.train(input_fn=dummy_input_fn, steps=1)
    next(sme2.predict(dummy_input_fn_features_only))
    with self.assertRaisesRegexp(RuntimeError, 'eval mode is not available'):
      sme2.evaluate(dummy_input_fn, steps=5)

  def test_with_incorrect_input(self):
    """Shape/dtype mismatches against the exported signature must raise."""
    sme = saved_model_estimator.SavedModelEstimator(self._export_estimator(),
                                                    self._get_tmp_dir())

    def bad_shape_input_fn():
      # Rank-1 features where the signature expects rank-2.
      return tf.compat.v1.data.Dataset.from_tensors(({
          'x': tf.constant([1, 2], dtype=tf.dtypes.int64)
      }, tf.constant([1, 2], dtype=tf.dtypes.float32)))

    with self.assertRaisesRegexp(ValueError, 'Expected shape'):
      sme.train(bad_shape_input_fn, steps=1)

    def bad_dtype_input_fn():
      # Correct shapes, wrong dtypes.
      return tf.compat.v1.data.Dataset.from_tensors(({
          'x': tf.constant([[1], [1]], dtype=tf.dtypes.int32)
      }, tf.constant([[1], [1]], dtype=tf.dtypes.int64)))

    with self.assertRaisesRegexp(ValueError, 'Expected dtype'):
      sme.train(bad_dtype_input_fn, steps=1)

  def test_input_fn_with_global_step(self):
    """An input_fn that creates a global step must be rejected."""
    sme = saved_model_estimator.SavedModelEstimator(self._export_estimator(),
                                                    self._get_tmp_dir())

    def bad_input_fn():
      # Creating the global step here collides with the one in the SavedModel.
      tf.compat.v1.train.get_or_create_global_step()
      return tf.compat.v1.data.Dataset.from_tensors(({
          'x': tf.constant([[1], [1]], dtype=tf.dtypes.int64)
      }, tf.constant([[1], [1]], dtype=tf.dtypes.float32)))

    with self.assertRaisesRegexp(RuntimeError,
                                 'Graph must not contain a global step tensor'):
      sme.train(bad_input_fn, steps=1)

  def test_re_export_saved_model_serving_only(self):
    sme = saved_model_estimator.SavedModelEstimator(self._export_estimator(),
                                                    self._get_tmp_dir())
    # 10 exported steps + 3 = 13; some_var = 21 + 3 * 13 = 60.
    sme.train(dummy_input_fn, steps=3)
    self.assertEqual(13, sme.get_variable_value('global_step'))
    self.assertEqual(60, sme.get_variable_value('some_var'))

    predictions = next(sme.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'output': 503}, predictions)

    # Export SavedModel, and test that the variable and prediction values are
    # the same.
    sme_export_dir = sme.export_saved_model(self._get_tmp_dir(),
                                            dummy_serving_receiver_fn())

    sme2 = saved_model_estimator.SavedModelEstimator(sme_export_dir,
                                                     self._get_tmp_dir())
    self.assertEqual(60, sme.get_variable_value('some_var'))
    self.assertEqual(13, sme.get_variable_value('global_step'))
    predictions = next(sme2.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'output': 503}, predictions)

  def test_re_export_saved_model(self):
    sme = saved_model_estimator.SavedModelEstimator(self._export_estimator(),
                                                    self._get_tmp_dir())
    self.assertDictEqual(
        {
            'loss': 106,
            'metrics/abs_err': 502,
            'global_step': 10
        }, sme.evaluate(dummy_input_fn, steps=1))

    sme.train(dummy_input_fn, steps=3)
    self.assertDictEqual(
        {
            'loss': 106,
            'metrics/abs_err': 502,
            'global_step': 13
        }, sme.evaluate(dummy_input_fn, steps=1))
    self.assertEqual(60, sme.get_variable_value('some_var'))

    predictions = next(sme.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'output': 503}, predictions)

    # Export SavedModel for all modes
    input_receiver_fn_map = {
        ModeKeys.TRAIN: dummy_supervised_receiver_fn(),
        ModeKeys.EVAL: dummy_supervised_receiver_fn(),
        ModeKeys.PREDICT: dummy_serving_receiver_fn()
    }
    sme_export_dir = sme.experimental_export_all_saved_models(
        self._get_tmp_dir(), input_receiver_fn_map)

    sme2 = saved_model_estimator.SavedModelEstimator(sme_export_dir,
                                                     self._get_tmp_dir())
    self.assertDictEqual(
        {
            'loss': 106,
            'metrics/abs_err': 502,
            'global_step': 13
        }, sme.evaluate(dummy_input_fn, steps=1))
    self.assertEqual(60, sme.get_variable_value('some_var'))
    # Continue training the re-exported model: 13 + 7 = 20 steps.
    sme.train(dummy_input_fn, steps=7)
    self.assertEqual(20, sme.get_variable_value('global_step'))
    predictions = next(sme2.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'output': 503}, predictions)

  def test_re_export_saved_model_with_trackable(self):
    """Trackable table state must survive export -> load -> re-export."""
    sme = saved_model_estimator.SavedModelEstimator(
        self._export_estimator(model_fn=model_fn_with_trackable),
        self._get_tmp_dir())
    self.assertDictEqual(
        {
            'loss': 106,
            'metrics/abs_err': 502,
            'global_step': 10
        }, sme.evaluate(dummy_input_fn, steps=1))

    sme.train(dummy_input_fn, steps=3)
    self.assertDictEqual(
        {
            'loss': 106,
            'metrics/abs_err': 502,
            'global_step': 13
        }, sme.evaluate(dummy_input_fn, steps=1))
    self.assertEqual(60, sme.get_variable_value('some_var'))

    # 505.2 = 503.0 + restored table value 2.2 (see model_fn_with_trackable).
    predictions = next(sme.predict(dummy_input_fn_features_only))
    self.assertIn('output', predictions)
    self.assertAlmostEqual(505.2, predictions['output'], places=4)

    # Export SavedModel for all modes
    input_receiver_fn_map = {
        ModeKeys.TRAIN: dummy_supervised_receiver_fn(),
        ModeKeys.EVAL: dummy_supervised_receiver_fn(),
        ModeKeys.PREDICT: dummy_serving_receiver_fn()
    }
    sme_export_dir = sme.experimental_export_all_saved_models(
        self._get_tmp_dir(), input_receiver_fn_map)

    sme2 = saved_model_estimator.SavedModelEstimator(sme_export_dir,
                                                     self._get_tmp_dir())
    sme2.train(dummy_input_fn, steps=7)
    self.assertEqual(20, sme2.get_variable_value('global_step'))
    self.assertEqual(
        81,  # 81 = 60 (last value) + 3 (step) * 7 (steps)
        sme2.get_variable_value('some_var'))

    predictions = next(sme2.predict(dummy_input_fn_features_only))
    self.assertIn('output', predictions)
    self.assertAlmostEqual(505.2, predictions['output'], places=4)

  def test_load_saved_model_from_serving_only(self):
    """A predict-only export (export_saved_model) should still be loadable."""

    def model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant([103]),
          train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                           1),
          predictions=tf.constant([502]),
          export_outputs={
              'test': export_lib.ClassificationOutput(tf.constant([[32.]]))
          })

    est = estimator.Estimator(model_fn, self._get_tmp_dir())
    est.train(input_fn=dummy_input_fn, steps=10)

    def serving_input_receiver_fn():
      return export_lib.ServingInputReceiver(
          {'test-features': tf.constant([[1], [1]])},
          tf.compat.v1.placeholder(dtype=tf.dtypes.string))

    export_dir = est.export_saved_model(self._get_tmp_dir(),
                                        serving_input_receiver_fn)

    sme = saved_model_estimator.SavedModelEstimator(export_dir,
                                                    self._get_tmp_dir())

    def input_fn():
      return {'inputs': tf.constant('someinputstr')}

    prediction = next(sme.predict(input_fn))
    self.assertDictEqual({'scores': 32}, prediction)

  def test_with_local_init_op(self):
    """The exported local_init_op must run on each session creation."""

    def model_fn(features, labels, mode):
      _, _ = features, labels
      v = tf.Variable(21, name='some_var')
      # local_init_op decrements some_var by 3 every time a session starts.
      scaffold = tf.compat.v1.train.Scaffold(
          local_init_op=tf.compat.v1.assign_add(v, -3).op)
      return model_fn_lib.EstimatorSpec(
          mode,
          scaffold=scaffold,
          train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                           1),
          loss=tf.identity(v))

    export_dir = self._export_estimator(predict=False, model_fn=model_fn)
    sme = saved_model_estimator.SavedModelEstimator(export_dir,
                                                    self._get_tmp_dir())

    eval_results1 = sme.evaluate(dummy_input_fn, steps=2)
    self.assertEqual(15, eval_results1['loss'])

    sme.train(dummy_input_fn, steps=1)
    self.assertEqual(15, sme.get_variable_value('some_var'))

    eval_results2 = sme.evaluate(dummy_input_fn, steps=5)
    self.assertEqual(12, eval_results2['loss'])

  def test_with_assets(self):
    """Asset file paths must be remapped to the SavedModel assets directory."""
    filename = 'test_asset'
    tmpdir = tempfile.mkdtemp()
    absolute_filepath = os.path.join(tmpdir, filename)
    num_buckets = 1000
    with open(absolute_filepath, 'w') as f:
      f.write(six.ensure_str(b'test'))

    def model_fn(features, labels, mode):
      _, _ = features, labels
      v = tf.Variable(0, name='some_var', dtype=tf.dtypes.int64)
      # We verify the value of filepath_tensor is replaced with a path to the
      # saved model's assets directory by assigning a hash of filepath_tensor
      # to some_var.
      filepath_tensor = ops.convert_to_tensor(absolute_filepath)
      tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.ASSET_FILEPATHS,
                                     filepath_tensor)
      scaffold = tf.compat.v1.train.Scaffold(
          local_init_op=tf.compat.v1.assign(
              v, tf.strings.to_hash_bucket_fast(filepath_tensor,
                                                num_buckets)).op)
      return model_fn_lib.EstimatorSpec(
          mode,
          scaffold=scaffold,
          train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                           1),
          loss=tf.identity(0))

    export_dir = self._export_estimator(predict=False, model_fn=model_fn)
    sme = saved_model_estimator.SavedModelEstimator(export_dir,
                                                    self._get_tmp_dir())

    # Hash of the path the asset should have inside the export directory.
    with self.session() as sess:
      expected_bucket = sess.run(
          tf.strings.to_hash_bucket_fast(
              os.path.join(
                  six.ensure_str(export_dir),
                  six.ensure_str(tf.saved_model.ASSETS_DIRECTORY),
                  six.ensure_str(filename)), num_buckets))

    sme.train(dummy_input_fn, steps=1)
    self.assertEqual(expected_bucket, sme.get_variable_value('some_var'))

  def test_with_working_input_fn(self):
    """Features/labels from the input_fn must be spliced into the graph."""

    def model_fn(features, labels, mode):
      loss = None
      if labels is not None:
        loss = labels[0][0] + labels[1][0]
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=loss,
          train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                           1),
          predictions={
              'features_0': tf.identity([features['x'][0][0]]),
              'features_1': tf.identity([features['x'][1][0]])
          })

    sme = saved_model_estimator.SavedModelEstimator(
        self._export_estimator(model_fn=model_fn), self._get_tmp_dir())
    # dummy_input_fn labels: 4 + (-3) = 1.
    eval_results = sme.evaluate(dummy_input_fn, steps=1)
    self.assertEqual(1, eval_results['loss'])

    predictions = next(sme.predict(dummy_input_fn_features_only))
    self.assertDictEqual({'features_0': 5, 'features_1': 6}, predictions)

  def test_control_dependency(self):
    # Control dependencies are saved with "^" appended to the start of the input
    # name. The input map must include control dependencies as well.

    def model_fn(features, labels, mode):
      _ = labels
      with tf.control_dependencies([features['x']]):
        loss = features['x'][1][0]
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=loss,
          train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                           1))

    sme = saved_model_estimator.SavedModelEstimator(
        self._export_estimator(train=False, predict=False, model_fn=model_fn),
        self._get_tmp_dir())
    sme.evaluate(dummy_input_fn, steps=1)  # Should run without error

  def test_saveable_resources(self):
    """MutableHashTable contents must be saved and restored across loads."""

    def model_fn(features, labels, mode):
      tb = lookup_ops.MutableHashTable(
          key_dtype=tf.dtypes.int32,
          value_dtype=tf.dtypes.int32,
          default_value=-1)
      predictions = tb.lookup(features['x'])
      train_op = None
      if mode == ModeKeys.TRAIN:
        train_op = tf.group(
            tb.insert(features['x'], labels),
            tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(), 1))
      return model_fn_lib.EstimatorSpec(
          mode, loss=tf.constant(0), predictions=predictions, train_op=train_op)

    # Trains the model so that the table maps 1 -> 4, and -2 -> -3
    # (see dummy_input_fn)
    sme = saved_model_estimator.SavedModelEstimator(
        self._export_estimator(model_fn=model_fn), self._get_tmp_dir())

    def gen_input_fn(features, labels=None):

      def fn():
        if labels:
          t = ({
              'x': tf.constant(features, name='feature_x')
          }, tf.constant(labels, name='truth'))
        else:
          t = {'x': tf.constant(features, name='feature_x')}
        return tf.compat.v1.data.Dataset.from_tensors(t).repeat()

      return fn

    # Unknown key -> default value; trained key -> stored value.
    self.assertAllEqual([-1], next(sme.predict(gen_input_fn([[5]])))['output'])
    self.assertAllEqual([4], next(sme.predict(gen_input_fn([[1]])))['output'])

    # Further training updates the restored table in place.
    sme.train(gen_input_fn([[5]], [[6]]), steps=1)
    self.assertAllEqual([6], next(sme.predict(gen_input_fn([[5]])))['output'])


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/canned/testdata/wire_vocabulary.txt
================================================
omar
stringer
marlo

================================================
FILE:
tensorflow_estimator/python/estimator/canned/timeseries/BUILD ================================================ # Placeholder: load py_library load("//tensorflow_estimator:estimator.bzl", "py_test") package(default_visibility = ["//tensorflow_estimator:__subpackages__"]) licenses(["notice"]) py_library( name = "feature_keys", srcs = [ "feature_keys.py", ], srcs_version = "PY3", deps = ["//tensorflow_estimator/python/estimator:expect_tensorflow_installed"], ) py_library( name = "saved_model_utils", srcs = [ "saved_model_utils.py", ], srcs_version = "PY3", deps = [ ":feature_keys", ":head", ":model_utils", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "model", srcs = [ "model.py", ], srcs_version = "PY3", deps = [ ":feature_keys", ":math_utils", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "estimators", srcs = [ "estimators.py", ], srcs_version = "PY3", deps = [ ":ar_model", ":feature_keys", ":head", ":math_utils", ":saved_model_utils", ":state_management", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "estimators_test", srcs = [ "estimators_test.py", ], python_version = "PY3", srcs_version = "PY3", tags = [ "notap", # TODO(b/132129465): Re-enable. 
], deps = [ ":ar_model", ":estimators", ":feature_keys", ":saved_model_utils", "//tensorflow_estimator/python/estimator:estimator_py", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "head", srcs = [ "head.py", ], srcs_version = "PY3", deps = [ ":feature_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "head_test", srcs = [ "head_test.py", ], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ ":estimators", ":feature_keys", ":head", ":model", ":state_management", "//tensorflow_estimator/python/estimator:estimator_py", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "model_utils", srcs = [ "model_utils.py", ], srcs_version = "PY3", deps = [ ":feature_keys", "//tensorflow_estimator/python/estimator:expect_numpy_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "state_management", srcs = [ "state_management.py", ], srcs_version = "PY3", deps = [ ":feature_keys", ":math_utils", ":model", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "ar_model", srcs = [ "ar_model.py", ], srcs_version = "PY3", deps = [ ":feature_keys", ":model", ":model_utils", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "ar_model_test", srcs = [ "ar_model_test.py", ], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ ":ar_model", ":estimators", ":feature_keys", 
"//tensorflow_estimator/python/estimator:estimator_py", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "ar_model_training_test", srcs = [ "ar_model_training_test.py", ], python_version = "PY3", shard_count = 4, srcs_version = "PY3", deps = [ ":ar_model", ":estimators", ":feature_keys", "//tensorflow_estimator/python/estimator:estimator_py", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_library( name = "math_utils", srcs = [ "math_utils.py", ], srcs_version = "PY3", deps = [ ":feature_keys", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "math_utils_test", srcs = [ "math_utils_test.py", ], python_version = "PY3", srcs_version = "PY3", deps = [ ":feature_keys", ":math_utils", "//tensorflow_estimator/python/estimator:expect_proto_cpp_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) ================================================ FILE: tensorflow_estimator/python/estimator/canned/timeseries/ar_model.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Auto-Regressive models for time series data.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.ops import distributions from tensorflow.python.ops import gen_math_ops from tensorflow_estimator.python.estimator import estimator_lib from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned.timeseries import model from tensorflow_estimator.python.estimator.canned.timeseries import model_utils from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import PredictionFeatures from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import TrainEvalFeatures class LSTMPredictionModel(tf_keras.models.Model): """A simple encoder/decoder model using an LSTM. This model does not operate on its own, but rather is a plugin to `ARModel`. See `ARModel`'s constructor documentation (`prediction_model_factory`) for a usage example. """ def __init__(self, num_features, input_window_size, output_window_size, num_units=128): """Construct the LSTM prediction model. Args: num_features: number of input features per time step. input_window_size: Number of past time steps of data to look at when doing the regression. output_window_size: Number of future time steps to predict. Note that setting it to > 1 empirically seems to give a better fit. num_units: The number of units in the encoder and decoder LSTM cells. 
""" super(LSTMPredictionModel, self).__init__() self._encoder = tf_keras.layers.LSTM( num_units, name="encoder", dtype=self.dtype, return_state=True) self._decoder = tf_keras.layers.LSTM( num_units, name="decoder", dtype=self.dtype, return_sequences=True) self._mean_transform = tf_keras.layers.Dense(num_features, name="mean_transform") self._covariance_transform = tf_keras.layers.Dense( num_features, name="covariance_transform") def call(self, input_window_features, output_window_features): """Compute predictions from input and output windows.""" _, state_h, state_c = self._encoder(input_window_features) encoder_states = [state_h, state_c] decoder_output = self._decoder( output_window_features, initial_state=encoder_states) predicted_mean = self._mean_transform(decoder_output) predicted_covariance = gen_math_ops.exp( self._covariance_transform(decoder_output)) return {"mean": predicted_mean, "covariance": predicted_covariance} class ARModel(model.TimeSeriesModel): """Auto-regressive model, both linear and non-linear. Features to the model include time and values of input_window_size timesteps, and times for output_window_size timesteps. These are passed through a configurable prediction model, and then fed to a loss function (e.g. squared loss). Note that this class can also be used to regress against time only by setting the input_window_size to zero. Each periodicity in the `periodicities` arg is divided by the `num_time_buckets` into time buckets that are represented as features added to the model. A good heuristic for picking an appropriate periodicity for a given data set would be the length of cycles in the data. For example, energy usage in a home is typically cyclic each day. If the time feature in a home energy usage dataset is in the unit of hours, then 24 would be an appropriate periodicity. Similarly, a good heuristic for `num_time_buckets` is how often the data is expected to change within the cycle. 
For the aforementioned home energy usage dataset and periodicity of 24, then 48 would be a reasonable value if usage is expected to change every half hour. Each feature's value for a given example with time t is the difference between t and the start of the time bucket it falls under. If it doesn't fall under a feature's associated time bucket, then that feature's value is zero. For example: if `periodicities` = (9, 12) and `num_time_buckets` = 3, then 6 features would be added to the model, 3 for periodicity 9 and 3 for periodicity 12. For an example data point where t = 17: - It's in the 3rd time bucket for periodicity 9 (2nd period is 9-18 and 3rd time bucket is 15-18) - It's in the 2nd time bucket for periodicity 12 (2nd period is 12-24 and 2nd time bucket is between 16-20). Therefore the 6 added features for this row with t = 17 would be: # Feature name (periodicity#_timebucket#), feature value P9_T1, 0 # not in first time bucket P9_T2, 0 # not in second time bucket P9_T3, 2 # 17 - 15 since 15 is the start of the 3rd time bucket P12_T1, 0 # not in first time bucket P12_T2, 1 # 17 - 16 since 16 is the start of the 2nd time bucket P12_T3, 0 # not in third time bucket """ SQUARED_LOSS = "squared_loss" NORMAL_LIKELIHOOD_LOSS = "normal_likelihood_loss" def __init__(self, periodicities, input_window_size, output_window_size, num_features, prediction_model_factory=LSTMPredictionModel, num_time_buckets=10, loss=NORMAL_LIKELIHOOD_LOSS, exogenous_feature_columns=None): """Constructs an auto-regressive model. Args: periodicities: periodicities of the input data, in the same units as the time feature (for example 24 if feeding hourly data with a daily periodicity, or 60 * 24 if feeding minute-level data with daily periodicity). Note this can be a single value or a list of values for multiple periodicities. input_window_size: Number of past time steps of data to look at when doing the regression. output_window_size: Number of future time steps to predict. 
Note that setting it to > 1 empirically seems to give a better fit. num_features: number of input features per time step. prediction_model_factory: A callable taking arguments `num_features`, `input_window_size`, and `output_window_size` and returning a `tf_keras.Model`. The `Model`'s `call()` takes two arguments: an input window and an output window, and returns a dictionary of predictions. See `LSTMPredictionModel` for an example. The default model computes predictions as a linear function of flattened input and output windows. num_time_buckets: Number of buckets into which to divide (time % periodicity). This value multiplied by the number of periodicities is the number of time features added to the model. loss: Loss function to use for training. Currently supported values are SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For SQUARED_LOSS, the evaluation loss is reported based on un-scaled observations and predictions, while the training loss is computed on normalized data (if input statistics are available). exogenous_feature_columns: A list of `tf.feature_column`s (for example `tf.feature_column.embedding_column`) corresponding to features which provide extra information to the model but are not part of the series to be predicted. Example usage: >>> model = ar_model.ARModel( ... periodicities=2, num_features=3, ... prediction_model_factory=functools.partial( ... 
LSTMPredictionModel, hidden_layer_sizes=[10, 10])) """ self._model_factory = prediction_model_factory self.input_window_size = input_window_size self.output_window_size = output_window_size self.window_size = self.input_window_size + self.output_window_size self.loss = loss super(ARModel, self).__init__( num_features=num_features, exogenous_feature_columns=exogenous_feature_columns) if exogenous_feature_columns is not None: self.exogenous_size = self._get_exogenous_embedding_shape()[-1] else: self.exogenous_size = 0 assert num_time_buckets > 0 self._buckets = int(num_time_buckets) if periodicities is None or not periodicities: periodicities = [] elif (not isinstance(periodicities, list) and not isinstance(periodicities, tuple)): periodicities = [periodicities] self._periodicities = [int(p) for p in periodicities] for p in self._periodicities: assert p > 0 assert len(self._periodicities) or self.input_window_size assert output_window_size > 0 def initialize_graph(self, input_statistics=None): super(ARModel, self).initialize_graph(input_statistics=input_statistics) self._model_scope = tf.compat.v1.variable_scope( # The trailing slash means we strip all enclosing variable_scopes, which # unfortunately is necessary because the model gets called inside and # outside a "while" scope (for prediction and training respectively), # and the variables names need to match. "model/", use_resource=True) self._model_instance = self._model_factory( num_features=self.num_features, input_window_size=self.input_window_size, output_window_size=self.output_window_size) def get_start_state(self): # State which matches the format we'll return later. Typically this will not # be used by the model directly, but the shapes and dtypes should match so # that the serving input_receiver_fn gets placeholder shapes correct. 
    # (times, values, exogenous) zero tensors shaped like the end_state
    # returned by _process_window.
    return (tf.zeros([self.input_window_size], dtype=tf.dtypes.int64),
            tf.zeros([self.input_window_size, self.num_features],
                     dtype=self.dtype),
            tf.zeros([self.input_window_size, self.exogenous_size],
                     dtype=self.dtype))

  # TODO(allenl,agarwal): Support sampling for AR.
  def random_model_parameters(self, seed=None):
    # Sampling is unimplemented for auto-regressive models.
    pass

  def generate(self,
               number_of_series,
               series_length,
               model_parameters=None,
               seed=None):
    # Series generation is unimplemented for auto-regressive models.
    pass

  def _predicted_covariance_op(self, activations, num_values):
    """Builds the covariance prediction from the last activation layer.

    For NORMAL_LIKELIHOOD_LOSS the covariance is learned (exp of a linear
    projection, so always positive); otherwise a constant all-ones tensor of
    the right shape is returned.
    """
    activation, activation_size = activations[-1]
    if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
      log_sigma_square = model_utils.fully_connected(
          activation,
          activation_size,
          self.output_window_size * num_values,
          name="log_sigma_square",
          activation=None)
      predicted_covariance = gen_math_ops.exp(log_sigma_square)
      predicted_covariance = tf.reshape(
          predicted_covariance, [-1, self.output_window_size, num_values])
    else:
      shape = tf.stack([
          tf.compat.v1.shape(activation)[0],
          tf.constant(self.output_window_size),
          tf.constant(num_values)
      ])
      predicted_covariance = tf.ones(shape=shape, dtype=activation.dtype)
    return predicted_covariance

  def _predicted_mean_op(self, activations):
    """Builds the mean prediction (a linear projection of the last layer)."""
    activation, activation_size = activations[-1]
    predicted_mean = model_utils.fully_connected(
        activation,
        activation_size,
        self.output_window_size * self.num_features,
        name="predicted_mean",
        activation=None)
    return tf.reshape(predicted_mean,
                      [-1, self.output_window_size, self.num_features])

  def prediction_ops(self, times, values, exogenous_regressors):
    """Compute model predictions given input data.

    Args:
      times: A [batch size, self.window_size] integer Tensor, the first
        self.input_window_size times in each part of the batch indicating
        input features, and the last self.output_window_size times indicating
        prediction times.
      values: A [batch size, self.input_window_size, self.num_features] Tensor
        with input features.
      exogenous_regressors: A [batch size, self.window_size,
        self.exogenous_size] Tensor with exogenous features.
    Returns:
      A dictionary of predictions (keys "mean" and "covariance"), where each
      value is a Tensor with shape [batch size, self.output_window_size,
      self.num_features].
    """
    times.get_shape().assert_is_compatible_with([None, self.window_size])
    batch_size = tf.compat.v1.shape(times)[0]
    if self.input_window_size:
      values.get_shape().assert_is_compatible_with(
          [None, self.input_window_size, self.num_features])
    if exogenous_regressors is not None:
      exogenous_regressors.get_shape().assert_is_compatible_with(
          [None, self.window_size, self.exogenous_size])
    # Create input features, concatenated along the feature (last) axis from
    # up to three sources: periodic time features, the raw input window, and
    # exogenous features.
    input_window_features = []
    input_feature_size = 0
    output_window_features = []
    output_feature_size = 0
    if self._periodicities:
      _, time_features = self._compute_time_features(times)
      num_time_features = self._buckets * len(self._periodicities)
      time_features = tf.reshape(
          time_features, [batch_size, self.window_size, num_time_features])
      input_time_features, output_time_features = tf.split(
          time_features, (self.input_window_size, self.output_window_size),
          axis=1)
      input_feature_size += num_time_features
      output_feature_size += num_time_features
      input_window_features.append(input_time_features)
      output_window_features.append(output_time_features)
    if self.input_window_size:
      inp = tf.slice(values, [0, 0, 0], [-1, self.input_window_size, -1])
      input_window_features.append(
          tf.reshape(inp,
                     [batch_size, self.input_window_size, self.num_features]))
      input_feature_size += self.num_features
    if self.exogenous_size:
      input_exogenous_features, output_exogenous_features = tf.split(
          exogenous_regressors,
          (self.input_window_size, self.output_window_size),
          axis=1)
      input_feature_size += self.exogenous_size
      output_feature_size += self.exogenous_size
      input_window_features.append(input_exogenous_features)
      output_window_features.append(output_exogenous_features)
    assert input_window_features
    input_window_features = tf.concat(input_window_features, axis=2)
    if output_window_features:
      output_window_features = tf.concat(output_window_features, axis=2)
    else:
      # No per-output-step features; feed a zero-width tensor so the model
      # always receives an output window argument.
      output_window_features = tf.zeros(
          [batch_size, self.output_window_size, 0], dtype=self.dtype)
    static_batch_size = times.get_shape().dims[0].value
    input_window_features.set_shape(
        [static_batch_size, self.input_window_size, input_feature_size])
    output_window_features.set_shape(
        [static_batch_size, self.output_window_size, output_feature_size])
    return self._output_window_predictions(input_window_features,
                                           output_window_features)

  def _output_window_predictions(self, input_window_features,
                                 output_window_features):
    # Run the plugged-in prediction model under the shared variable scope so
    # variables match between training and the prediction while-loop.
    with self._model_scope:
      predictions = self._model_instance(input_window_features,
                                         output_window_features)
      result_shape = [None, self.output_window_size, self.num_features]
      for v in predictions.values():
        v.set_shape(result_shape)
      return predictions

  def loss_op(self, targets, prediction_ops):
    """Create loss_op."""
    prediction = prediction_ops["mean"]
    if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
      covariance = prediction_ops["covariance"]
      # Clamp the variance away from zero for numerical stability.
      sigma = tf.math.sqrt(tf.math.maximum(covariance, 1e-5))
      normal = distributions.normal.Normal(loc=targets, scale=sigma)
      loss_op = -tf.math.reduce_sum(normal.log_prob(prediction))
    else:
      assert self.loss == ARModel.SQUARED_LOSS, self.loss
      loss_op = tf.math.reduce_sum(tf.math.square(prediction - targets))
    # Normalize by the total number of target elements.
    loss_op /= tf.cast(
        tf.math.reduce_prod(tf.compat.v1.shape(targets)), loss_op.dtype)
    return loss_op

  def _process_exogenous_features(self, times, features):
    embedded = super(ARModel, self)._process_exogenous_features(
        times=times, features=features)
    if embedded is None:
      assert self.exogenous_size == 0
      # No embeddings. Return a zero-size [batch, times, 0] array so we don't
      # have to special case it downstream.
      return tf.zeros(
          tf.concat([tf.compat.v1.shape(times), tf.constant([0])], axis=0))
    else:
      return embedded

  # TODO(allenl, agarwal): Consider better ways of warm-starting predictions.
  def predict(self, features):
    """Computes predictions multiple steps into the future.
    Args:
      features: A dictionary with the following key/value pairs:
        PredictionFeatures.TIMES: A [batch size, predict window size] integer
          Tensor of times, after the window of data indicated by
          `STATE_TUPLE`, to make predictions for.
        PredictionFeatures.STATE_TUPLE: A tuple of (times, values), times with
          shape [batch size, self.input_window_size], values with shape [batch
          size, self.input_window_size, self.num_features] representing a
          segment of the time series before `TIMES`. This data is used to
          start of the autoregressive computation. This should have data for
          at least self.input_window_size timesteps.
        And any exogenous features, with shapes prefixed by shape of `TIMES`.
    Returns:
      A dictionary with keys, "mean", "covariance". The values are Tensors of
      shape [batch_size, predict window size, num_features] and correspond to
      the values passed in `TIMES`.
    """
    if not self._graph_initialized:
      self.initialize_graph()
    predict_times = tf.cast(
        ops.convert_to_tensor(features[PredictionFeatures.TIMES]),
        tf.dtypes.int32)
    # Everything that isn't TIMES/VALUES/STATE_TUPLE is treated as an
    # exogenous feature.
    exogenous_regressors = self._process_exogenous_features(
        times=predict_times,
        features={
            key: value
            for key, value in features.items()
            if key not in [
                TrainEvalFeatures.TIMES, TrainEvalFeatures.VALUES,
                PredictionFeatures.STATE_TUPLE
            ]
        })
    # Runtime check: exogenous features must cover every prediction time.
    with tf.control_dependencies([
        tf.compat.v1.debugging.assert_equal(
            tf.compat.v1.shape(predict_times)[1],
            tf.compat.v1.shape(exogenous_regressors)[1])
    ]):
      exogenous_regressors = tf.identity(exogenous_regressors)
    batch_size = tf.compat.v1.shape(predict_times)[0]
    num_predict_values = tf.compat.v1.shape(predict_times)[1]
    # Number of output-window-sized steps needed to cover the request
    # (ceiling division).
    prediction_iterations = (
        (num_predict_values + self.output_window_size - 1) //
        self.output_window_size)
    # Pad predict_times and exogenous regressors so as to have exact multiple
    # of self.output_window_size values per example.
    padding_size = (
        prediction_iterations * self.output_window_size - num_predict_values)
    predict_times = tf.compat.v1.pad(predict_times,
                                     [[0, 0], [0, padding_size]])
    exogenous_regressors = tf.compat.v1.pad(
        exogenous_regressors, [[0, 0], [0, padding_size], [0, 0]])
    state = features[PredictionFeatures.STATE_TUPLE]
    (state_times, state_values, state_exogenous_regressors) = state
    state_times = tf.cast(ops.convert_to_tensor(state_times), tf.dtypes.int32)
    state_values = ops.convert_to_tensor(state_values, dtype=self.dtype)
    state_exogenous_regressors = ops.convert_to_tensor(
        state_exogenous_regressors, dtype=self.dtype)
    # Seed the loop with the first output window, prefixed (when an input
    # window is used) by the tail of the warm-start state.
    initial_input_times = predict_times[:, :self.output_window_size]
    initial_input_exogenous_regressors = (
        exogenous_regressors[:, :self.output_window_size, :])
    if self.input_window_size > 0:
      initial_input_times = tf.concat(
          [state_times[:, -self.input_window_size:], initial_input_times], 1)
      values_size = tf.compat.v1.shape(state_values)[1]
      times_size = tf.compat.v1.shape(state_times)[1]
      with tf.control_dependencies([
          tf.compat.v1.debugging.assert_greater_equal(
              values_size, self.input_window_size),
          tf.compat.v1.debugging.assert_equal(values_size, times_size)
      ]):
        initial_input_values = state_values[:, -self.input_window_size:, :]
        initial_input_exogenous_regressors = tf.concat([
            state_exogenous_regressors[:, -self.input_window_size:, :],
            initial_input_exogenous_regressors[:, :self.output_window_size, :]
        ],
                                                       axis=1)
    else:
      initial_input_values = 0

    # Iterate over the predict_times, predicting self.output_window_size
    # values in each iteration.
    def _while_condition(iteration_number, *unused_args):
      return tf.math.less(iteration_number, prediction_iterations)

    def _while_body(iteration_number, input_times, input_values,
                    input_exogenous_regressors, mean_ta, covariance_ta):
      """Predict self.output_window_size values."""
      prediction_ops = self.prediction_ops(input_times, input_values,
                                           input_exogenous_regressors)
      predicted_mean = prediction_ops["mean"]
      predicted_covariance = prediction_ops["covariance"]
      # Offset of the NEXT window to predict; clamped so the final iteration
      # does not index past the padded arrays.
      offset = self.output_window_size * tf.math.minimum(
          iteration_number + 1, prediction_iterations - 1)
      if self.input_window_size > 0:
        if self.output_window_size < self.input_window_size:
          # Slide the input window forward, appending this iteration's
          # predictions as new "observed" values.
          new_input_values = tf.concat(
              [input_values[:, self.output_window_size:, :], predicted_mean],
              1)
          new_input_exogenous_regressors = tf.concat([
              input_exogenous_regressors[:, -self.input_window_size:, :],
              exogenous_regressors[:,
                                   offset:offset + self.output_window_size, :]
          ],
                                                     axis=1)
          new_input_times = tf.concat([
              input_times[:, -self.input_window_size:],
              predict_times[:, offset:offset + self.output_window_size]
          ], 1)
        else:
          # Output window covers the whole input window; feed back only the
          # most recent predictions.
          new_input_values = predicted_mean[:, -self.input_window_size:, :]
          new_input_exogenous_regressors = exogenous_regressors[
              :, offset - self.input_window_size:
              offset + self.output_window_size, :]
          new_input_times = predict_times[
              :, offset - self.input_window_size:
              offset + self.output_window_size]
      else:
        # Time-regression only: values are unused by the model.
        new_input_values = input_values
        new_input_exogenous_regressors = exogenous_regressors[
            :, offset:offset + self.output_window_size, :]
        new_input_times = predict_times[:, offset:offset +
                                        self.output_window_size]
      new_input_times.set_shape(initial_input_times.get_shape())
      new_input_exogenous_regressors.set_shape(
          initial_input_exogenous_regressors.get_shape())
      new_mean_ta = mean_ta.write(iteration_number, predicted_mean)
      if isinstance(covariance_ta, tf.TensorArray):
        new_covariance_ta = covariance_ta.write(iteration_number,
                                                predicted_covariance)
      else:
        new_covariance_ta = covariance_ta
      return (iteration_number + 1, new_input_times, new_input_values,
              new_input_exogenous_regressors, new_mean_ta, new_covariance_ta)

    # Note that control_flow_ops.while_loop doesn't seem happy with None. Hence
    # using 0 for cases where we don't want to predict covariance.
    covariance_ta_init = (
        tf.TensorArray(dtype=self.dtype, size=prediction_iterations)
        if self.loss != ARModel.SQUARED_LOSS else 0.)
    mean_ta_init = tf.TensorArray(dtype=self.dtype, size=prediction_iterations)
    _, _, _, _, mean_ta, covariance_ta = tf.compat.v1.while_loop(
        _while_condition, _while_body, [
            0, initial_input_times, initial_input_values,
            initial_input_exogenous_regressors, mean_ta_init,
            covariance_ta_init
        ])

    def _parse_ta(values_ta):
      """Helper function to parse the returned TensorArrays."""
      if not isinstance(values_ta, tf.TensorArray):
        return None
      predictions_length = prediction_iterations * self.output_window_size
      # Shape [prediction_iterations, batch_size, self.output_window_size,
      # self.num_features]
      values_packed = values_ta.stack()
      # Transpose to move batch dimension outside.
      output_values = tf.reshape(
          tf.compat.v1.transpose(values_packed, [1, 0, 2, 3]),
          tf.stack([batch_size, predictions_length, -1]))
      # Clip to desired size
      return output_values[:, :num_predict_values, :]

    predicted_mean = _parse_ta(mean_ta)
    predicted_covariance = _parse_ta(covariance_ta)
    if predicted_covariance is None:
      # SQUARED_LOSS has no learned covariance; report unit covariance.
      predicted_covariance = tf.compat.v1.ones_like(predicted_mean)

    # Transform and scale the mean and covariance appropriately.
    predicted_mean = self._scale_back_data(predicted_mean)
    predicted_covariance = self._scale_back_variance(predicted_covariance)

    return {"mean": predicted_mean, "covariance": predicted_covariance}

  def _process_window(self, features, mode, exogenous_regressors):
    """Compute model outputs on a single window of data."""
    times = tf.cast(features[TrainEvalFeatures.TIMES], tf.dtypes.int64)
    values = tf.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype)
    exogenous_regressors = tf.cast(exogenous_regressors, dtype=self.dtype)
    # Keep un-scaled values for the "observed" prediction output.
    original_values = values
    # Extra shape checking for the window size (above that in
    # `head.create_estimator_spec`).
    expected_times_shape = [None, self.window_size]
    if not times.get_shape().is_compatible_with(expected_times_shape):
      raise ValueError(
          ("ARModel with input_window_size={input_window_size} "
           "and output_window_size={output_window_size} expects "
           "feature '{times_feature}' to have shape (batch_size, "
           "{window_size}) (for any batch_size), but got shape {times_shape}. "
           "If you are using RandomWindowInputFn, set "
           "window_size={window_size} or adjust the input_window_size and "
           "output_window_size arguments to ARModel.").format(
               input_window_size=self.input_window_size,
               output_window_size=self.output_window_size,
               times_feature=TrainEvalFeatures.TIMES,
               window_size=self.window_size,
               times_shape=times.get_shape()))
    values = self._scale_data(values)
    if self.input_window_size > 0:
      input_values = values[:, :self.input_window_size, :]
    else:
      input_values = None
    prediction_ops = self.prediction_ops(times, input_values,
                                         exogenous_regressors)
    prediction = prediction_ops["mean"]
    covariance = prediction_ops["covariance"]
    # Targets are the (scaled) values in the output window.
    targets = tf.slice(values, [0, self.input_window_size, 0], [-1, -1, -1])
    targets.get_shape().assert_is_compatible_with(prediction.get_shape())
    if (mode == estimator_lib.ModeKeys.EVAL and
        self.loss == ARModel.SQUARED_LOSS):
      # Report an evaluation loss which matches the expected
      #  (observed - predicted) ** 2.
# Note that this affects only evaluation; the training loss is unaffected. loss = self.loss_op( self._scale_back_data(targets), {"mean": self._scale_back_data(prediction_ops["mean"])}) else: loss = self.loss_op(targets, prediction_ops) # Scale back the prediction. prediction = self._scale_back_data(prediction) covariance = self._scale_back_variance(covariance) return model.ModelOutputs( loss=loss, end_state=(times[:, -self.input_window_size:], values[:, -self.input_window_size:, :], exogenous_regressors[:, -self.input_window_size:, :]), predictions={ "mean": prediction, "covariance": covariance, "observed": original_values[:, -self.output_window_size:] }, prediction_times=times[:, -self.output_window_size:]) def get_batch_loss(self, features, mode, state): """Computes predictions and a loss. Args: features: A dictionary (such as is produced by a chunker) with the following key/value pairs (shapes are given as required for training): TrainEvalFeatures.TIMES: A [batch size, self.window_size] integer Tensor with times for each observation. To train on longer sequences, the data should first be chunked. TrainEvalFeatures.VALUES: A [batch size, self.window_size, self.num_features] Tensor with values for each observation. When evaluating, `TIMES` and `VALUES` must have a window size of at least self.window_size, but it may be longer, in which case the last window_size - self.input_window_size times (or fewer if this is not divisible by self.output_window_size) will be evaluated on with non-overlapping output windows (and will have associated predictions). This is primarily to support qualitative evaluation/plotting, and is not a recommended way to compute evaluation losses (since there is no overlap in the output windows, which for window-based models is an undesirable bias). mode: The tf.estimator.ModeKeys mode to use (TRAIN or EVAL). state: Unused Returns: A model.ModelOutputs object. 
    Raises:
      ValueError: If `mode` is not TRAIN or EVAL, or if static shape information
      is incorrect.
    """
    features = {
        feature_name: ops.convert_to_tensor(feature_value)
        for feature_name, feature_value in features.items()
    }
    times = features[TrainEvalFeatures.TIMES]
    # Everything that is neither times, values, nor passed-in state is treated
    # as an exogenous feature.
    exogenous_regressors = self._process_exogenous_features(
        times=times,
        features={
            key: value
            for key, value in features.items()
            if key not in [
                TrainEvalFeatures.TIMES, TrainEvalFeatures.VALUES,
                PredictionFeatures.STATE_TUPLE
            ]
        })
    if mode == estimator_lib.ModeKeys.TRAIN:
      # For training, we require the window size to be self.window_size as
      # iterating sequentially on larger windows could introduce a bias.
      return self._process_window(
          features, mode=mode, exogenous_regressors=exogenous_regressors)
    elif mode == estimator_lib.ModeKeys.EVAL:
      # For evaluation, we allow the user to pass in a larger window, in which
      # case we try to cover as much of the window as possible without
      # overlap. Quantitative evaluation is more efficient/correct with fixed
      # windows matching self.window_size (as with training), but this looping
      # allows easy plotting of "in-sample" predictions.
      times.get_shape().assert_has_rank(2)
      static_window_size = times.get_shape().dims[1].value
      if (static_window_size is not None and
          static_window_size < self.window_size):
        raise ValueError(
            ("ARModel requires a window of at least input_window_size + "
             "output_window_size to evaluate on (input_window_size={}, "
             "output_window_size={}, and got shape {} for feature '{}' (batch "
             "size, window size)).").format(self.input_window_size,
                                            self.output_window_size,
                                            times.get_shape(),
                                            TrainEvalFeatures.TIMES))
      # Number of non-overlapping output windows that fit after the first
      # input window (dynamic, since window length may be unknown statically).
      num_iterations = ((tf.compat.v1.shape(times)[1] - self.input_window_size)
                        // self.output_window_size)
      output_size = num_iterations * self.output_window_size
      # Rather than dealing with overlapping windows of output, discard a bit at
      # the beginning if output windows don't cover evenly.
      crop_length = output_size + self.input_window_size
      features = {
          feature_name: feature_value[:, -crop_length:]
          for feature_name, feature_value in features.items()
      }
      # Note that, unlike the ARModel's predict() while_loop, each iteration
      # here can run in parallel, since we are not feeding predictions or state
      # from previous iterations.
      def _while_condition(iteration_number, loss_ta, mean_ta, covariance_ta):
        del loss_ta, mean_ta, covariance_ta  # unused
        return iteration_number < num_iterations

      def _while_body(iteration_number, loss_ta, mean_ta, covariance_ta):
        """Perform a processing step on a single window of data."""
        # Each iteration slides forward by output_window_size, so output
        # windows tile the cropped range without overlapping.
        base_offset = iteration_number * self.output_window_size
        model_outputs = self._process_window(
            features={
                feature_name:
                feature_value[:, base_offset:base_offset + self.window_size]
                for feature_name, feature_value in features.items()
            },
            mode=mode,
            exogenous_regressors=exogenous_regressors[:, base_offset:
                                                      base_offset +
                                                      self.window_size])
        # This code needs to be updated if new predictions are added in
        # self._process_window
        assert len(model_outputs.predictions) == 3
        assert "mean" in model_outputs.predictions
        assert "covariance" in model_outputs.predictions
        assert "observed" in model_outputs.predictions
        return (iteration_number + 1,
                loss_ta.write(iteration_number, model_outputs.loss),
                mean_ta.write(iteration_number,
                              model_outputs.predictions["mean"]),
                covariance_ta.write(iteration_number,
                                    model_outputs.predictions["covariance"]))

      # Accumulate per-window losses and predictions into TensorArrays; the
      # stacked arrays are [num_iterations, batch, output_window, features].
      _, loss_ta, mean_ta, covariance_ta = tf.compat.v1.while_loop(
          _while_condition, _while_body, [
              0,
              tf.TensorArray(dtype=self.dtype, size=num_iterations),
              tf.TensorArray(dtype=self.dtype, size=num_iterations),
              tf.TensorArray(dtype=self.dtype, size=num_iterations)
          ])
      values = tf.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype)
      batch_size = tf.compat.v1.shape(times)[0]
      prediction_shape = [
          batch_size, self.output_window_size * num_iterations,
          self.num_features
      ]
      (previous_state_times, previous_state_values,
       previous_state_exogenous_regressors) = state
      # Make sure returned state always has windows of self.input_window_size,
      # even if we were passed fewer than self.input_window_size points this
      # time.
      if self.input_window_size > 0:
        new_state_times = tf.concat(
            [previous_state_times,
             tf.cast(times, dtype=tf.dtypes.int64)],
            axis=1)[:, -self.input_window_size:]
        new_state_times.set_shape((None, self.input_window_size))
        # State values are stored in the model's normalized scale.
        new_state_values = tf.concat(
            [previous_state_values,
             self._scale_data(values)], axis=1)[:, -self.input_window_size:, :]
        new_state_values.set_shape(
            (None, self.input_window_size, self.num_features))
        new_exogenous_regressors = tf.concat(
            [previous_state_exogenous_regressors, exogenous_regressors],
            axis=1)[:, -self.input_window_size:, :]
        new_exogenous_regressors.set_shape(
            (None, self.input_window_size, self.exogenous_size))
      else:
        # There is no state to keep, and the strided slices above do not handle
        # input_window_size=0.
        new_state_times = previous_state_times
        new_state_values = previous_state_values
        new_exogenous_regressors = previous_state_exogenous_regressors
      return model.ModelOutputs(
          # Mean loss across the evaluated windows.
          loss=tf.math.reduce_mean(loss_ta.stack(), axis=0),
          end_state=(new_state_times, new_state_values,
                     new_exogenous_regressors),
          predictions={
              # Transpose [iteration, batch, window, features] to put batch
              # first, then flatten the window axis across iterations.
              "mean":
                  tf.reshape(
                      tf.compat.v1.transpose(mean_ta.stack(), [1, 0, 2, 3]),
                      prediction_shape),
              "covariance":
                  tf.reshape(
                      tf.compat.v1.transpose(covariance_ta.stack(),
                                             [1, 0, 2, 3]), prediction_shape),
              "observed": values[:, -output_size:]
          },
          prediction_times=times[:, -output_size:])
    else:
      raise ValueError(
          "Unknown mode '{}' passed to get_batch_loss.".format(mode))

  def _compute_time_features(self, time):
    """Compute some features on the time value.

    Derives periodic features from integer timestamps: for each configured
    periodicity, bucketizes (time % periodicity) into self._buckets
    fixed-width intervals.

    Args:
      time: Integer Tensor of timestamps; reshaped below to
        [batch, window, 1, 1] for broadcasting against periods/buckets.

    Returns:
      A (window_offset, mod) pair; see inline comments for the bucketized
      `mod` encoding.
    """
    batch_size = tf.compat.v1.shape(time)[0]
    num_periods = len(self._periodicities)
    # Reshape to 3D.
    periods = tf.constant(
        self._periodicities, shape=[1, 1, num_periods, 1], dtype=time.dtype)
    time = tf.reshape(time, [batch_size, -1, 1, 1])
    # Broadcasts [batch, window, 1, 1] against the periodicities; note this
    # divides by the raw self._periodicities (list), not the reshaped
    # `periods` constant.
    window_offset = time / self._periodicities
    # Cast to appropriate type and scale to [0, 1) range
    mod = (
        tf.cast(time % periods, self.dtype) * self._buckets /
        tf.cast(periods, self.dtype))
    # Bucketize based on some fixed width intervals. For a value t and interval
    # [a, b), we return (t - a) if a <= t < b, else 0.
    intervals = tf.reshape(
        tf.range(self._buckets, dtype=self.dtype), [1, 1, 1, self._buckets])
    mod = tf.nn.relu(mod - intervals)
    # Zero out entries that fall outside their bucket (offset >= 1 means the
    # value belongs to a later bucket).
    mod = tf.where(mod < 1.0, mod, tf.compat.v1.zeros_like(mod))
    return window_offset, mod


================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/ar_model_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ar_model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import estimator_lib
from tensorflow_estimator.python.estimator.canned.timeseries import ar_model
from tensorflow_estimator.python.estimator.canned.timeseries.estimators import LSTMAutoRegressor
from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import PredictionFeatures
from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import TrainEvalFeatures


@test_util.run_v1_only("Currently incompatible with ResourceVariable")
class ARModelTest(tf.test.TestCase):
  """Graph-mode (v1) tests for ARModel prediction and evaluation shapes."""

  def test_wrong_window_size(self):
    # A window shorter than input_window_size + output_window_size must raise
    # a helpful error both in training and in evaluation.
    estimator = LSTMAutoRegressor(
        periodicities=10,
        input_window_size=10,
        output_window_size=6,
        num_features=1)

    def _bad_window_size_input_fn():
      return ({
          TrainEvalFeatures.TIMES: [[1]],
          TrainEvalFeatures.VALUES: [[[1.]]]
      }, None)

    def _good_data():
      return ({
          TrainEvalFeatures.TIMES: tf.range(16)[None, :],
          TrainEvalFeatures.VALUES: tf.reshape(tf.range(16), [1, 16, 1])
      }, None)

    with self.assertRaisesRegexp(ValueError, "set window_size=16"):
      estimator.train(input_fn=_bad_window_size_input_fn, steps=1)
    # Get a checkpoint for evaluation
    estimator.train(input_fn=_good_data, steps=1)
    with self.assertRaisesRegexp(ValueError, "requires a window of at least"):
      estimator.evaluate(input_fn=_bad_window_size_input_fn, steps=1)

  def test_predictions_direct_lstm(self):
    # Calls ARModel.predict directly with an LSTMPredictionModel and checks
    # the predicted mean shape for 3 requested times.
    model = ar_model.ARModel(
        periodicities=2,
        num_features=1,
        num_time_buckets=10,
        input_window_size=2,
        output_window_size=2,
        prediction_model_factory=functools.partial(
            ar_model.LSTMPredictionModel, num_units=16))
    with tf.compat.v1.Session():
      predicted_values = model.predict({
          PredictionFeatures.TIMES: [[4, 6, 10]],
          PredictionFeatures.STATE_TUPLE: ([[1, 2]], [[[1.], [2.]]],
                                           [[[], []]])
      })
      tf.compat.v1.initializers.global_variables().run()
      self.assertAllEqual(predicted_values["mean"].eval().shape, [1, 3, 1])

  def test_long_eval(self):
    # Evaluation on a window longer than window_size: with
    # output_window_size=1, every step after the first input window gets a
    # prediction.
    model = ar_model.ARModel(
        periodicities=2,
        num_features=1,
        num_time_buckets=10,
        input_window_size=2,
        output_window_size=1)
    raw_features = {
        TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
        TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]
    }
    model.initialize_graph()
    with tf.compat.v1.variable_scope("armodel"):
      raw_evaluation = model.define_loss(
          raw_features, mode=estimator_lib.ModeKeys.EVAL)
    with tf.compat.v1.Session() as sess:
      tf.compat.v1.initializers.global_variables().run()
      raw_evaluation_evaled = sess.run(raw_evaluation)
      self.assertAllEqual([[5, 7, 11]], raw_evaluation_evaled.prediction_times)
      for feature_name in raw_evaluation.predictions:
        self.assertAllEqual(
            [1, 3, 1],  # batch, window, num_features. The window size has 2
                        # cut off for the first input_window.
            raw_evaluation_evaled.predictions[feature_name].shape)

  def test_long_eval_discard_indivisible(self):
    # With output_window_size=2, the 3 post-input steps do not divide evenly,
    # so one leading step is discarded (see get_batch_loss cropping).
    model = ar_model.ARModel(
        periodicities=2,
        num_features=1,
        num_time_buckets=10,
        input_window_size=2,
        output_window_size=2)
    raw_features = {
        TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
        TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]
    }
    model.initialize_graph()
    raw_evaluation = model.define_loss(
        raw_features, mode=estimator_lib.ModeKeys.EVAL)
    with tf.compat.v1.Session() as sess:
      tf.compat.v1.initializers.global_variables().run()
      raw_evaluation_evaled = sess.run(raw_evaluation)
      self.assertAllEqual([[7, 11]], raw_evaluation_evaled.prediction_times)
      for feature_name in raw_evaluation.predictions:
        self.assertAllEqual(
            [1, 2, 1],  # batch, window, num_features. The window has two cut
                        # off for the first input window and one discarded so
                        # that the remainder is divisible into output windows.
            raw_evaluation_evaled.predictions[feature_name].shape)


if __name__ == "__main__":
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/ar_model_training_test.py
================================================
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for training ar_model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import estimator_lib
from tensorflow_estimator.python.estimator.canned.timeseries import ar_model
from tensorflow_estimator.python.estimator.canned.timeseries.estimators import LSTMAutoRegressor
from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import PredictionFeatures
from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import TrainEvalFeatures


class InputFnBuilder(object):
  """Builds train/eval/predict input_fns over synthetic periodic data.

  Generates two noisy sinusoidal series (so num_features=2 downstream),
  split 80/20 into train and test ranges.
  """

  def __init__(self,
               noise_stddev,
               periods,
               window_size,
               batch_size,
               num_samples=200):
    self.window_size = window_size
    self.batch_size = batch_size
    split = int(num_samples * 0.8)
    # Data generation is deferred: input_fns call self.initialize_data() so
    # tensors are created inside the Estimator's graph.
    self.initialize_data = lambda: self.initialize_data_with_properties(
        noise_stddev, periods, num_samples, split)

  def initialize_data_with_properties(self, noise_stddev, periods, num_samples,
                                      split):
    """Creates the synthetic series and the train/test split tensors."""
    # Irregular integer times: 1, 4, 7, ...
    time = 1 + 3 * tf.range(num_samples, dtype=tf.dtypes.int64)
    time_offset = 2 * math.pi * tf.cast(time % periods[0],
                                        tf.dtypes.float32) / periods[0]
    time_offset = time_offset[:, None]
    if len(periods) > 1:
      # Second periodicity modulates the first series' amplitude.
      time_offset2 = tf.cast(time % periods[1], tf.dtypes.float32) / periods[1]
      time_offset2 = time_offset2[:, None]
      data1 = tf.math.sin(time_offset / 2.0)**2 * (1 + time_offset2)
    else:
      data1 = tf.math.sin(2 * time_offset) + tf.math.cos(3 * time_offset)
    data1_noise = \
        noise_stddev / 4. * tf.random.normal([num_samples], 1)[:, None]
    data1 = tf.math.add(data1, data1_noise)

    data2 = tf.math.sin(3 * time_offset) + tf.math.cos(5 * time_offset)
    data2_noise = \
        noise_stddev / 3. * tf.random.normal([num_samples], 1)[:, None]
    data2 = tf.math.add(data2, data2_noise)

    data = tf.concat((4 * data1, 3 * data2), 1)
    self.train_data, self.test_data = data[0:split], data[split:]
    self.train_time, self.test_time = time[0:split], time[split:]

  def train_or_test_input_fn(self, time, data):
    """Shuffled, repeated dataset of [batch, window_size] training windows."""

    def map_to_dict(time, data):
      return {TrainEvalFeatures.TIMES: time, TrainEvalFeatures.VALUES: data}

    def batch_windows(time, data):
      return tf.compat.v1.data.Dataset.zip((time, data)).batch(
          self.window_size, drop_remainder=True)

    dataset = tf.compat.v1.data.Dataset.from_tensor_slices((time, data))
    # Sliding windows with stride 1 over the series.
    dataset = dataset.window(self.window_size, shift=1, drop_remainder=True)
    dataset = dataset.shuffle(1000, seed=2).repeat()
    dataset = dataset.flat_map(batch_windows).batch(self.batch_size).map(
        map_to_dict)
    return dataset

  def train_input_fn(self):
    self.initialize_data()
    return self.train_or_test_input_fn(self.train_time, self.train_data)

  def test_input_fn(self):
    self.initialize_data()
    return self.train_or_test_input_fn(self.test_time, self.test_data)

  def prediction_input_fn(self):
    """Single-example dataset: predict over train tail + test given a warm
    start state from the first window of training data."""

    def map_to_dict(predict_times, predict_true_values, state_times,
                    state_values, state_exogenous):
      return ({
          PredictionFeatures.TIMES: predict_times[None, :],
          TrainEvalFeatures.VALUES: predict_true_values[None, :],
          PredictionFeatures.STATE_TUPLE: (state_times[None, :],
                                           state_values[None, :],
                                           state_exogenous[None, :])
      }, {})

    self.initialize_data()
    predict_times = tf.concat(
        [self.train_time[self.window_size:], self.test_time], 0)[None, :]
    predict_true_values = tf.concat(
        [self.train_data[self.window_size:], self.test_data], 0)[None, :]
    state_times = tf.cast(self.train_time[:self.window_size][None, :],
                          tf.dtypes.float32)
    state_values = tf.cast(self.train_data[:self.window_size, :][None, :],
                           tf.dtypes.float32)
    # Empty exogenous state (zero-width last axis): model has no exogenous
    # features in these tests.
    state_exogenous = state_times[:, :, None][:, :, :0]

    dataset = tf.compat.v1.data.Dataset.from_tensor_slices(
        (predict_times, predict_true_values, state_times, state_values,
         state_exogenous))
    dataset = dataset.map(map_to_dict)
    return dataset

  def true_values(self):
    """First-feature ground truth aligned with prediction_input_fn times."""
    self.initialize_data()
    predict_true_values = tf.concat(
        [self.train_data[self.window_size:], self.test_data], 0)[None, :]
    true_values = predict_true_values[0, :, 0]
    return true_values


@test_util.run_v1_only("Currently incompatible with ResourceVariable")
class ARModelTrainingTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict tests for LSTMAutoRegressor."""

  def train_helper(self, input_window_size, loss, max_loss=None,
                   periods=(25,)):
    """Trains briefly, checks eval loss below max_loss, then runs predict."""
    data_noise_stddev = 0.2
    if max_loss is None:
      if loss == ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
        max_loss = 1.0
      else:
        max_loss = 0.05 / (data_noise_stddev**2)
    output_window_size = 10
    window_size = input_window_size + output_window_size

    input_fn_builder = InputFnBuilder(
        noise_stddev=data_noise_stddev,
        periods=periods,
        window_size=window_size,
        batch_size=64)

    class _RunConfig(estimator_lib.RunConfig):
      # Fixed seed keeps the short training run deterministic.

      @property
      def tf_random_seed(self):
        return 3

    estimator = LSTMAutoRegressor(
        periodicities=periods,
        input_window_size=input_window_size,
        output_window_size=output_window_size,
        num_features=2,
        num_timesteps=20,
        num_units=16,
        loss=loss,
        config=_RunConfig())

    # Test training
    # Note that most models will require many more steps to fully converge. We
    # have used a small number of steps here to keep the running time small.
    estimator.train(input_fn=input_fn_builder.train_input_fn, steps=75)

    test_evaluation = estimator.evaluate(
        input_fn=input_fn_builder.test_input_fn, steps=1)
    test_loss = test_evaluation["loss"]
    tf.compat.v1.logging.warn("Final test loss: %f", test_loss)
    self.assertLess(test_loss, max_loss)
    if loss == ar_model.ARModel.SQUARED_LOSS:
      # Test that the evaluation loss is reported without input scaling.
      self.assertAllClose(
          test_loss,
          tf.math.reduce_mean(
              (test_evaluation["mean"] - test_evaluation["observed"])**2))

    # Test predict
    (predictions,) = tuple(
        estimator.predict(input_fn=input_fn_builder.prediction_input_fn))
    predicted_mean = predictions["mean"][:, 0]
    if loss == ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
      variances = predictions["covariance"][:, 0]
      standard_deviations = tf.math.sqrt(variances)
      # Note that we may get tighter bounds with more training steps.
      true_values = input_fn_builder.true_values()
      errors = tf.math.abs(predicted_mean -
                           true_values) > 4 * standard_deviations
      fraction_errors = tf.math.reduce_mean(tf.cast(errors, tf.dtypes.float32))
      tf.compat.v1.logging.warn("Fraction errors: %f",
                                self.evaluate(fraction_errors))

  def test_autoregression_squared(self):
    self.train_helper(input_window_size=15, loss=ar_model.ARModel.SQUARED_LOSS)

  def test_autoregression_short_input_window(self):
    self.train_helper(input_window_size=8, loss=ar_model.ARModel.SQUARED_LOSS)

  def test_autoregression_normal(self):
    self.train_helper(
        input_window_size=10,
        loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
        max_loss=50.)  # Just make sure there are no exceptions.

  def test_autoregression_normal_multiple_periods(self):
    self.train_helper(
        input_window_size=10,
        loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
        max_loss=2.0,
        periods=(25, 55))


if __name__ == "__main__":
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/estimators.py
================================================
# Copyright 2018 The TensorFlow Authors.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Estimators for time series models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import tensorflow as tf
from tensorflow_estimator.python.estimator import estimator_lib
from tensorflow_estimator.python.estimator.canned import optimizers
from tensorflow_estimator.python.estimator.canned.timeseries import ar_model
from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys
from tensorflow_estimator.python.estimator.canned.timeseries import head as ts_head_lib
from tensorflow_estimator.python.estimator.canned.timeseries import math_utils
from tensorflow_estimator.python.estimator.canned.timeseries import state_management
from tensorflow_estimator.python.estimator.export import export_lib


class TimeSeriesRegressor(estimator_lib.Estimator):
  """An Estimator to fit and evaluate a time series model."""

  def __init__(self,
               model,
               state_manager=None,
               optimizer=None,
               model_dir=None,
               config=None,
               head_type=ts_head_lib.TimeSeriesRegressionHead):
    """Initialize the Estimator.

    Args:
      model: The time series model to wrap (inheriting from TimeSeriesModel).
      state_manager: The state manager to use, or (by default)
        PassthroughStateManager if none is needed.
      optimizer: The optimization algorithm to use when training, inheriting
        from tf.train.Optimizer. Defaults to Adam with step size 0.02.
      model_dir: See `Estimator`.
      config: See `Estimator`.
      head_type: The kind of head to use for the model (inheriting from
        `TimeSeriesRegressionHead`).
    """
    input_statistics_generator = math_utils.InputStatisticsFromMiniBatch(
        dtype=model.dtype, num_features=model.num_features)
    if state_manager is None:
      # ARModel manages state only through filtering; other models can pass
      # state straight through.
      if isinstance(model, ar_model.ARModel):
        state_manager = state_management.FilteringOnlyStateManager()
      else:
        state_manager = state_management.PassthroughStateManager()
    if optimizer is None:
      optimizer = tf.compat.v1.train.AdamOptimizer(0.02)
    self._model = model
    ts_regression_head = head_type(
        model=model,
        state_manager=state_manager,
        optimizer=optimizer,
        input_statistics_generator=input_statistics_generator)
    # The head's create_estimator_spec serves directly as the Estimator's
    # model_fn.
    model_fn = ts_regression_head.create_estimator_spec
    super(TimeSeriesRegressor, self).__init__(
        model_fn=model_fn, model_dir=model_dir, config=config)

  def _model_start_state_placeholders(self,
                                      batch_size_tensor,
                                      static_batch_size=None):
    """Creates placeholders with zeroed start state for the current model.

    Args:
      batch_size_tensor: Scalar int Tensor with the runtime batch size; the
        zero start state is tiled to this size as a placeholder default.
      static_batch_size: Optional static batch size to bake into placeholder
        shapes (None leaves the batch dimension unknown).

    Returns:
      Dict mapping prefixed state names to placeholders (with zeroed
      defaults) for each element of the model's start state.
    """
    gathered_state = {}
    # Models may not know the shape of their state without creating some
    # variables/ops. Avoid polluting the default graph by making a new one. We
    # use only static metadata from the returned Tensors.
    with tf.Graph().as_default():
      self._model.initialize_graph()

      # Evaluate the initial state as same-dtype "zero" values. These zero
      # constants aren't used, but are necessary for feeding to
      # placeholder_with_default for the "cold start" case where state is not
      # fed to the model.
      def _zeros_like_constant(tensor):
        return tf.get_static_value(tf.compat.v1.zeros_like(tensor))

      start_state = tf.nest.map_structure(_zeros_like_constant,
                                          self._model.get_start_state())
    for prefixed_state_name, state in ts_head_lib.state_to_dictionary(
        start_state).items():
      state_shape_with_batch = tf.TensorShape(
          (static_batch_size,)).concatenate(state.shape)
      # Broadcast the single zero state across the batch dimension.
      default_state_broadcast = tf.tile(
          state[None, ...],
          multiples=tf.concat([
              batch_size_tensor[None],
              tf.ones(len(state.shape), dtype=tf.dtypes.int32)
          ],
                              axis=0))
      gathered_state[
          prefixed_state_name] = tf.compat.v1.placeholder_with_default(
              input=default_state_broadcast,
              name=prefixed_state_name,
              shape=state_shape_with_batch)
    return gathered_state

  def build_one_shot_parsing_serving_input_receiver_fn(self,
                                                       filtering_length,
                                                       prediction_length,
                                                       default_batch_size=None,
                                                       values_input_dtype=None,
                                                       truncate_values=False):
    """Build an input_receiver_fn for export_saved_model accepting tf.Examples.

    Only compatible with `OneShotPredictionHead` (see `head`).

    Args:
      filtering_length: The number of time steps used as input to the model, for
        which values are provided. If more than `filtering_length` values are
        provided (via `truncate_values`), only the first `filtering_length`
        values are used.
      prediction_length: The number of time steps requested as predictions from
        the model. Times and all exogenous features must be provided for these
        steps.
      default_batch_size: If specified, must be a scalar integer. Sets the batch
        size in the static shape information of all feature Tensors, which means
        only this batch size will be accepted by the exported model. If None
        (default), static shape information for batch sizes is omitted.
      values_input_dtype: An optional dtype specification for values in the
        tf.Example protos (either float32 or int64, since these are the numeric
        types supported by tf.Example). After parsing, values are cast to the
        model's dtype (float32 or float64).
      truncate_values: If True, expects `filtering_length + prediction_length`
        values to be provided, but only uses the first `filtering_length`. If
        False (default), exactly `filtering_length` values must be provided.

    Returns:
      An input_receiver_fn which may be passed to the Estimator's
      export_saved_model. Expects features contained in a vector of serialized
      tf.Examples with shape [batch size] (dtype `tf.string`), each tf.Example
      containing features with the following shapes:
        times: [filtering_length + prediction_length] integer
        values: [filtering_length, num features] floating point. If
          `truncate_values` is True, expects `filtering_length +
          prediction_length` values but only uses the first `filtering_length`.
        all exogenous features: [filtering_length + prediction_length, ...]
          (various dtypes)
    """
    if values_input_dtype is None:
      values_input_dtype = tf.dtypes.float32
    if truncate_values:
      values_proto_length = filtering_length + prediction_length
    else:
      values_proto_length = filtering_length

    def _serving_input_receiver_fn():
      """A receiver function to be passed to export_saved_model."""
      times_column = tf.feature_column.numeric_column(
          key=feature_keys.TrainEvalFeatures.TIMES, dtype=tf.dtypes.int64)
      values_column = tf.feature_column.numeric_column(
          key=feature_keys.TrainEvalFeatures.VALUES,
          dtype=values_input_dtype,
          shape=(self._model.num_features,))
      parsed_features_no_sequence = (
          tf.compat.v1.feature_column.make_parse_example_spec(
              list(self._model.exogenous_feature_columns) +
              [times_column, values_column]))
      parsed_features = {}
      # Per-example specs parse a single time step; prepend the series-length
      # dimension so each tf.Example carries a whole window.
      for key, feature_spec in parsed_features_no_sequence.items():
        if isinstance(feature_spec, tf.io.FixedLenFeature):
          if key == feature_keys.TrainEvalFeatures.VALUES:
            parsed_features[key] = feature_spec._replace(
                shape=((values_proto_length,) + feature_spec.shape))
          else:
            parsed_features[key] = feature_spec._replace(
                shape=((filtering_length + prediction_length,) +
                       feature_spec.shape))
        elif feature_spec.dtype == tf.dtypes.string:
          parsed_features[key] = tf.io.FixedLenFeature(
              shape=(filtering_length + prediction_length,),
              dtype=tf.dtypes.string)
        else:  # VarLenFeature
          raise ValueError("VarLenFeatures not supported, got %s for key %s" %
                           (feature_spec, key))
      tfexamples = tf.compat.v1.placeholder(
          shape=[default_batch_size], dtype=tf.dtypes.string, name="input")
      features = tf.compat.v1.io.parse_example(
          serialized=tfexamples, features=parsed_features)
      features[feature_keys.TrainEvalFeatures.TIMES] = tf.compat.v1.squeeze(
          features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
      # Only the filtering prefix of values is fed to the model; the
      # prediction suffix is what the model is asked to produce.
      features[feature_keys.TrainEvalFeatures.VALUES] = tf.cast(
          features[feature_keys.TrainEvalFeatures.VALUES],
          dtype=self._model.dtype)[:, :filtering_length]
      features.update(
          self._model_start_state_placeholders(
              batch_size_tensor=tf.compat.v1.shape(
                  features[feature_keys.TrainEvalFeatures.TIMES])[0],
              static_batch_size=default_batch_size))
      return export_lib.ServingInputReceiver(features,
                                             {"examples": tfexamples})

    return _serving_input_receiver_fn

  def build_raw_serving_input_receiver_fn(self,
                                          default_batch_size=None,
                                          default_series_length=None):
    """Build an input_receiver_fn for export_saved_model which accepts arrays.

    Automatically creates placeholders for exogenous `FeatureColumn`s passed to
    the model.

    Args:
      default_batch_size: If specified, must be a scalar integer. Sets the batch
        size in the static shape information of all feature Tensors, which means
        only this batch size will be accepted by the exported model. If None
        (default), static shape information for batch sizes is omitted.
      default_series_length: If specified, must be a scalar integer. Sets the
        series length in the static shape information of all feature Tensors,
        which means only this series length will be accepted by the exported
        model. If None (default), static shape information for series length is
        omitted.

    Returns:
      An input_receiver_fn which may be passed to the Estimator's
      export_saved_model.
    """

    def _serving_input_receiver_fn():
      """A receiver function to be passed to export_saved_model."""
      placeholders = {}
      time_placeholder = tf.compat.v1.placeholder(
          name=feature_keys.TrainEvalFeatures.TIMES,
          dtype=tf.dtypes.int64,
          shape=[default_batch_size, default_series_length])
      placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder
      # Values are only necessary when filtering. For prediction the default
      # value will be ignored.
      placeholders[feature_keys.TrainEvalFeatures.VALUES] = (
          tf.compat.v1.placeholder_with_default(
              name=feature_keys.TrainEvalFeatures.VALUES,
              input=tf.zeros(
                  shape=[
                      default_batch_size if default_batch_size else 0,
                      default_series_length if default_series_length else 0,
                      self._model.num_features
                  ],
                  dtype=self._model.dtype),
              shape=(default_batch_size, default_series_length,
                     self._model.num_features)))
      if self._model.exogenous_feature_columns:
        with tf.Graph().as_default():
          # Default placeholders have only an unknown batch dimension. Make them
          # in a separate graph, then splice in the series length to the shapes
          # and re-create them in the outer graph.
          parsed_features = (
              tf.compat.v1.feature_column.make_parse_example_spec(
                  self._model.exogenous_feature_columns))
          placeholder_features = tf.compat.v1.io.parse_example(
              serialized=tf.compat.v1.placeholder(
                  shape=[None], dtype=tf.dtypes.string),
              features=parsed_features)
          exogenous_feature_shapes = {
              key: (value.get_shape(), value.dtype)
              for key, value in placeholder_features.items()
          }
        for feature_key, (batch_only_feature_shape,
                          value_dtype) in (exogenous_feature_shapes.items()):
          batch_only_feature_shape = (
              batch_only_feature_shape.with_rank_at_least(1).as_list())
          feature_shape = ([default_batch_size, default_series_length] +
                           batch_only_feature_shape[1:])
          placeholders[feature_key] = tf.compat.v1.placeholder(
              dtype=value_dtype, name=feature_key, shape=feature_shape)
      batch_size_tensor = tf.compat.v1.shape(time_placeholder)[0]
      placeholders.update(
          self._model_start_state_placeholders(
              batch_size_tensor, static_batch_size=default_batch_size))
      # Raw receiver: the parsed placeholders double as the receiver tensors.
      return export_lib.ServingInputReceiver(placeholders, placeholders)

    return _serving_input_receiver_fn


# TODO(b/113684821): Add detailed documentation on what the input_fn should do.
# Add an example of making and returning a Dataset object. Determine if
# endogenous features can be passed in as FeatureColumns. Move ARModel's loss
# functions into a more general location.
class LSTMAutoRegressor(TimeSeriesRegressor):
  """An Estimator for an LSTM autoregressive model.

  LSTMAutoRegressor is a window-based model, inputting fixed windows of length
  `input_window_size` and outputting fixed windows of length
  `output_window_size`. These two parameters must add up to the window_size of
  data returned by the `input_fn`.

  Each periodicity in the `periodicities` arg is divided by the `num_timesteps`
  into timesteps that are represented as time features added to the model.

  A good heuristic for picking an appropriate periodicity for a given data set
  would be the length of cycles in the data.
For example, energy usage in a home is typically cyclic each day. If the time feature in a home energy usage dataset is in the unit of hours, then 24 would be an appropriate periodicity. Similarly, a good heuristic for `num_timesteps` is how often the data is expected to change within the cycle. For the aforementioned home energy usage dataset and periodicity of 24, then 48 would be a reasonable value if usage is expected to change every half hour. Each feature's value for a given example with time t is the difference between t and the start of the timestep it falls under. If it doesn't fall under a feature's associated timestep, then that feature's value is zero. For example: if `periodicities` = (9, 12) and `num_timesteps` = 3, then 6 features would be added to the model, 3 for periodicity 9 and 3 for periodicity 12. For an example data point where t = 17: - It's in the 3rd timestep for periodicity 9 (2nd period is 9-18 and 3rd timestep is 15-18) - It's in the 2nd timestep for periodicity 12 (2nd period is 12-24 and 2nd timestep is between 16-20). 
Therefore the 6 added features for this row with t = 17 would be: # Feature name (periodicity#_timestep#), feature value P9_T1, 0 # not in first timestep P9_T2, 0 # not in second timestep P9_T3, 2 # 17 - 15 since 15 is the start of the 3rd timestep P12_T1, 0 # not in first timestep P12_T2, 1 # 17 - 16 since 16 is the start of the 2nd timestep P12_T3, 0 # not in third timestep Example Code: ```python extra_feature_columns = ( feature_column.numeric_column("exogenous_variable"), ) estimator = LSTMAutoRegressor( periodicities=10, input_window_size=10, output_window_size=5, model_dir="/path/to/model/dir", num_features=1, extra_feature_columns=extra_feature_columns, num_timesteps=50, num_units=10, optimizer=tf.train.ProximalAdagradOptimizer(...)) # Input builders def input_fn_train(): return { "times": tf.range(15)[None, :], "values": tf.random_normal(shape=[1, 15, 1]) } estimator.train(input_fn=input_fn_train, steps=100) def input_fn_eval(): pass metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) def input_fn_predict(): pass predictions = estimator.predict(input_fn=input_fn_predict) ``` """ def __init__(self, periodicities, input_window_size, output_window_size, model_dir=None, num_features=1, extra_feature_columns=None, num_timesteps=10, loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS, num_units=128, optimizer="Adam", config=None): """Initialize the Estimator. Args: periodicities: periodicities of the input data, in the same units as the time feature (for example 24 if feeding hourly data with a daily periodicity, or 60 * 24 if feeding minute-level data with daily periodicity). Note this can be a single value or a list of values for multiple periodicities. input_window_size: Number of past time steps of data to look at when doing the regression. output_window_size: Number of future time steps to predict. Note that setting this value to > 1 empirically seems to give a better fit. model_dir: Directory to save model parameters, graph and etc. 
This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. num_features: The dimensionality of the time series (default value is one for univariate, more than one for multivariate). extra_feature_columns: A list of `tf.feature_column`s (for example `tf.feature_column.embedding_column`) corresponding to features which provide extra information to the model but are not part of the series to be predicted. num_timesteps: Number of buckets into which to divide (time % periodicity). This value multiplied by the number of periodicities is the number of time features added to the model. loss: Loss function to use for training. Currently supported values are SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For SQUARED_LOSS, the evaluation loss is reported based on un-scaled observations and predictions, while the training loss is computed on normalized data. num_units: The size of the hidden state in the encoder and decoder LSTM cells. optimizer: string, `tf.train.Optimizer` object, or callable that defines the optimizer algorithm to use for training. Defaults to the Adam optimizer with a learning rate of 0.01. config: Optional `estimator.RunConfig` object to configure the runtime settings. 
""" optimizer = optimizers.get_optimizer_instance(optimizer, learning_rate=0.01) model = ar_model.ARModel( periodicities=periodicities, input_window_size=input_window_size, output_window_size=output_window_size, num_features=num_features, exogenous_feature_columns=extra_feature_columns, num_time_buckets=num_timesteps, loss=loss, prediction_model_factory=functools.partial( ar_model.LSTMPredictionModel, num_units=num_units)) state_manager = state_management.FilteringOnlyStateManager() super(LSTMAutoRegressor, self).__init__( model=model, state_manager=state_manager, optimizer=optimizer, model_dir=model_dir, config=config, head_type=ts_head_lib.OneShotPredictionHead) ================================================ FILE: tensorflow_estimator/python/estimator/canned/timeseries/estimators_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for time series estimators (fit / restore / fit and SavedModel)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import tempfile

import six
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.ops import math_ops
from tensorflow_estimator.python.estimator import estimator_lib
from tensorflow_estimator.python.estimator.canned.timeseries import ar_model
from tensorflow_estimator.python.estimator.canned.timeseries import estimators
from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys
from tensorflow_estimator.python.estimator.canned.timeseries import saved_model_utils


class _SeedRunConfig(estimator_lib.RunConfig):
  """RunConfig with a fixed graph-level seed for deterministic tests."""

  @property
  def tf_random_seed(self):
    return 3


def _generate_data():
  """Returns (times, values, exogenous) tensors for a 20-step series."""
  time = tf.range(20, dtype=tf.dtypes.int64)
  data = tf.reshape(tf.range(20, dtype=tf.dtypes.float32), (20, 1))
  # The exogenous feature simply mirrors the values.
  exogenous = data
  return time, data, exogenous


def _build_input_fn_with_seed(seed):
  """Builds an input_fn yielding shuffled windows of the generated series.

  Args:
    seed: Shuffle seed, so train and eval inputs can differ deterministically.

  Returns:
    An input_fn producing batches of feature dicts with TIMES, VALUES and an
    "exogenous" key.
  """

  def map_to_dict(time, data, exogenous):
    # Convert a (times, values, exogenous) tuple into the feature dict the
    # estimator expects.
    return {
        feature_keys.TrainEvalFeatures.TIMES: time,
        feature_keys.TrainEvalFeatures.VALUES: data,
        "exogenous": exogenous
    }

  def batch_windows(time, data, exogenous):
    # Each window is a nested dataset; zip and batch it into a dense window
    # of 16 contiguous steps.
    return tf.compat.v1.data.Dataset.zip((time, data, exogenous)).batch(
        16, drop_remainder=True)

  def input_fn():
    dataset = tf.compat.v1.data.Dataset.from_tensor_slices(_generate_data())
    dataset = dataset.window(16, shift=1, drop_remainder=True)
    dataset = dataset.shuffle(1000, seed=seed).repeat()
    dataset = dataset.flat_map(batch_windows).batch(16).map(map_to_dict)
    return dataset

  return input_fn


@test_util.run_v1_only("Currently incompatible with ResourceVariable")
class TimeSeriesRegressorTest(tf.test.TestCase):

  def _fit_restore_fit_test_template(self, estimator_fn, test_saved_model):
    """Tests restoring previously fit models."""
    temp_dir = self.get_temp_dir()
    model_dir = tempfile.mkdtemp(dir=temp_dir)
    exogenous_feature_columns = (
        tf.feature_column.numeric_column("exogenous"),)
    first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
    train_input_fn = _build_input_fn_with_seed(2)
    eval_input_fn = _build_input_fn_with_seed(3)
    first_estimator.train(input_fn=train_input_fn, steps=1)
    first_evaluation = first_estimator.evaluate(input_fn=eval_input_fn, steps=1)
    first_loss_before_fit = first_evaluation["loss"]
    # "loss" and "average_loss" should agree, and loss must be a scalar.
    self.assertAllEqual(first_loss_before_fit, first_evaluation["average_loss"])
    self.assertAllEqual([], first_loss_before_fit.shape)
    first_estimator.train(input_fn=train_input_fn, steps=1)
    first_loss_after_fit = first_estimator.evaluate(
        input_fn=eval_input_fn, steps=1)["loss"]
    self.assertAllEqual([], first_loss_after_fit.shape)
    # A second estimator pointed at the same model_dir restores the
    # checkpoint and continues training.
    second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
    second_estimator.train(input_fn=train_input_fn, steps=1)
    second_evaluation = second_estimator.evaluate(
        input_fn=eval_input_fn, steps=1)
    exogenous_values_ten_steps = {
        "exogenous": tf.range(10, dtype=tf.dtypes.float32)[None, :, None]
    }
    input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
    export_location = first_estimator.export_saved_model(
        temp_dir, input_receiver_fn)
    if not test_saved_model:
      return
    with tf.Graph().as_default():
      with tf.compat.v1.Session() as sess:
        signatures = tf.compat.v1.saved_model.load(sess,
                                                   [tf.saved_model.SERVING],
                                                   export_location)
        # Test that prediction and filtering can continue from evaluation
        # output
        _ = saved_model_utils.predict_continuation(
            continue_from=second_evaluation,
            steps=10,
            exogenous_features=exogenous_values_ten_steps,
            signatures=signatures,
            session=sess)
        times, values, _ = _generate_data()
        first_filtering = saved_model_utils.filter_continuation(
            continue_from=second_evaluation,
            features={
                feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2,
                feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2.,
                "exogenous": values[None, -1, None] + 12.
            },
            signatures=signatures,
            session=sess)
        # Test that prediction and filtering can continue from filtering
        # output
        second_saved_prediction = saved_model_utils.predict_continuation(
            continue_from=first_filtering,
            steps=1,
            exogenous_features={
                "exogenous":
                    tf.range(1, dtype=tf.dtypes.float32)[None, :, None]
            },
            signatures=signatures,
            session=sess)
        self.assertEqual(
            times[-1] + 3,
            tf.compat.v1.squeeze(
                second_saved_prediction[feature_keys.PredictionResults.TIMES]))
        saved_model_utils.filter_continuation(
            continue_from=first_filtering,
            features={
                feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
                feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
                "exogenous": values[-1, None] + 13.
            },
            signatures=signatures,
            session=sess)
        # Test cold starting
        six.assertCountEqual(
            self, [
                feature_keys.FilteringFeatures.TIMES,
                feature_keys.FilteringFeatures.VALUES, "exogenous"
            ],
            signatures.signature_def[
                feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys())
        batched_times = tf.tile(
            tf.range(30, dtype=tf.dtypes.int64)[None, :], (10, 1))
        batched_values = tf.ones([10, 30, 1])
        state = saved_model_utils.cold_start_filter(
            signatures=signatures,
            session=sess,
            features={
                feature_keys.FilteringFeatures.TIMES: batched_times,
                feature_keys.FilteringFeatures.VALUES: batched_values,
                "exogenous": 10. + batched_values
            })
        predict_times = math_ops.tile(
            tf.range(30, 45, dtype=tf.dtypes.int64)[None, :], (10, 1))
        predictions = saved_model_utils.predict_continuation(
            continue_from=state,
            times=predict_times,
            exogenous_features={
                "exogenous":
                    math_ops.tile(
                        tf.range(15, dtype=tf.dtypes.float32),
                        (10,))[None, :, None]
            },
            signatures=signatures,
            session=sess)
        self.assertAllEqual([10, 15, 1], predictions["mean"].shape)

  def disabled_test_time_series_regressor(self):

    def _estimator_fn(model_dir, exogenous_feature_columns):
      return estimators.TimeSeriesRegressor(
          model=ar_model.ARModel(
              periodicities=10,
              input_window_size=10,
              output_window_size=6,
              num_features=1,
              exogenous_feature_columns=exogenous_feature_columns,
              prediction_model_factory=functools.partial(
                  ar_model.LSTMPredictionModel, num_units=10)),
          config=_SeedRunConfig(),
          model_dir=model_dir)

    self._fit_restore_fit_test_template(_estimator_fn, test_saved_model=True)

  def test_ar_lstm_regressor(self):

    def _estimator_fn(model_dir, exogenous_feature_columns):
      return estimators.LSTMAutoRegressor(
          periodicities=10,
          input_window_size=10,
          output_window_size=6,
          model_dir=model_dir,
          num_features=1,
          extra_feature_columns=exogenous_feature_columns,
          num_units=10,
          config=_SeedRunConfig())

    # LSTMAutoRegressor uses OneShotPredictionHead which does not work with
    # saved models.
    self._fit_restore_fit_test_template(_estimator_fn, test_saved_model=False)


if __name__ == "__main__":
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/feature_keys.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Commonly used special feature names for time series models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


class State(object):
  """Key formats for accepting/returning state."""
  # The model-dependent state to start from, as a single tuple.
  STATE_TUPLE = "start_tuple"
  # Same meaning as STATE_TUPLE, but prefixes keys representing flattened model
  # state rather than mapping to a nested tuple containing model state,
  # primarily for use with export_saved_model.
  STATE_PREFIX = "model_state"


class Times(object):
  """Key formats for accepting/returning times."""
  # An increasing vector of integers.
  TIMES = "times"


class Values(object):
  """Key formats for accepting/returning values."""
  # Floating point, with one or more values corresponding to each time in
  # TIMES.
  VALUES = "values"


# The classes below mix the key namespaces above into the combinations used by
# each Estimator mode; they add no keys of their own.
class TrainEvalFeatures(Times, Values):
  """Feature names used during training and evaluation."""
  pass


class PredictionFeatures(Times, State):
  """Feature names used during prediction."""
  pass


class FilteringFeatures(Times, Values, State):
  """Special feature names for filtering."""
  pass


class PredictionResults(Times):
  """Keys returned when predicting (not comprehensive)."""
  pass


class FilteringResults(Times, State):
  """Keys returned from evaluation/filtering."""
  pass


class SavedModelLabels(object):
  """Names of signatures exported with export_saved_model."""
  PREDICT = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
  FILTER = "filter"
  COLD_START_FILTER = "cold_start_filter"



================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/head.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Timeseries head."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import re

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow_estimator.python.estimator import estimator_lib
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys
from tensorflow_estimator.python.estimator.export import export_lib


class _NoStatePredictOutput(export_lib.PredictOutput):
  """PredictOutput which drops flattened-state keys from its signature inputs."""

  def as_signature_def(self, receiver_tensors):
    # Strip any "model_state_NN" receiver tensors so the exported signature
    # does not advertise state inputs.
    no_state_receiver_tensors = {
        key: value
        for key, value in receiver_tensors.items()
        if not key.startswith(feature_keys.State.STATE_PREFIX)
    }
    return super(
        _NoStatePredictOutput,
        self).as_signature_def(receiver_tensors=no_state_receiver_tensors)


class TimeSeriesRegressionHead(head_lib._Head):  # pylint:disable=protected-access
  """Determines input and output signatures for a time series model."""

  def __init__(self,
               model,
               state_manager,
               optimizer,
               input_statistics_generator=None,
               name=None):
    """Creates a `_Head` for time series regression.

    Args:
      model: A model for time series regression.
      state_manager: A state manager.
      optimizer: An optimizer.
      input_statistics_generator: An input statistics generator.
      name: An optional name for the model.
    """
    self.model = model
    self.state_manager = state_manager
    self.optimizer = optimizer
    self.input_statistics_generator = input_statistics_generator
    self._name = name

  @property
  def name(self):
    return self._name

  # TODO(terrytangyuan): consolidate `model_outputs` and `_Head.LossSpec`
  # once `_Head.create_loss` becomes extendable
  def create_loss(self, features, mode, logits=None, labels=None):
    """See `_Head`."""
    model_outputs = self.state_manager.define_loss(self.model, features, mode)
    # Record the loss as a summary under this head's name scope.
    tf.compat.v1.summary.scalar(
        head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS),
        model_outputs.loss)
    return model_outputs

  @property
  def logits_dimension(self):
    """See `_Head`."""
    return 1

  def _train_ops(self, features):
    """Add training ops to the graph."""
    mode = estimator_lib.ModeKeys.TRAIN
    with tf.compat.v1.variable_scope(
        "model",
        # Use ResourceVariables to avoid race conditions.
        use_resource=True):
      model_outputs = self.create_loss(features, mode)

    train_op = self.optimizer.minimize(
        model_outputs.loss, global_step=tf.compat.v1.train.get_global_step())
    return estimator_lib.EstimatorSpec(
        loss=model_outputs.loss, mode=mode, train_op=train_op)

  def _evaluate_ops(self, features):
    """Add ops for evaluation (aka filtering) to the graph."""
    mode = estimator_lib.ModeKeys.EVAL
    with tf.compat.v1.variable_scope("model", use_resource=True):
      model_outputs = self.create_loss(features, mode)
    metrics = {}
    # Just output in-sample predictions for the last chunk seen
    for prediction_key, prediction_value in model_outputs.predictions.items():
      metrics[prediction_key] = _identity_metric_single(prediction_key,
                                                        prediction_value)
    metrics[feature_keys.FilteringResults.TIMES] = _identity_metric_single(
        feature_keys.FilteringResults.TIMES, model_outputs.prediction_times)
    metrics[feature_keys.FilteringResults.STATE_TUPLE] = (
        _identity_metric_nested(feature_keys.FilteringResults.STATE_TUPLE,
                                model_outputs.end_state))
    metrics[metric_keys.MetricKeys.LOSS_MEAN] = tf.compat.v1.metrics.mean(
        model_outputs.loss, name="average_loss")
    return estimator_lib.EstimatorSpec(
        loss=model_outputs.loss,
        mode=mode,
        eval_metric_ops=metrics,
        # needed for custom metrics.
        predictions=model_outputs.predictions)

  def _predict_ops(self, features):
    """Add ops for prediction to the graph."""
    with tf.compat.v1.variable_scope("model", use_resource=True):
      prediction = self.model.predict(features=features)
    prediction[feature_keys.PredictionResults.TIMES] = features[
        feature_keys.PredictionFeatures.TIMES]
    return estimator_lib.EstimatorSpec(
        predictions=prediction, mode=estimator_lib.ModeKeys.PREDICT)

  def _serving_ops(self, features):
    """Add ops for serving to the graph."""
    with tf.compat.v1.variable_scope("model", use_resource=True):
      prediction_outputs = self.model.predict(features=features)
    with tf.compat.v1.variable_scope("model", reuse=True):
      filtering_outputs = self.create_loss(features,
                                           estimator_lib.ModeKeys.EVAL)
    with tf.compat.v1.variable_scope("model", reuse=True):
      no_state_features = {
          k: v
          for k, v in features.items()
          if not k.startswith(feature_keys.State.STATE_PREFIX)
      }
      # Ignore any state management when cold-starting. The model's default
      # start state is replicated across the batch.
      cold_filtering_outputs = self.model.define_loss(
          features=no_state_features, mode=estimator_lib.ModeKeys.EVAL)
    return estimator_lib.EstimatorSpec(
        mode=estimator_lib.ModeKeys.PREDICT,
        export_outputs={
            feature_keys.SavedModelLabels.PREDICT:
                export_lib.PredictOutput(prediction_outputs),
            feature_keys.SavedModelLabels.FILTER:
                export_lib.PredictOutput(
                    state_to_dictionary(filtering_outputs.end_state)),
            feature_keys.SavedModelLabels.COLD_START_FILTER:
                _NoStatePredictOutput(
                    state_to_dictionary(cold_filtering_outputs.end_state))
        },
        # Likely unused, but it is necessary to return `predictions` to satisfy
        # the Estimator's error checking.
        predictions={})

  def _convert_feature_to_tensor(self, name, value):
    """Casts features to the correct dtype based on their name."""
    if name in [
        feature_keys.TrainEvalFeatures.TIMES,
        feature_keys.PredictionFeatures.TIMES
    ]:
      return tf.cast(value, tf.dtypes.int64)
    if name == feature_keys.TrainEvalFeatures.VALUES:
      return tf.cast(value, self.model.dtype)
    if name == feature_keys.PredictionFeatures.STATE_TUPLE:
      return value  # Correct dtypes are model-dependent
    return tf.compat.v1.convert_to_tensor_or_sparse_tensor(value)

  def _gather_state(self, features):
    """Returns `features` with state packed, indicates if packing was done."""
    prefixed_state_re = re.compile(r"^" + feature_keys.State.STATE_PREFIX +
                                   r"_(\d+)$")
    numbered_state = []
    for key, tensor in features.items():
      search_result = prefixed_state_re.search(key)
      if search_result:
        # Models written to deal with state have a knowledge of what is
        # expected, so the index recovered here orders the flattened state.
        numbered_state.append((int(search_result.group(1)), key, tensor))
    if not numbered_state:
      return features, False
    features = features.copy()
    for _, key, _ in numbered_state:
      del features[key]
    # NOTE(review): `key` receives the whole (number, key, tensor) tuple as
    # its single argument (so `number` is bound to the tuple and `*_` is
    # empty); sorting therefore compares full tuples. This still orders by the
    # leading index because indices are distinct, but `lambda item: item[0]`
    # would express the intent directly — confirm before changing.
    numbered_state.sort(key=lambda number, *_: number)
    features[feature_keys.State.STATE_TUPLE] = tf.nest.pack_sequence_as(
        structure=self.model.get_start_state(),
        flat_sequence=[tensor for _, _, tensor in numbered_state])
    return features, True

  def _check_predict_features(self, features):
    """Raises errors if features are not suitable for prediction."""
    if feature_keys.PredictionFeatures.TIMES not in features:
      raise ValueError("Expected a '{}' feature for prediction.".format(
          feature_keys.PredictionFeatures.TIMES))
    if feature_keys.PredictionFeatures.STATE_TUPLE not in features:
      raise ValueError("Expected a '{}' feature for prediction.".format(
          feature_keys.PredictionFeatures.STATE_TUPLE))
    times_feature = features[feature_keys.PredictionFeatures.TIMES]
    if not times_feature.get_shape().is_compatible_with([None, None]):
      raise ValueError(
          ("Expected shape (batch dimension, window size) for feature '{}' "
           "(got shape {})").format(feature_keys.PredictionFeatures.TIMES,
                                    times_feature.get_shape()))
    _check_feature_shapes_compatible_with(
        features=features,
        compatible_with_name=feature_keys.PredictionFeatures.TIMES,
        compatible_with_value=times_feature,
        ignore=set([
            # Model-dependent shapes
            feature_keys.PredictionFeatures.STATE_TUPLE
        ]))

  def create_estimator_spec(self, features, mode, labels=None):
    """Performs basic error checking and returns an EstimatorSpec."""
    with ops.name_scope(self._name, "head"):
      # Labels are not accepted: times/values must be passed as features.
      if labels is not None and not (isinstance(labels, dict) and
                                     labels == {}):  # for better error messages.  # pylint: disable=g-explicit-bool-comparison
        raise ValueError(
            "The model received a `labels`, which is not supported. "
            "Pass '{}' and '{}' as features.".format(
                feature_keys.TrainEvalFeatures.TIMES,
                feature_keys.TrainEvalFeatures.VALUES))
      del labels
      features = {
          name: self._convert_feature_to_tensor(name=name, value=value)
          for name, value in features.items()
      }
      if self.input_statistics_generator is not None:
        input_statistics = self.input_statistics_generator.initialize_graph(
            features, update_statistics=(mode == estimator_lib.ModeKeys.TRAIN))
      else:
        input_statistics = None
      self.model.initialize_graph(input_statistics=input_statistics)

      # _gather_state requires the model to have its graph initialized (so it
      # has access to the structure of the model's state)
      features, passed_flat_state = self._gather_state(features)
      if (mode == estimator_lib.ModeKeys.TRAIN or
          mode == estimator_lib.ModeKeys.EVAL):
        _check_train_eval_features(features, self.model)
      elif mode == estimator_lib.ModeKeys.PREDICT:
        self._check_predict_features(features)
      else:
        raise ValueError("Unknown mode '{}' passed to model_fn.".format(mode))

      self.state_manager.initialize_graph(
          model=self.model, input_statistics=input_statistics)

      if mode == estimator_lib.ModeKeys.TRAIN:
        return self._train_ops(features)
      elif mode == estimator_lib.ModeKeys.EVAL:
        return self._evaluate_ops(features)
      elif mode == estimator_lib.ModeKeys.PREDICT and not passed_flat_state:
        return self._predict_ops(features)
      elif mode == estimator_lib.ModeKeys.PREDICT and passed_flat_state:
        # The mode is PREDICT, but we're actually in export_saved_model for
        # serving. We want to return two graphs: one for filtering (state +
        # data -> state) and one for predicting (state -> prediction).
        return self._serving_ops(features)


class OneShotPredictionHead(TimeSeriesRegressionHead):
  """A time series head which exports a single stateless serving signature.

  The serving default signature exported by this head expects `times`,
  `values`, and any exogenous features, but no state. `values` has shape
  `[batch_size, filter_length, num_features]` and `times` has shape
  `[batch_size, total_length]`, where `total_length > filter_length`. Any
  exogenous features must have their shapes prefixed by the shape of the
  `times` feature.

  When serving, first performs filtering on the series up to `filter_length`
  starting from the default start state for the model, then computes
  predictions on the remainder of the series, returning them.

  Model state is neither accepted nor returned, so filtering must be
  performed each time predictions are requested when using this head.
  """

  def _check_predict_features(self, features):
    """Raises errors if features are not suitable for one-shot prediction."""
    if feature_keys.PredictionFeatures.TIMES not in features:
      raise ValueError("Expected a '{}' feature for prediction.".format(
          feature_keys.PredictionFeatures.TIMES))
    if feature_keys.TrainEvalFeatures.VALUES not in features:
      raise ValueError("Expected a '{}' feature for prediction.".format(
          feature_keys.TrainEvalFeatures.VALUES))
    if feature_keys.PredictionFeatures.STATE_TUPLE not in features:
      raise ValueError("Expected a '{}' feature for prediction.".format(
          feature_keys.PredictionFeatures.STATE_TUPLE))
    times_feature = features[feature_keys.PredictionFeatures.TIMES]
    if not times_feature.get_shape().is_compatible_with([None, None]):
      raise ValueError(
          ("Expected shape (batch dimension, window size) for feature '{}' "
           "(got shape {})").format(feature_keys.PredictionFeatures.TIMES,
                                    times_feature.get_shape()))
    _check_feature_shapes_compatible_with(
        features=features,
        compatible_with_name=feature_keys.PredictionFeatures.TIMES,
        compatible_with_value=times_feature,
        ignore=set([
            # Model-dependent shapes
            feature_keys.PredictionFeatures.STATE_TUPLE,
            # One shot prediction head relies on values being shorter than
            # times. Even though we're predicting eventually, we need values
            # for the filtering phase.
            feature_keys.TrainEvalFeatures.VALUES,
        ]))

  def _evaluate_ops(self, features):
    """Add ops for evaluation (aka filtering) to the graph."""
    spec = super(OneShotPredictionHead, self)._evaluate_ops(features)
    # No state is fed to OneShotPredictionHead, so we don't return it; it
    # being a tuple can cause issues for downstream infrastructure.
    del spec.eval_metric_ops[feature_keys.State.STATE_TUPLE]
    return spec

  def _serving_ops(self, features):
    """Add ops for serving to the graph."""
    with tf.compat.v1.variable_scope("model", use_resource=True):
      filtering_features = {}
      prediction_features = {}
      # `values` covers only the filtering prefix; everything after its
      # length is the prediction window.
      values_length = tf.compat.v1.shape(
          features[feature_keys.FilteringFeatures.VALUES])[1]
      for key, value in features.items():
        if key == feature_keys.State.STATE_TUPLE:
          # Ignore state input. The model's default start state is replicated
          # across the batch.
          continue
        if key == feature_keys.FilteringFeatures.VALUES:
          filtering_features[key] = value
        else:
          filtering_features[key] = value[:, :values_length]
          prediction_features[key] = value[:, values_length:]
      cold_filtering_outputs = self.model.define_loss(
          features=filtering_features, mode=estimator_lib.ModeKeys.EVAL)
      prediction_features[feature_keys.State.STATE_TUPLE] = (
          cold_filtering_outputs.end_state)
    with tf.compat.v1.variable_scope("model", reuse=True):
      prediction_outputs = self.model.predict(features=prediction_features)
    return estimator_lib.EstimatorSpec(
        mode=estimator_lib.ModeKeys.PREDICT,
        export_outputs={
            feature_keys.SavedModelLabels.PREDICT:
                _NoStatePredictOutput(prediction_outputs),
        },
        # Likely unused, but it is necessary to return `predictions` to satisfy
        # the Estimator's error checking.
        predictions={})


def _check_feature_shapes_compatible_with(features,
                                          compatible_with_name,
                                          compatible_with_value,
                                          ignore=None):
  """Checks all features are compatible with the given time-like feature."""
  if ignore is None:
    ignore = set()
  for name, value in features.items():
    if name in ignore:
      continue
    feature_shape = value.get_shape()
    if feature_shape.ndims is None:
      continue
    if feature_shape.ndims < 2:
      raise ValueError(
          ("Features must have shape (batch dimension, window size, ...) "
           "(got rank {} for feature '{}')").format(feature_shape.ndims, name))
    if not feature_shape[:2].is_compatible_with(
        compatible_with_value.get_shape()):
      raise ValueError(
          ("Features must have shape (batch dimension, window size, ...) "
           "where batch dimension and window size match the "
           "'{times_feature}' feature (got shape {feature_shape} for "
           "feature '{feature_name}' but shape {times_shape} for feature "
           "'{times_feature}')").format(
               times_feature=compatible_with_name,
               feature_shape=feature_shape,
               feature_name=name,
               times_shape=compatible_with_value.get_shape()))


def _check_train_eval_features(features, model):
  """Raise errors if features are not suitable for training/evaluation."""
  if feature_keys.TrainEvalFeatures.TIMES not in features:
    raise ValueError("Expected a '{}' feature for training/evaluation.".format(
        feature_keys.TrainEvalFeatures.TIMES))
  if feature_keys.TrainEvalFeatures.VALUES not in features:
    raise ValueError("Expected a '{}' feature for training/evaluation.".format(
        feature_keys.TrainEvalFeatures.VALUES))
  times_feature = features[feature_keys.TrainEvalFeatures.TIMES]
  if not times_feature.get_shape().is_compatible_with([None, None]):
    raise ValueError(
        ("Expected shape (batch dimension, window size) for feature '{}' "
         "(got shape {})").format(feature_keys.TrainEvalFeatures.TIMES,
                                  times_feature.get_shape()))
  values_feature = features[feature_keys.TrainEvalFeatures.VALUES]
  if not values_feature.get_shape().is_compatible_with(
      [None, None, model.num_features]):
    # NOTE(review): this error message formats `got_shape` from
    # `times_feature` even though the check failed on `values_feature` —
    # looks like it should be `values_feature.get_shape()`. Left unchanged
    # here since it is runtime-visible text; confirm and fix separately.
    raise ValueError(
        ("Expected shape (batch dimension, window size, {num_features}) "
         "for feature '{feature_name}', since the model was configured "
         "with num_features={num_features} (got shape {got_shape})").format(
             num_features=model.num_features,
             feature_name=feature_keys.TrainEvalFeatures.VALUES,
             got_shape=times_feature.get_shape()))
  _check_feature_shapes_compatible_with(
      features=features,
      compatible_with_name=feature_keys.TrainEvalFeatures.TIMES,
      compatible_with_value=times_feature,
      ignore=set([
          feature_keys.State.STATE_TUPLE  # Model-dependent shapes
      ]))


def _identity_metric_single(name, input_tensor):
  """A metric which takes on its last updated value.

  This keeps evaluation metrics in sync with one another, since update ops are
  run separately from their result Tensors. Simply returning (input_tensor,
  no_op) as a metric with a value but no update means that a metric will come
  from a different batch of data than metrics which cache values in a Variable
  (e.g. the default loss metric).

  Args:
    name: A name for the metric.
    input_tensor: Any Tensor.

  Returns:
    A tuple of (value, update_op).
  """
  metric_variable = tf.compat.v1.Variable(
      name="{}_identity_metric".format(name),
      initial_value=tf.zeros([], dtype=input_tensor.dtype),
      collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES],
      # The variable's shape is set lazily by the first update below.
      validate_shape=False)
  update_op = tf.compat.v1.assign(
      metric_variable, input_tensor, validate_shape=False)
  # This shape will be correct once the first update runs (but may be
  # incomplete, so is not helpful for initializing the variable).
  metric_variable.set_shape(input_tensor.get_shape())
  return (metric_variable.value(), update_op)


def _identity_metric_nested(name, input_tensors):
  """Create identity metrics for a nested tuple of Tensors."""
  update_ops = []
  value_tensors = []
  for tensor_number, tensor in enumerate(tf.nest.flatten(input_tensors)):
    value_tensor, update_op = _identity_metric_single(
        name="{}_{}".format(name, tensor_number), input_tensor=tensor)
    update_ops.append(update_op)
    value_tensors.append(value_tensor)
  return (tf.nest.pack_sequence_as(input_tensors, value_tensors),
          tf.group(*update_ops))


def state_to_dictionary(state_tuple):
  """Flatten model state into a dictionary with string keys."""
  flattened = {}
  for state_number, state_value in enumerate(tf.nest.flatten(state_tuple)):
    prefixed_state_name = "{}_{:02d}".format(feature_keys.State.STATE_PREFIX,
                                             state_number)
    flattened[prefixed_state_name] = state_value
  return flattened



================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/head_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests for head.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import functools import os from absl.testing import parameterized import numpy import six import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator import estimator_lib from tensorflow_estimator.python.estimator import extenders from tensorflow_estimator.python.estimator.canned.timeseries import ar_model from tensorflow_estimator.python.estimator.canned.timeseries import estimators as ts_estimators from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys from tensorflow_estimator.python.estimator.canned.timeseries import head as ts_head_lib from tensorflow_estimator.python.estimator.canned.timeseries import model from tensorflow_estimator.python.estimator.canned.timeseries import state_management class HeadTest(tf.test.TestCase): def test_labels_provided_error(self): model_fn = _stub_model_fn() for mode in [ estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL, estimator_lib.ModeKeys.PREDICT ]: with self.assertRaisesRegexp(ValueError, "received a `labels`"): model_fn(features={}, labels={"a": "b"}, mode=mode) with self.assertRaisesRegexp(ValueError, "received a `labels`"): model_fn(features={}, labels=tf.zeros([]), mode=mode) def test_unknown_mode(self): model_fn = _stub_model_fn() with self.assertRaisesRegexp(ValueError, "Unknown mode 'Not a mode'"): model_fn(features={}, labels={}, mode="Not a mode") class _TickerModel(object): num_features = 1 dtype = tf.dtypes.float32 def initialize_graph(self, input_statistics): pass def define_loss(self, features, mode): del mode # unused return model.ModelOutputs( loss=features["ticker"], end_state=(features["ticker"], features["ticker"]), 
prediction_times=tf.zeros(()), predictions={"ticker": features["ticker"]}) @test_util.run_v1_only("Currently incompatible with ResourceVariable") class EvaluationMetricsTests(tf.test.TestCase): def test_metrics_consistent(self): # Tests that the identity metrics used to report in-sample predictions match # the behavior of standard metrics. g = tf.Graph() with g.as_default(): features = { feature_keys.TrainEvalFeatures.TIMES: tf.zeros((1, 1)), feature_keys.TrainEvalFeatures.VALUES: tf.zeros((1, 1, 1)), "ticker": tf.reshape( tf.cast( tf.compat.v1.Variable( name="ticker", initial_value=0, dtype=tf.dtypes.int64, collections=[tf.compat.v1.GraphKeys.LOCAL_VARIABLES]) .count_up_to(10), dtype=tf.dtypes.float32), (1, 1, 1)) } model_fn = ts_head_lib.TimeSeriesRegressionHead( model=_TickerModel(), state_manager=state_management.PassthroughStateManager(), optimizer=tf.compat.v1.train.GradientDescentOptimizer( 0.001)).create_estimator_spec outputs = model_fn( features=features, labels=None, mode=estimator_lib.ModeKeys.EVAL) metric_update_ops = [ metric[1] for metric in outputs.eval_metric_ops.values() ] loss_mean, loss_update = tf.compat.v1.metrics.mean(outputs.loss) metric_update_ops.append(loss_update) with self.cached_session() as sess: coordinator = tf.train.Coordinator() tf.compat.v1.train.queue_runner.start_queue_runners( sess, coord=coordinator) tf.compat.v1.initializers.local_variables().run() sess.run(metric_update_ops) loss_evaled, metric_evaled, nested_metric_evaled = sess.run( (loss_mean, outputs.eval_metric_ops["ticker"][0], outputs.eval_metric_ops[ feature_keys.FilteringResults.STATE_TUPLE][0][0])) # The custom model_utils metrics for in-sample predictions should be in # sync with the Estimator's mean metric for model loss. 
self.assertAllClose(0., loss_evaled) self.assertAllClose((((0.,),),), metric_evaled) self.assertAllClose((((0.,),),), nested_metric_evaled) coordinator.request_stop() coordinator.join() def test_custom_metrics(self): """Tests that the custom metrics can be applied to the estimator.""" model_dir = self.get_temp_dir() estimator = ts_estimators.LSTMAutoRegressor( periodicities=1, input_window_size=1, output_window_size=1, num_features=1, num_units=4, optimizer=tf.compat.v1.train.AdamOptimizer(0.001), config=estimator_lib.RunConfig(tf_random_seed=4), model_dir=model_dir) def input_fn(): return { feature_keys.TrainEvalFeatures.TIMES: [[1, 2, 3], [7, 8, 9]], feature_keys.TrainEvalFeatures.VALUES: numpy.array([[[0.], [1.], [0.]], [[2.], [3.], [2.]]]) } def metrics_fn(predictions, features): # checking that the inputs are properly passed. predict = predictions["mean"] target = features[feature_keys.TrainEvalFeatures.VALUES][:, -1, 0] return { "plain_boring_metric386": (tf.math.reduce_mean(tf.math.abs(predict - target)), tf.no_op()), "fun_metric101": (tf.math.reduce_sum(predict + target), tf.no_op()), } # Evaluation without training is enough for testing custom metrics. 
estimator = extenders.add_metrics(estimator, metrics_fn) evaluation = estimator.evaluate(input_fn, steps=1) self.assertIn("plain_boring_metric386", evaluation) self.assertIn("fun_metric101", evaluation) self.assertIn("average_loss", evaluation) class _StubModel(object): num_features = 3 dtype = tf.dtypes.float64 def initialize_graph(self, input_statistics): del input_statistics # unused def _stub_model_fn(): return ts_head_lib.TimeSeriesRegressionHead( model=_StubModel(), state_manager=state_management.PassthroughStateManager(), optimizer=tf.compat.v1.train.AdamOptimizer(0.001)).create_estimator_spec class TrainEvalFeatureCheckingTests(tf.test.TestCase): def test_no_time_feature(self): model_fn = _stub_model_fn() for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: with self.assertRaisesRegexp( ValueError, "Expected a '{}' feature".format( feature_keys.TrainEvalFeatures.TIMES)): model_fn( features={feature_keys.TrainEvalFeatures.VALUES: [[[1.]]]}, labels=None, mode=mode) def test_no_value_feature(self): model_fn = _stub_model_fn() for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: with self.assertRaisesRegexp( ValueError, "Expected a '{}' feature".format( feature_keys.TrainEvalFeatures.VALUES)): model_fn( features={feature_keys.TrainEvalFeatures.TIMES: [[1]]}, labels=None, mode=mode) def test_bad_time_rank(self): model_fn = _stub_model_fn() for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: with self.assertRaisesRegexp( ValueError, "Expected shape.*for feature '{}'".format( feature_keys.TrainEvalFeatures.TIMES)): model_fn( features={ feature_keys.TrainEvalFeatures.TIMES: [[[1]]], feature_keys.TrainEvalFeatures.VALUES: [[[1.]]] }, labels=None, mode=mode) def test_bad_value_rank(self): model_fn = _stub_model_fn() for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: with self.assertRaisesRegexp( ValueError, "Expected shape.*for feature '{}'".format( 
feature_keys.TrainEvalFeatures.VALUES)): model_fn( features={ feature_keys.TrainEvalFeatures.TIMES: [[1]], feature_keys.TrainEvalFeatures.VALUES: [[1.]] }, labels=None, mode=mode) def test_bad_value_num_features(self): model_fn = _stub_model_fn() for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: with self.assertRaisesRegexp( ValueError, "Expected shape.*, 3.*for feature '{}'".format( feature_keys.TrainEvalFeatures.VALUES)): model_fn( features={ feature_keys.TrainEvalFeatures.TIMES: [[1]], feature_keys.TrainEvalFeatures.VALUES: [[[1.]]] }, labels=None, mode=mode) def test_bad_exogenous_shape(self): model_fn = _stub_model_fn() for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: with self.assertRaisesRegexp( ValueError, "Features must have shape.*for feature 'exogenous'"): model_fn( features={ feature_keys.TrainEvalFeatures.TIMES: [[1]], feature_keys.TrainEvalFeatures.VALUES: [[[1., 2., 3.]]], "exogenous": [[1], [2]] }, labels=None, mode=mode) class PredictFeatureCheckingTests(tf.test.TestCase): def test_no_time_feature(self): model_fn = _stub_model_fn() with self.assertRaisesRegexp( ValueError, "Expected a '{}' feature".format( feature_keys.PredictionFeatures.TIMES)): model_fn( features={ feature_keys.PredictionFeatures.STATE_TUPLE: ([[[1.]]], 1.) 
}, labels=None, mode=estimator_lib.ModeKeys.PREDICT) def test_no_start_state_feature(self): model_fn = _stub_model_fn() with self.assertRaisesRegexp( ValueError, "Expected a '{}' feature".format( feature_keys.PredictionFeatures.STATE_TUPLE)): model_fn( features={feature_keys.PredictionFeatures.TIMES: [[1]]}, labels=None, mode=estimator_lib.ModeKeys.PREDICT) def test_bad_time_rank(self): model_fn = _stub_model_fn() with self.assertRaisesRegexp( ValueError, "Expected shape.*for feature '{}'".format( feature_keys.PredictionFeatures.TIMES)): model_fn( features={ feature_keys.PredictionFeatures.TIMES: 1, feature_keys.PredictionFeatures.STATE_TUPLE: (1, (2, 3.)) }, labels=None, mode=estimator_lib.ModeKeys.PREDICT) def test_bad_exogenous_shape(self): model_fn = _stub_model_fn() with self.assertRaisesRegexp( ValueError, "Features must have shape.*for feature 'exogenous'"): model_fn( features={ feature_keys.PredictionFeatures.TIMES: [[1]], feature_keys.PredictionFeatures.STATE_TUPLE: (1, (2, 3.)), "exogenous": 1. 
}, labels=None, mode=estimator_lib.ModeKeys.PREDICT) @test_util.run_v1_only("Currently incompatible with ResourceVariable") class OneShotTests(parameterized.TestCase): def test_one_shot_prediction_head_export(self): def _new_temp_dir(): return os.path.join(tf.compat.v1.test.get_temp_dir(), str(ops.uid())) model_dir = _new_temp_dir() categorical_column = tf.feature_column.categorical_column_with_hash_bucket( key="categorical_exogenous_feature", hash_bucket_size=16) exogenous_feature_columns = [ tf.feature_column.numeric_column("2d_exogenous_feature", shape=(2,)), tf.feature_column.embedding_column( categorical_column=categorical_column, dimension=10) ] estimator = ts_estimators.TimeSeriesRegressor( model=ar_model.ARModel( periodicities=10, input_window_size=10, output_window_size=6, num_features=5, exogenous_feature_columns=exogenous_feature_columns, prediction_model_factory=functools.partial( ar_model.LSTMPredictionModel, num_units=10)), head_type=ts_head_lib.OneShotPredictionHead, model_dir=model_dir) def train_input_fn(): num_range = tf.range(16, dtype=tf.dtypes.int64) features = { feature_keys.TrainEvalFeatures.TIMES: tf.compat.v1.expand_dims(num_range, axis=0), feature_keys.TrainEvalFeatures.VALUES: tf.compat.v1.expand_dims( tf.tile(num_range[:, None], [1, 5]), axis=0), "2d_exogenous_feature": tf.ones([1, 16, 2]), "categorical_exogenous_feature": tf.compat.v1.expand_dims( tf.tile(["strkey"], [16])[:, None], axis=0) } return features estimator.train(input_fn=train_input_fn, steps=5) result = estimator.evaluate(input_fn=train_input_fn, steps=1) self.assertIn("average_loss", result) self.assertNotIn(feature_keys.State.STATE_TUPLE, result) input_receiver_fn = estimator.build_raw_serving_input_receiver_fn() export_location = estimator.export_saved_model(_new_temp_dir(), input_receiver_fn) graph = tf.Graph() with graph.as_default(): with tf.compat.v1.Session() as session: signatures = tf.compat.v1.saved_model.load(session, [tf.saved_model.SERVING], export_location) 
self.assertEqual([feature_keys.SavedModelLabels.PREDICT], list(signatures.signature_def.keys())) predict_signature = signatures.signature_def[ feature_keys.SavedModelLabels.PREDICT] six.assertCountEqual(self, [ feature_keys.FilteringFeatures.TIMES, feature_keys.FilteringFeatures.VALUES, "2d_exogenous_feature", "categorical_exogenous_feature" ], predict_signature.inputs.keys()) features = { feature_keys.TrainEvalFeatures.TIMES: numpy.tile( numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]), feature_keys.TrainEvalFeatures.VALUES: numpy.tile( numpy.arange(20, dtype=numpy.float32)[None, :, None], [2, 1, 5]), "2d_exogenous_feature": numpy.ones([2, 35, 2]), "categorical_exogenous_feature": numpy.tile( numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1]) } feeds = { graph.as_graph_element(input_value.name): features[input_key] for input_key, input_value in predict_signature.inputs.items() } fetches = { output_key: graph.as_graph_element(output_value.name) for output_key, output_value in predict_signature.outputs.items() } output = session.run(fetches, feed_dict=feeds) self.assertEqual((2, 15, 5), output["mean"].shape) # Build a parsing input function, then make a tf.Example for it to parse. export_location = estimator.export_saved_model( _new_temp_dir(), estimator.build_one_shot_parsing_serving_input_receiver_fn( filtering_length=20, prediction_length=15)) graph = tf.Graph() with graph.as_default(): with tf.compat.v1.Session() as session: example = example_pb2.Example() times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES] values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES] times.int64_list.value.extend(range(35)) for i in range(20): values.float_list.value.extend( [float(i) * 2. 
+ feature_number for feature_number in range(5)]) real_feature = example.features.feature["2d_exogenous_feature"] categortical_feature = example.features.feature[ "categorical_exogenous_feature"] for i in range(35): real_feature.float_list.value.extend([1, 1]) categortical_feature.bytes_list.value.append(b"strkey") # Serialize the tf.Example for feeding to the Session examples = [example.SerializeToString()] * 2 signatures = tf.compat.v1.saved_model.load(session, [tf.saved_model.SERVING], export_location) predict_signature = signatures.signature_def[ feature_keys.SavedModelLabels.PREDICT] ((_, input_value),) = predict_signature.inputs.items() feeds = {graph.as_graph_element(input_value.name): examples} fetches = { output_key: graph.as_graph_element(output_value.name) for output_key, output_value in predict_signature.outputs.items() } output = session.run(fetches, feed_dict=feeds) self.assertEqual((2, 15, 5), output["mean"].shape) if __name__ == "__main__": tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/timeseries/math_utils.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Miscellaneous utilities used by time series models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import tensorflow as tf from tensorflow.python.ops import gen_math_ops from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import TrainEvalFeatures def replicate_state(start_state, batch_size): """Create batch versions of state. Takes a list of Tensors, adds a batch dimension, and replicates batch_size times across that batch dimension. Used to replicate the non-batch state returned by get_start_state in define_loss. Args: start_state: Model-defined state to replicate. batch_size: Batch dimension for data. Returns: Replicated versions of the state. """ flattened_state = tf.nest.flatten(start_state) replicated_state = [ tf.tile( tf.compat.v1.expand_dims(state_nonbatch, 0), tf.concat([[batch_size], tf.ones([tf.rank(state_nonbatch)], dtype=tf.dtypes.int32)], 0)) for state_nonbatch in flattened_state ] return tf.nest.pack_sequence_as(start_state, replicated_state) Moments = collections.namedtuple("Moments", ["mean", "variance"]) # Currently all of these statistics are computed incrementally (i.e. are updated # every time a new mini-batch of training data is presented) when this object is # created in InputStatisticsFromMiniBatch. InputStatistics = collections.namedtuple( "InputStatistics", [ # The mean and variance of each feature in a chunk (with a size # configured in the statistics object) at the start of the series. A # tuple of (mean, variance), each with shape [number of features], # floating point. One use is in state space models, to keep priors # calibrated even as earlier parts of the series are presented. If this # object was created by InputStatisticsFromMiniBatch, these moments are # computed based on the earliest chunk of data presented so far. 
# However, there is a race condition in the update, so these may reflect # statistics later in the series, but should eventually reflect # statistics in a chunk at the series start. "series_start_moments", # The mean and variance of each feature over the entire series. A tuple # of (mean, variance), each with shape [number of features]. If this # object was created by InputStatisticsFromMiniBatch, these moments are # estimates based on the data seen so far. "overall_feature_moments", # The first (lowest) time in the series, a scalar integer. If this # object was created by InputStatisticsFromMiniBatch, this is the lowest # time seen so far rather than the lowest time that will ever be seen # (guaranteed to be at least as low as the lowest time presented in the # current minibatch). "start_time", # Count of data points, a scalar integer. If this object was created by # InputStatisticsFromMiniBatch, this is an estimate of the total number # of observations in the whole dataset computed based on the density of # the series and the minimum and maximum times seen. "total_observation_count", ]) # TODO(allenl): It would be nice to do something with full series statistics # when the user provides that. class InputStatisticsFromMiniBatch(object): """Generate statistics from mini-batch input.""" def __init__(self, num_features, dtype, starting_variance_window_size=16): """Configure the input statistics object. Args: num_features: Number of features for the time series dtype: The floating point data type to use. starting_variance_window_size: The number of datapoints to use when computing the mean and variance at the start of the series. """ self._starting_variance_window_size = starting_variance_window_size self._num_features = num_features self._dtype = dtype def initialize_graph(self, features, update_statistics=True): """Create any ops needed to provide input statistics. Should be called before statistics are requested. 
Args: features: A dictionary, the output of a `TimeSeriesInputFn` (with keys TrainEvalFeatures.TIMES and TrainEvalFeatures.VALUES). update_statistics: Whether `features` should be used to update adaptive statistics. Typically True for training and false for evaluation. Returns: An InputStatistics object composed of Variables, which will be updated based on mini-batches of data if requested. """ if (TrainEvalFeatures.TIMES in features and TrainEvalFeatures.VALUES in features): times = features[TrainEvalFeatures.TIMES] values = features[TrainEvalFeatures.VALUES] else: # times and values may not be available, for example during prediction. We # still need to retrieve our variables so that they can be read from, even # if we're not going to update them. times = None values = None # Create/retrieve variables representing input statistics, initialized # without data to avoid deadlocking if variables are initialized before # queue runners are started. with tf.compat.v1.variable_scope("input_statistics", use_resource=True): statistics = self._create_variable_statistics_object() with tf.compat.v1.variable_scope( "input_statistics_auxiliary", use_resource=True): # Secondary statistics, necessary for the incremental computation of the # primary statistics (e.g. counts and sums for computing a mean # incrementally). auxiliary_variables = self._AdaptiveInputAuxiliaryStatistics( num_features=self._num_features, dtype=self._dtype) if update_statistics and times is not None and values is not None: # If we have times and values from mini-batch input, create update ops to # take the new data into account. 
assign_op = self._update_statistics_from_mini_batch( statistics, auxiliary_variables, times, values) with tf.control_dependencies([assign_op]): stat_variables = tf.nest.pack_sequence_as( statistics, [tf.identity(tensor) for tensor in tf.nest.flatten(statistics)]) # Since start time updates have a race condition, ensure that the # reported start time is at least as low as the lowest time in this # mini-batch. The start time should converge on the correct value # eventually even with the race condition, but for example state space # models have an assertion which could fail without this # post-processing. min_time = tf.cast(tf.math.reduce_min(times), tf.dtypes.int64) start_time = tf.math.minimum(stat_variables.start_time, min_time) return stat_variables._replace(start_time=start_time) else: return statistics class _AdaptiveInputAuxiliaryStatistics( collections.namedtuple( "_AdaptiveInputAuxiliaryStatistics", [ # The maximum time seen (best effort if updated from multiple # workers; see notes about race condition below). "max_time_seen", # The number of chunks seen. "chunk_count", # The sum across chunks of their "time density" (number of times # per example). "inter_observation_duration_sum", # The number of examples seen (each example has a single time # associated with it and one or more real-valued features). "example_count", # The sum of values for each feature. Shape [number of features]. "overall_feature_sum", # The sum of squared values for each feature. # Shape [number of features]. 
"overall_feature_sum_of_squares", ])): """Extra statistics used to incrementally update InputStatistics.""" def __new__(cls, num_features, dtype): return super( InputStatisticsFromMiniBatch # pylint: disable=protected-access ._AdaptiveInputAuxiliaryStatistics, cls).__new__( cls, max_time_seen=tf.compat.v1.get_variable( name="max_time_seen", initializer=tf.dtypes.int64.min, dtype=tf.dtypes.int64, trainable=False), chunk_count=tf.compat.v1.get_variable( name="chunk_count", initializer=tf.compat.v1.initializers.zeros(), shape=[], dtype=tf.dtypes.int64, trainable=False), inter_observation_duration_sum=tf.compat.v1.get_variable( name="inter_observation_duration_sum", initializer=tf.compat.v1.initializers.zeros(), shape=[], dtype=dtype, trainable=False), example_count=tf.compat.v1.get_variable( name="example_count", shape=[], dtype=tf.dtypes.int64, trainable=False), overall_feature_sum=tf.compat.v1.get_variable( name="overall_feature_sum", shape=[num_features], dtype=dtype, initializer=tf.compat.v1.initializers.zeros(), trainable=False), overall_feature_sum_of_squares=tf.compat.v1.get_variable( name="overall_feature_sum_of_squares", shape=[num_features], dtype=dtype, initializer=tf.compat.v1.initializers.zeros(), trainable=False)) def _update_statistics_from_mini_batch(self, statistics, auxiliary_variables, times, values): """Given mini-batch input, update `statistics` and `auxiliary_variables`.""" values = tf.cast(values, self._dtype) # The density (measured in times per observation) that we see in each part # of the mini-batch. batch_inter_observation_duration = ( tf.cast( tf.math.reduce_max(times, axis=1) - tf.math.reduce_min(times, axis=1), self._dtype) / tf.cast(tf.compat.v1.shape(times)[1] - 1, self._dtype)) # Co-locate updates with their variables to minimize race conditions when # updating statistics. with tf.compat.v1.device(auxiliary_variables.max_time_seen.device): # There is a race condition if this value is being updated from multiple # workers. 
However, it should eventually reach the correct value if the # last chunk is presented enough times. latest_time = tf.cast(tf.math.reduce_max(times), tf.dtypes.int64) max_time_seen = tf.math.maximum(auxiliary_variables.max_time_seen, latest_time) max_time_seen_assign = tf.compat.v1.assign( auxiliary_variables.max_time_seen, max_time_seen) with tf.compat.v1.device(auxiliary_variables.chunk_count.device): chunk_count_assign = tf.compat.v1.assign_add( auxiliary_variables.chunk_count, tf.compat.v1.shape(times, out_type=tf.dtypes.int64)[0]) with tf.compat.v1.device( auxiliary_variables.inter_observation_duration_sum.device): inter_observation_duration_assign = tf.compat.v1.assign_add( auxiliary_variables.inter_observation_duration_sum, tf.math.reduce_sum(batch_inter_observation_duration)) with tf.compat.v1.device(auxiliary_variables.example_count.device): example_count_assign = tf.compat.v1.assign_add( auxiliary_variables.example_count, tf.compat.v1.size(times, out_type=tf.dtypes.int64)) # Note: These mean/variance updates assume that all points are equally # likely, which is not true if _chunks_ are sampled uniformly from the space # of all possible contiguous chunks, since points at the start and end of # the series are then members of fewer chunks. For series which are much # longer than the chunk size (the usual/expected case), this effect becomes # irrelevant. 
with tf.compat.v1.device(auxiliary_variables.overall_feature_sum.device): overall_feature_sum_assign = tf.compat.v1.assign_add( auxiliary_variables.overall_feature_sum, tf.math.reduce_sum(values, axis=[0, 1])) with tf.compat.v1.device( auxiliary_variables.overall_feature_sum_of_squares.device): overall_feature_sum_of_squares_assign = tf.compat.v1.assign_add( auxiliary_variables.overall_feature_sum_of_squares, tf.math.reduce_sum(values**2, axis=[0, 1])) per_chunk_aux_updates = tf.group(max_time_seen_assign, chunk_count_assign, inter_observation_duration_assign, example_count_assign, overall_feature_sum_assign, overall_feature_sum_of_squares_assign) with tf.control_dependencies([per_chunk_aux_updates]): example_count_float = tf.cast(auxiliary_variables.example_count, self._dtype) new_feature_mean = ( auxiliary_variables.overall_feature_sum / example_count_float) overall_feature_mean_update = tf.compat.v1.assign( statistics.overall_feature_moments.mean, new_feature_mean) overall_feature_var_update = tf.compat.v1.assign( statistics.overall_feature_moments.variance, # De-biased n / (n - 1) variance correction example_count_float / (example_count_float - 1.) * (auxiliary_variables.overall_feature_sum_of_squares / example_count_float - new_feature_mean**2)) # TODO(b/35675805): Remove this cast min_time_batch = tf.cast( tf.compat.v1.math.argmin(times[:, 0]), tf.dtypes.int32) def series_start_updates(): # If this is the lowest-time chunk that we have seen so far, update # series start moments to reflect that. Note that these statistics are # "best effort", as there are race conditions in the update (however, # they should eventually converge if the start of the series is # presented enough times). 
mean, variance = tf.compat.v1.nn.moments( values[min_time_batch, :self._starting_variance_window_size], axes=[0]) return tf.group( tf.compat.v1.assign(statistics.series_start_moments.mean, mean), tf.compat.v1.assign(statistics.series_start_moments.variance, variance)) with tf.compat.v1.device(statistics.start_time.device): series_start_update = tf.compat.v1.cond( # Update moments whenever we even match the lowest time seen so far, # to ensure that series start statistics are eventually updated to # their correct values, despite race conditions (i.e. eventually # statistics.start_time will reflect the global lowest time, and # given that we will eventually update the series start moments to # their correct values). tf.math.less_equal(times[min_time_batch, 0], tf.cast(statistics.start_time, times.dtype)), series_start_updates, tf.no_op) with tf.control_dependencies([series_start_update]): # There is a race condition if this update is performed in parallel on # multiple workers. Since models may be sensitive to being presented # with times before the putative start time, the value of this # variable is post-processed above to guarantee that each worker is # presented with a start time which is at least as low as the lowest # time in its current mini-batch. 
min_time = tf.cast(tf.math.reduce_min(times), tf.dtypes.int64) start_time = tf.math.minimum(statistics.start_time, min_time) start_time_update = tf.compat.v1.assign(statistics.start_time, start_time) inter_observation_duration_estimate = ( auxiliary_variables.inter_observation_duration_sum / tf.cast(auxiliary_variables.chunk_count, self._dtype)) # Estimate the total number of observations as: # (end time - start time + 1) * average intra-chunk time density total_observation_count_update = tf.compat.v1.assign( statistics.total_observation_count, tf.cast( gen_math_ops.round( tf.cast(max_time_seen_assign - start_time_update + 1, self._dtype) / inter_observation_duration_estimate), tf.dtypes.int64)) per_chunk_stat_updates = tf.group(overall_feature_mean_update, overall_feature_var_update, series_start_update, start_time_update, total_observation_count_update) return per_chunk_stat_updates def _create_variable_statistics_object(self): """Creates non-trainable variables representing input statistics.""" series_start_moments = Moments( mean=tf.compat.v1.get_variable( name="series_start_mean", shape=[self._num_features], dtype=self._dtype, initializer=tf.compat.v1.initializers.zeros(), trainable=False), variance=tf.compat.v1.get_variable( name="series_start_variance", shape=[self._num_features], dtype=self._dtype, initializer=tf.compat.v1.initializers.ones(), trainable=False)) overall_feature_moments = Moments( mean=tf.compat.v1.get_variable( name="overall_feature_mean", shape=[self._num_features], dtype=self._dtype, initializer=tf.compat.v1.initializers.zeros(), trainable=False), variance=tf.compat.v1.get_variable( name="overall_feature_var", shape=[self._num_features], dtype=self._dtype, initializer=tf.compat.v1.initializers.ones(), trainable=False)) start_time = tf.compat.v1.get_variable( name="start_time", dtype=tf.dtypes.int64, initializer=tf.dtypes.int64.max, trainable=False) total_observation_count = tf.compat.v1.get_variable( name="total_observation_count", 
shape=[], dtype=tf.dtypes.int64, initializer=tf.compat.v1.initializers.ones(), trainable=False) return InputStatistics( series_start_moments=series_start_moments, overall_feature_moments=overall_feature_moments, start_time=start_time, total_observation_count=total_observation_count) ================================================ FILE: tensorflow_estimator/python/estimator/canned/timeseries/math_utils_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for math_utils.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow_estimator.python.estimator.canned.timeseries import math_utils from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import TrainEvalFeatures class InputStatisticsTests(tf.test.TestCase): def _input_statistics_test_template(self, stat_object, num_features, dtype, warmup_iterations=0, rtol=1e-6, data_length=4): graph = tf.Graph() with graph.as_default(): data_length_range = tf.range(data_length, dtype=dtype) num_features_range = tf.range(num_features, dtype=dtype) times = 2 * data_length_range[None, :] - 3 values = (data_length_range[:, None] + num_features_range[None, :])[None, ...] 
      features = {
          TrainEvalFeatures.TIMES: times,
          TrainEvalFeatures.VALUES: values,
      }
      statistics = stat_object.initialize_graph(features=features)
      with self.session(graph=graph) as session:
        tf.compat.v1.initializers.global_variables().run()
        coordinator = tf.train.Coordinator()
        tf.compat.v1.train.queue_runner.start_queue_runners(
            session, coord=coordinator)
        for _ in range(warmup_iterations):
          # A control dependency should ensure that, for queue-based
          # statistics, a use of any statistic is preceded by an update of all
          # adaptive statistics.
          self.evaluate(statistics.total_observation_count)
        self.assertAllClose(
            tf.range(num_features, dtype=dtype) +
            tf.math.reduce_mean(data_length_range)[None],
            self.evaluate(statistics.series_start_moments.mean),
            rtol=rtol)
        self.assertAllClose(
            tf.tile(
                tf.math.reduce_variance(data_length_range)[None],
                [num_features]),
            self.evaluate(statistics.series_start_moments.variance),
            rtol=rtol)
        self.assertAllClose(
            tf.math.reduce_mean(values[0], axis=0),
            self.evaluate(statistics.overall_feature_moments.mean),
            rtol=rtol)
        self.assertAllClose(
            tf.math.reduce_variance(values[0], axis=0),
            self.evaluate(statistics.overall_feature_moments.variance),
            rtol=rtol)
        # Times start at -3 by construction above.
        self.assertAllClose(-3, self.evaluate(statistics.start_time), rtol=rtol)
        self.assertAllClose(
            data_length,
            self.evaluate(statistics.total_observation_count),
            rtol=rtol)
        coordinator.request_stop()
        coordinator.join()

  def test_queue(self):
    # Exercise the adaptive (mini-batch) statistics across dtypes and feature
    # counts; loose rtol since the statistics are stochastic estimates.
    for dtype in [tf.dtypes.float32, tf.dtypes.float64]:
      for num_features in [1, 2, 3]:
        self._input_statistics_test_template(
            math_utils.InputStatisticsFromMiniBatch(
                num_features=num_features, dtype=dtype),
            num_features=num_features,
            dtype=dtype,
            warmup_iterations=1000,
            rtol=0.1)


if __name__ == "__main__":
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/model.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Base class for time series models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc import collections import six import tensorflow as tf from tensorflow_estimator.python.estimator.canned.timeseries import math_utils from tensorflow_estimator.python.estimator.canned.timeseries.feature_keys import TrainEvalFeatures ModelOutputs = collections.namedtuple( # pylint: disable=invalid-name typename="ModelOutputs", field_names=[ "loss", # The scalar value to be minimized during training. "end_state", # A nested tuple specifying the model's state after # running on the specified data "predictions", # A dictionary of predictions, each with shape prefixed # by the shape of `prediction_times`. "prediction_times" # A [batch size x window size] integer Tensor # indicating times for which values in `predictions` # were computed. ]) @six.add_metaclass(abc.ABCMeta) class TimeSeriesModel(object): """Base class for creating generative time series models.""" def __init__(self, num_features, exogenous_feature_columns=None, dtype=tf.dtypes.float32): """Constructor for generative models. 
    Args:
      num_features: Number of features for the time series
      exogenous_feature_columns: A list of `tf.feature_column`s (for example
        `tf.feature_column.embedding_column`) corresponding to exogenous
        features which provide extra information to the model but are not part
        of the series to be predicted. Passed to
        `tf.feature_column.input_layer`.
      dtype: The floating point datatype to use.
    """
    if exogenous_feature_columns:
      self._exogenous_feature_columns = exogenous_feature_columns
    else:
      self._exogenous_feature_columns = []
    self.num_features = num_features
    self.dtype = dtype
    # Populated lazily by initialize_graph(); the _stats_* members cache
    # per-feature mean/stddev for (un)scaling data.
    self._input_statistics = None
    self._graph_initialized = False
    self._stats_means = None
    self._stats_sigmas = None

  @property
  def exogenous_feature_columns(self):
    """`tf.feature_column`s for features which are not predicted."""
    return self._exogenous_feature_columns

  # TODO(allenl): Move more of the generic machinery for generating and
  # predicting into TimeSeriesModel, and possibly share it between generate()
  # and predict()
  def generate(self,
               number_of_series,
               series_length,
               model_parameters=None,
               seed=None):
    """Sample synthetic data from model parameters, with optional substitutions.

    Returns `number_of_series` possible sequences of future values, sampled
    from the generative model with each conditioned on the previous. Samples
    are based on trained parameters, except for those parameters explicitly
    overridden in `model_parameters`.

    For distributions over future observations, see predict().

    Args:
      number_of_series: Number of time series to create.
      series_length: Length of each time series.
      model_parameters: A dictionary mapping model parameters to values, which
        replace trained parameters when generating data.
      seed: If specified, return deterministic time series according to this
        value.
    Returns:
      A dictionary with keys TrainEvalFeatures.TIMES (mapping to an array with
      shape [number_of_series, series_length]) and TrainEvalFeatures.VALUES
      (mapping to an array with shape [number_of_series, series_length,
      num_features]).
    """
    raise NotImplementedError("This model does not support generation.")

  def initialize_graph(self, input_statistics=None):
    """Define ops for the model, not depending on any previously defined ops.

    Args:
      input_statistics: A math_utils.InputStatistics object containing input
        statistics. If None, data-independent defaults are used, which may
        result in longer or unstable training.
    """
    self._graph_initialized = True
    self._input_statistics = input_statistics
    if self._input_statistics:
      # Cache mean/stddev so the scaling helpers below avoid recomputing sqrt.
      self._stats_means, variances = (
          self._input_statistics.overall_feature_moments)
      self._stats_sigmas = tf.math.sqrt(variances)

  def _scale_data(self, data):
    """Scale data according to stats (input scale -> model scale)."""
    if self._input_statistics is not None:
      return (data - self._stats_means) / self._stats_sigmas
    else:
      return data

  def _scale_variance(self, variance):
    """Scale variances according to stats (input scale -> model scale)."""
    if self._input_statistics is not None:
      return variance / self._input_statistics.overall_feature_moments.variance
    else:
      return variance

  def _scale_back_data(self, data):
    """Scale back data according to stats (model scale -> input scale)."""
    if self._input_statistics is not None:
      return (data * self._stats_sigmas) + self._stats_means
    else:
      return data

  def _scale_back_variance(self, variance):
    """Scale back variances according to stats (model scale -> input scale)."""
    if self._input_statistics is not None:
      return variance * self._input_statistics.overall_feature_moments.variance
    else:
      return variance

  def _check_graph_initialized(self):
    # Guard for entry points that require initialize_graph() to run first.
    if not self._graph_initialized:
      raise ValueError(
          "TimeSeriesModels require initialize_graph() to be called before "
          "use. This defines variables and ops in the default graph, and "
          "allows Tensor-valued input statistics to be specified.")

  def define_loss(self, features, mode):
    """Default loss definition with state replicated across a batch.

    Time series passed to this model have a batch dimension, and each series in
    a batch can be operated on in parallel. This loss definition assumes that
    each element of the batch represents an independent sample conditioned on
    the same initial state (i.e. it is simply replicated across the batch). A
    batch size of one provides sequential operations on a single time series.

    More complex processing may operate instead on get_start_state() and
    get_batch_loss() directly.

    Args:
      features: A dictionary (such as is produced by a chunker) with at minimum
        the following key/value pairs (others corresponding to the
        `exogenous_feature_columns` argument to `__init__` may be included
        representing exogenous regressors):
        TrainEvalFeatures.TIMES: A [batch size x window size] integer Tensor
          with times for each observation. If there is no artificial chunking,
          the window size is simply the length of the time series.
        TrainEvalFeatures.VALUES: A [batch size x window size x num features]
          Tensor with values for each observation.
      mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL). For INFER,
        see predict().

    Returns:
      A ModelOutputs object.
    """
    self._check_graph_initialized()
    # Replicate the (batch-free) start state across the batch dimension.
    start_state = math_utils.replicate_state(
        start_state=self.get_start_state(),
        batch_size=tf.compat.v1.shape(features[TrainEvalFeatures.TIMES])[0])
    return self.get_batch_loss(features=features, mode=mode, state=start_state)

  # TODO(vitalyk,allenl): Better documentation surrounding options for
  # chunking, references to papers, etc.
  @abc.abstractmethod
  def get_start_state(self):
    """Returns a tuple of state for the start of the time series.

    For example, a mean and covariance.
    State should not have a batch dimension, and will often be TensorFlow
    Variables to be learned along with the rest of the model parameters.
    """
    pass

  @abc.abstractmethod
  def get_batch_loss(self, features, mode, state):
    """Return predictions, losses, and end state for a time series.

    Args:
      features: A dictionary with times, values, and (optionally) exogenous
        regressors. See `define_loss`.
      mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL, INFER).
      state: Model-dependent state, each with size [batch size x ...]. The
        number and type will typically be fixed by the model (for example a
        mean and variance).

    Returns:
      A ModelOutputs object.
    """
    pass

  @abc.abstractmethod
  def predict(self, features):
    """Returns predictions of future observations given an initial state.

    Computes distributions for future observations. For sampled draws from the
    model where each is conditioned on the previous, see generate().

    Args:
      features: A dictionary with at minimum the following key/value pairs
        (others corresponding to the `exogenous_feature_columns` argument to
        `__init__` may be included representing exogenous regressors):
        PredictionFeatures.TIMES: A [batch size x window size] Tensor with
          times to make predictions for. Times must be increasing within each
          part of the batch, and must be greater than the last time `state`
          was updated.
        PredictionFeatures.STATE_TUPLE: Model-dependent state, each with size
          [batch size x ...]. The number and type will typically be fixed by
          the model (for example a mean and variance). Typically these will be
          the end state returned by get_batch_loss, predicting beyond that
          data.

    Returns:
      A dictionary with model-dependent predictions corresponding to the
      requested times. Keys indicate the type of prediction, and values have
      shape [batch size x window size x ...]. For example state space models
      return a "predicted_mean" and "predicted_covariance".
    """
    pass

  def _get_exogenous_embedding_shape(self):
    """Computes the shape of the vector returned by _process_exogenous_features.

    Returns:
      The shape as a list. Does not include a batch dimension.
    """
    if not self._exogenous_feature_columns:
      # NOTE(review): returns a tuple here rather than a list as documented
      # above — callers appear to only rely on it being a sequence.
      return (0,)
    # Build a throwaway graph just to infer the embedded feature width.
    with tf.Graph().as_default():
      parsed_features = (
          tf.compat.v1.feature_column.make_parse_example_spec(
              self._exogenous_feature_columns))
      placeholder_features = tf.compat.v1.io.parse_example(
          serialized=tf.compat.v1.placeholder(
              shape=[None], dtype=tf.dtypes.string),
          features=parsed_features)
      embedded = tf.compat.v1.feature_column.input_layer(
          features=placeholder_features,
          feature_columns=self._exogenous_feature_columns)
      return embedded.get_shape().as_list()[1:]

  def _process_exogenous_features(self, times, features):
    """Create a single vector from exogenous features.

    Args:
      times: A [batch size, window size] vector of times for this batch,
        primarily used to check the shape information of exogenous features.
      features: A dictionary of exogenous features corresponding to the columns
        in self._exogenous_feature_columns. Each value should have a shape
        prefixed by [batch size, window size].

    Returns:
      A Tensor with shape [batch size, window size, exogenous dimension], where
      the size of the exogenous dimension depends on the exogenous feature
      columns passed to the model's constructor.

    Raises:
      ValueError: If an exogenous feature has an unknown rank.
    """
    if self._exogenous_feature_columns:
      exogenous_features_single_batch_dimension = {}
      for name, tensor in features.items():
        if tensor.get_shape().ndims is None:
          # input_from_feature_columns does not support completely unknown
          # feature shapes, so we save on a bit of logic and provide a better
          # error message by checking that here.
          raise ValueError(
              ("Features with unknown rank are not supported. Got shape {} for "
               "feature {}.").format(tensor.get_shape(), name))
        # Flatten [batch, window, ...] -> [batch * window, ...] so the feature
        # column machinery (which expects a single batch dimension) can embed.
        tensor_shape_dynamic = tf.compat.v1.shape(tensor)
        tensor = tf.reshape(
            tensor,
            tf.concat([[tensor_shape_dynamic[0] * tensor_shape_dynamic[1]],
                       tensor_shape_dynamic[2:]],
                      axis=0))
        # Avoid shape warnings when embedding "scalar" exogenous features
        # (those with only batch and window dimensions);
        # input_from_feature_columns expects input ranks to match the embedded
        # rank.
        if tensor.get_shape().ndims == 1 and tensor.dtype != tf.dtypes.string:
          exogenous_features_single_batch_dimension[name] = tensor[:, None]
        else:
          exogenous_features_single_batch_dimension[name] = tensor
      embedded_exogenous_features_single_batch_dimension = (
          tf.compat.v1.feature_column.input_layer(
              features=exogenous_features_single_batch_dimension,
              feature_columns=self._exogenous_feature_columns,
              trainable=True))
      # Un-flatten back to [batch, window, embedded dimension].
      exogenous_regressors = tf.reshape(
          embedded_exogenous_features_single_batch_dimension,
          tf.concat([
              tf.compat.v1.shape(times),
              tf.compat.v1.shape(
                  embedded_exogenous_features_single_batch_dimension)[1:]
          ],
                    axis=0))
      exogenous_regressors.set_shape(times.get_shape().concatenate(
          embedded_exogenous_features_single_batch_dimension.get_shape()[1:]))
      exogenous_regressors = tf.cast(exogenous_regressors, dtype=self.dtype)
    else:
      # Not having any exogenous features is a special case so that models can
      # avoid superfluous updates, which may not be free of side effects due to
      # bias terms in transformations.
      exogenous_regressors = None
    return exogenous_regressors


================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/model_utils.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Helper functions for training and constructing time series Models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy import tensorflow as tf from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys # TODO(agarwal): Remove and replace with functionality from tf.slim def fully_connected(inp, inp_size, layer_size, name, activation=tf.nn.relu, dtype=tf.dtypes.float32): """Helper method to create a fully connected hidden layer.""" wt = tf.compat.v1.get_variable( name="{}_weight".format(name), shape=[inp_size, layer_size], dtype=dtype) bias = tf.compat.v1.get_variable( name="{}_bias".format(name), shape=[layer_size], initializer=tf.compat.v1.initializers.zeros()) output = tf.compat.v1.nn.xw_plus_b(inp, wt, bias) if activation is not None: assert callable(activation) output = activation(output) return output def canonicalize_times_or_steps_from_output(times, steps, previous_model_output): """Canonicalizes either relative or absolute times, with error checking.""" if steps is not None and times is not None: raise ValueError("Only one of `steps` and `times` may be specified.") if steps is None and times is None: raise ValueError("One of `steps` and `times` must be specified.") if times is not None: times = numpy.array(times) if len(times.shape) != 2: times = times[None, ...] 
if (previous_model_output[feature_keys.FilteringResults.TIMES].shape[0] != times.shape[0]): raise ValueError( ("`times` must have a batch dimension matching" " the previous model output (got a batch dimension of {} for `times`" " and {} for the previous model output).").format( times.shape[0], previous_model_output[ feature_keys.FilteringResults.TIMES].shape[0])) if not (previous_model_output[feature_keys.FilteringResults.TIMES][:, -1] < times[:, 0]).all(): raise ValueError("Prediction times must be after the corresponding " "previous model output.") if steps is not None: predict_times = ( previous_model_output[feature_keys.FilteringResults.TIMES][:, -1:] + 1 + numpy.arange(steps)[None, ...]) else: predict_times = times return predict_times ================================================ FILE: tensorflow_estimator/python/estimator/canned/timeseries/saved_model_utils.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Convenience functions for working with time series saved_models. 
@@predict_continuation
@@cold_start_filter
@@filter_continuation
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy

from tensorflow.python.util.all_util import remove_undocumented
from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys as _feature_keys
from tensorflow_estimator.python.estimator.canned.timeseries import head as _head
from tensorflow_estimator.python.estimator.canned.timeseries import model_utils as _model_utils


def _canonicalize_numpy_data(data, require_single_batch):
  """Do basic checking and reshaping for Numpy data.

  Args:
    data: A dictionary mapping keys to Numpy arrays, with several possible
      shapes (requires keys `TrainEvalFeatures.TIMES` and
      `TrainEvalFeatures.VALUES`): Single example; `TIMES` is a scalar and
      `VALUES` is either a scalar or a vector of length [number of features].
      Sequence; `TIMES` is a vector of shape [series length], `VALUES` either
      has shape [series length] (univariate) or [series length x number of
      features] (multivariate). Batch of sequences; `TIMES` is a vector of
      shape [batch size x series length], `VALUES` has shape [batch size x
      series length] or [batch size x series length x number of features]. In
      any case, `VALUES` and any exogenous features must have their shapes
      prefixed by the shape of the value corresponding to the `TIMES` key.
    require_single_batch: If True, raises an error if the provided data has a
      batch dimension > 1.

  Returns:
    A dictionary with features normalized to have shapes prefixed with [batch
    size x series length]. The sizes of dimensions which were omitted in the
    inputs are 1.

  Raises:
    ValueError: If dimensions are incorrect or do not match, or required
      features are missing.
  """
  features = {key: numpy.array(value) for key, value in data.items()}
  if (_feature_keys.TrainEvalFeatures.TIMES not in features or
      _feature_keys.TrainEvalFeatures.VALUES not in features):
    raise ValueError("{} and {} are required features.".format(
        _feature_keys.TrainEvalFeatures.TIMES,
        _feature_keys.TrainEvalFeatures.VALUES))
  times = features[_feature_keys.TrainEvalFeatures.TIMES]
  # Every feature (values and exogenous) must share the times prefix shape.
  for key, value in features.items():
    if value.shape[:len(times.shape)] != times.shape:
      raise ValueError(
          ("All features must have their shapes prefixed by the shape of the"
           " times feature. Got shape {} for feature '{}', but shape {} for"
           " '{}'").format(value.shape, key, times.shape,
                           _feature_keys.TrainEvalFeatures.TIMES))
  if not times.shape:  # a single example
    if not features[_feature_keys.TrainEvalFeatures.VALUES].shape:  # univariate
      # Add a feature dimension (with one feature)
      features[_feature_keys.TrainEvalFeatures.VALUES] = features[
          _feature_keys.TrainEvalFeatures.VALUES][..., None]
    elif len(features[_feature_keys.TrainEvalFeatures.VALUES].shape) > 1:
      raise ValueError(
          ("Got an unexpected number of dimensions for the '{}' feature."
           " Was expecting at most 1 dimension"
           " ([number of features]) since '{}' does not "
           "have a batch or time dimension, but got shape {}").format(
              _feature_keys.TrainEvalFeatures.VALUES,
              _feature_keys.TrainEvalFeatures.TIMES,
              features[_feature_keys.TrainEvalFeatures.VALUES].shape))
    # Add trivial batch and time dimensions for every feature
    features = {
        key: value[None, None, ...] for key, value in features.items()
    }
  # NOTE: `times` below still refers to the pre-expansion array, so the scalar
  # case above falls through to the final `elif`, which re-reads the (already
  # expanded, rank-2) TIMES entry from `features`.
  if len(times.shape) == 1:  # shape [series length]
    if len(features[_feature_keys.TrainEvalFeatures.VALUES].shape
          ) == 1:  # shape [series length]
      # Add a feature dimension (with one feature)
      features[_feature_keys.TrainEvalFeatures.VALUES] = features[
          _feature_keys.TrainEvalFeatures.VALUES][..., None]
    elif len(features[_feature_keys.TrainEvalFeatures.VALUES].shape) > 2:
      raise ValueError(
          ("Got an unexpected number of dimensions for the '{}' feature."
           " Was expecting at most 2 dimensions"
           " ([series length, number of features]) since '{}' does not "
           "have a batch dimension, but got shape {}").format(
              _feature_keys.TrainEvalFeatures.VALUES,
              _feature_keys.TrainEvalFeatures.TIMES,
              features[_feature_keys.TrainEvalFeatures.VALUES].shape))
    # Add trivial batch dimensions for every feature
    features = {key: value[None, ...] for key, value in features.items()}
  elif len(features[_feature_keys.TrainEvalFeatures.TIMES].shape
          ) != 2:  # shape [batch size, series length]
    raise ValueError(
        ("Got an unexpected number of dimensions for times. Was expecting at "
         "most two ([batch size, series length]), but got shape {}.").format(
             times.shape))
  if require_single_batch:
    # We don't expect input to be already batched; batching is done later
    if features[_feature_keys.TrainEvalFeatures.TIMES].shape[0] != 1:
      raise ValueError("Got batch input, was expecting unbatched input.")
  return features


def _colate_features_to_feeds_and_fetches(signature,
                                          features,
                                          graph,
                                          continue_from=None):
  """Uses a saved model signature to construct feed and fetch dictionaries."""
  # NOTE(review): "colate" is a long-standing typo for "collate"; the name is
  # referenced by the public functions below, so it is left as-is.
  if continue_from is None:
    state_values = {}
  elif _feature_keys.FilteringResults.STATE_TUPLE in continue_from:
    # We're continuing from an evaluation, so we need to unpack/flatten state.
    state_values = _head.state_to_dictionary(
        continue_from[_feature_keys.FilteringResults.STATE_TUPLE])
  else:
    state_values = continue_from
  # Resolve signature input/output names to live tensors in `graph`.
  input_feed_tensors_by_name = {
      input_key: graph.as_graph_element(input_value.name)
      for input_key, input_value in signature.inputs.items()
  }
  output_tensors_by_name = {
      output_key: graph.as_graph_element(output_value.name)
      for output_key, output_value in signature.outputs.items()
  }
  feed_dict = {}
  for state_key, state_value in state_values.items():
    feed_dict[input_feed_tensors_by_name[state_key]] = state_value
  for feature_key, feature_value in features.items():
    feed_dict[input_feed_tensors_by_name[feature_key]] = feature_value
  return output_tensors_by_name, feed_dict


def predict_continuation(continue_from,
                         signatures,
                         session,
                         steps=None,
                         times=None,
                         exogenous_features=None):
  """Perform prediction using an exported saved model.

  Args:
    continue_from: A dictionary containing the results of either an Estimator's
      evaluate method or filter_continuation. Used to determine the model state
      to make predictions starting from.
    signatures: The `MetaGraphDef` protocol buffer returned from
      `tf.saved_model.loader.load`. Used to determine the names of Tensors to
      feed and fetch. Must be from the same model as `continue_from`.
    session: The session to use. The session's graph must be the one into which
      `tf.saved_model.loader.load` loaded the model.
    steps: The number of steps to predict (scalar), starting after the
      evaluation or filtering. If `times` is specified, `steps` must not be;
      one is required.
    times: A [batch_size x window_size] array of integers (not a Tensor)
      indicating times to make predictions for. These times must be after the
      corresponding evaluation or filtering. If `steps` is specified, `times`
      must not be; one is required. If the batch dimension is omitted, it is
      assumed to be 1.
    exogenous_features: Optional dictionary. If specified, indicates exogenous
      features for the model to use while making the predictions.
      Values must have shape [batch_size x window_size x ...], where
      `batch_size` matches the batch dimension used when creating
      `continue_from`, and `window_size` is either the `steps` argument or the
      `window_size` of the `times` argument (depending on which was specified).

  Returns:
    A dictionary with model-specific predictions (typically having keys "mean"
    and "covariance") and a _feature_keys.PredictionResults.TIMES key
    indicating the times for which the predictions were computed.

  Raises:
    ValueError: If `times` or `steps` are misspecified.
  """
  if exogenous_features is None:
    exogenous_features = {}
  # Resolve the `steps`/`times` request into concrete prediction times.
  predict_times = _model_utils.canonicalize_times_or_steps_from_output(
      times=times, steps=steps, previous_model_output=continue_from)
  features = {_feature_keys.PredictionFeatures.TIMES: predict_times}
  features.update(exogenous_features)
  predict_signature = signatures.signature_def[
      _feature_keys.SavedModelLabels.PREDICT]
  output_tensors_by_name, feed_dict = _colate_features_to_feeds_and_fetches(
      continue_from=continue_from,
      signature=predict_signature,
      features=features,
      graph=session.graph)
  output = session.run(output_tensors_by_name, feed_dict=feed_dict)
  # Echo the prediction times back so callers can align outputs.
  output[_feature_keys.PredictionResults.TIMES] = features[
      _feature_keys.PredictionFeatures.TIMES]
  return output


def cold_start_filter(signatures, session, features):
  """Perform filtering using an exported saved model.

  Filtering refers to updating model state based on new observations.
  Predictions based on the returned model state will be conditioned on these
  observations. Starts from the model's default/uninformed state.

  Args:
    signatures: The `MetaGraphDef` protocol buffer returned from
      `tf.saved_model.loader.load`. Used to determine the names of Tensors to
      feed and fetch. Must be from the same model as `continue_from`.
    session: The session to use. The session's graph must be the one into which
      `tf.saved_model.loader.load` loaded the model.
    features: A dictionary mapping keys to Numpy arrays, with several possible
      shapes (requires keys `FilteringFeatures.TIMES` and
      `FilteringFeatures.VALUES`): Single example; `TIMES` is a scalar and
      `VALUES` is either a scalar or a vector of length [number of features].
      Sequence; `TIMES` is a vector of shape [series length], `VALUES` either
      has shape [series length] (univariate) or [series length x number of
      features] (multivariate). Batch of sequences; `TIMES` is a vector of
      shape [batch size x series length], `VALUES` has shape [batch size x
      series length] or [batch size x series length x number of features]. In
      any case, `VALUES` and any exogenous features must have their shapes
      prefixed by the shape of the value corresponding to the `TIMES` key.

  Returns:
    A dictionary containing model state updated to account for the observations
    in `features`.
  """
  filter_signature = signatures.signature_def[
      _feature_keys.SavedModelLabels.COLD_START_FILTER]
  features = _canonicalize_numpy_data(data=features, require_single_batch=False)
  output_tensors_by_name, feed_dict = _colate_features_to_feeds_and_fetches(
      signature=filter_signature, features=features, graph=session.graph)
  output = session.run(output_tensors_by_name, feed_dict=feed_dict)
  # Make it easier to chain filter -> predict by keeping track of the current
  # time.
  output[_feature_keys.FilteringResults.TIMES] = features[
      _feature_keys.FilteringFeatures.TIMES]
  return output


def filter_continuation(continue_from, signatures, session, features):
  """Perform filtering using an exported saved model.

  Filtering refers to updating model state based on new observations.
  Predictions based on the returned model state will be conditioned on these
  observations.

  Args:
    continue_from: A dictionary containing the results of either an Estimator's
      evaluate method or a previous filter step (cold start or continuation).
      Used to determine the model state to start filtering from.
    signatures: The `MetaGraphDef` protocol buffer returned from
      `tf.saved_model.loader.load`. Used to determine the names of Tensors to
      feed and fetch. Must be from the same model as `continue_from`.
    session: The session to use. The session's graph must be the one into which
      `tf.saved_model.loader.load` loaded the model.
    features: A dictionary mapping keys to Numpy arrays, with several possible
      shapes (requires keys `FilteringFeatures.TIMES` and
      `FilteringFeatures.VALUES`): Single example; `TIMES` is a scalar and
      `VALUES` is either a scalar or a vector of length [number of features].
      Sequence; `TIMES` is a vector of shape [series length], `VALUES` either
      has shape [series length] (univariate) or [series length x number of
      features] (multivariate). Batch of sequences; `TIMES` is a vector of
      shape [batch size x series length], `VALUES` has shape [batch size x
      series length] or [batch size x series length x number of features]. In
      any case, `VALUES` and any exogenous features must have their shapes
      prefixed by the shape of the value corresponding to the `TIMES` key.

  Returns:
    A dictionary containing model state updated to account for the observations
    in `features`.
  """
  filter_signature = signatures.signature_def[
      _feature_keys.SavedModelLabels.FILTER]
  features = _canonicalize_numpy_data(data=features, require_single_batch=False)
  output_tensors_by_name, feed_dict = _colate_features_to_feeds_and_fetches(
      continue_from=continue_from,
      signature=filter_signature,
      features=features,
      graph=session.graph)
  output = session.run(output_tensors_by_name, feed_dict=feed_dict)
  # Make it easier to chain filter -> predict by keeping track of the current
  # time.
  output[_feature_keys.FilteringResults.TIMES] = features[
      _feature_keys.FilteringFeatures.TIMES]
  return output


remove_undocumented(module_name=__name__)


================================================
FILE: tensorflow_estimator/python/estimator/canned/timeseries/state_management.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classes for wrapping a model to operate on different data shapes."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc

from tensorflow_estimator.python.estimator import estimator_lib
from tensorflow_estimator.python.estimator.canned.timeseries import feature_keys


class PassthroughStateManager(object):
  """A minimal wrapper for models which do not need state management."""

  def __init__(self):
    self._input_statistics = None
    self._graph_initialized = False

  def initialize_graph(self, model, input_statistics=None):
    """Adds required operations to the graph."""
    del model  # unused
    self._graph_initialized = True
    self._input_statistics = input_statistics

  def define_loss(self, model, features, mode):
    """Wrap "model" with StateManager-specific operations.

    Args:
      model: The model (inheriting from TimeSeriesModel) to manage state for.
features: A dictionary with the following key/value pairs: feature_keys.TrainEvalFeatures.TIMES: A [batch size x window size] Tensor with times for each observation. feature_keys.TrainEvalFeatures.VALUES: A [batch size x window size x num features] Tensor with values for each observation. mode: The tf.estimator.ModeKeys mode to use (TRAIN or EVAL). Returns: A ModelOutputs object. Raises: ValueError: If start state was specified. """ if feature_keys.State.STATE_TUPLE in features: raise ValueError( "Overriding start state is not supported for this model.") return model.define_loss(features, mode) class _OverridableStateManager(PassthroughStateManager): """Base class for state managers which support overriding model state.""" @abc.abstractmethod def _define_loss_with_saved_state(self, model, features, mode): pass def define_loss(self, model, features, mode): """Switches between explicit start state and managed state.""" if feature_keys.FilteringFeatures.STATE_TUPLE in features: # Explicit start state has been provided, so we should use that. if mode == estimator_lib.ModeKeys.TRAIN: raise ValueError( "Overriding saved state for training is not supported (but a value " "for feature {} was specified).".format( feature_keys.FilteringFeatures.STATE_TUPLE)) start_state = features[feature_keys.FilteringFeatures.STATE_TUPLE] del features[feature_keys.FilteringFeatures.STATE_TUPLE] return model.get_batch_loss( features=features, mode=mode, state=start_state) else: # No explicit start state; use managed state. return self._define_loss_with_saved_state( model=model, features=features, mode=mode) class FilteringOnlyStateManager(_OverridableStateManager): """State manager for models which use state only for filtering. Window-based models (ARModel) do not require state to be fed during training (instead requiring a specific window size). Rather than requiring a minimum window size for filtering, these models maintain this window in their state, and so need state to be fed. 
""" def _define_loss_with_saved_state(self, model, features, mode): return model.define_loss(features, mode) ================================================ FILE: tensorflow_estimator/python/estimator/canned/v1/__init__.py ================================================ ================================================ FILE: tensorflow_estimator/python/estimator/canned/v1/baseline_estimator_test_v1.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for BaselineEstimatorV1.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import shutil import tempfile import numpy as np import six import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.canned import baseline from tensorflow_estimator.python.estimator.canned import head as head_lib from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.inputs import numpy_io # Names of variables created by model. 
BIAS_NAME = 'baseline/bias' def assert_close(expected, actual, rtol=1e-04, name='assert_close'): with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: expected = ops.convert_to_tensor(expected, name='expected') actual = ops.convert_to_tensor(actual, name='actual') rdiff = tf.math.abs(expected - actual, 'diff') / tf.math.abs(expected) rtol = ops.convert_to_tensor(rtol, name='rtol') return tf.compat.v1.debugging.assert_less( rdiff, rtol, data=('Condition expected =~ actual did not hold element-wise:' 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, 'rtol = ', rtol,), name=scope) def save_variables_to_ckpt(model_dir): init_all_op = [tf.compat.v1.initializers.global_variables()] with tf.compat.v1.Session() as sess: sess.run(init_all_op) tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) def _baseline_estimator_fn(weight_column=None, label_dimension=1, **kwargs): return baseline.BaselineEstimator( head=head_lib._regression_head( weight_column=weight_column, label_dimension=label_dimension, loss_reduction=tf.compat.v1.losses.Reduction.SUM), **kwargs) @test_util.run_v1_only('Tests v1 only symbols') class BaselineEstimatorEvaluationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def test_evaluation_batch(self): """Tests evaluation for batch_size==2.""" with tf.Graph().as_default(): tf.Variable([13.0], name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir) eval_metrics = baseline_estimator.evaluate( input_fn=lambda: ({ 'age': ((1,), (1,)) }, ((10.,), (10.,))), steps=1) # Logit is bias = 13, while label is 10. # Loss per example is 3**2 = 9. 
# Training loss is the sum over batch = 9 + 9 = 18 # Average loss is the average over batch = 9 self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 18., metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_weights(self): """Tests evaluation with weights.""" with tf.Graph().as_default(): tf.Variable([13.0], name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) def _input_fn(): features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} labels = ((10.,), (10.,)) return features, labels baseline_estimator = _baseline_estimator_fn( weight_column='weights', model_dir=self._model_dir) eval_metrics = baseline_estimator.evaluate(input_fn=_input_fn, steps=1) # Logit is bias = 13, while label is 10. # Loss per example is 3**2 = 9. # Training loss is the weighted sum over batch = 9 + 2*9 = 27 # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 27., metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_for_multi_dimensions(self): label_dim = 2 with tf.Graph().as_default(): tf.Variable([46.0, 58.0], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_estimator = _baseline_estimator_fn( label_dimension=label_dim, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( x={ 'age': np.array([[2., 4., 5.]]), }, y=np.array([[46., 58.]]), batch_size=1, num_epochs=None, shuffle=False) eval_metrics = baseline_estimator.evaluate(input_fn=input_fn, steps=1) self.assertItemsEqual( (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, 
metric_keys.MetricKeys.PREDICTION_MEAN, metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP), eval_metrics.keys()) # Logit is bias which is [46, 58] self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) @test_util.run_v1_only('Tests v1 only symbols') class BaselineEstimatorPredictTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def test_1d(self): """Tests predict when all variables are one-dimensional.""" with tf.Graph().as_default(): tf.Variable([.2], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir) predict_input_fn = numpy_io.numpy_input_fn( x={'x': np.array([[2.]])}, y=None, batch_size=1, num_epochs=1, shuffle=False) predictions = baseline_estimator.predict(input_fn=predict_input_fn) predicted_scores = list([x['predictions'] for x in predictions]) # x * weight + bias = 2. * 10. 
+ .2 = 20.2 self.assertAllClose([[.2]], predicted_scores) def testMultiDim(self): """Tests predict when all variables are multi-dimenstional.""" batch_size = 2 label_dimension = 3 with tf.Graph().as_default(): tf.Variable( # shape=[label_dimension] [.2, .4, .6], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_estimator = _baseline_estimator_fn( label_dimension=label_dimension, model_dir=self._model_dir) predict_input_fn = numpy_io.numpy_input_fn( # x shape=[batch_size, x_dim] x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) predictions = baseline_estimator.predict(input_fn=predict_input_fn) predicted_scores = list([x['predictions'] for x in predictions]) # score = bias, shape=[batch_size, label_dimension] self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], predicted_scores) @test_util.run_v1_only('Tests v1 only symbols') class BaselineEstimatorIntegrationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, prediction_length): feature_columns = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] est = _baseline_estimator_fn( label_dimension=label_dimension, model_dir=self._model_dir) # TRAIN # learn y = x est.train(train_input_fn, steps=200) # EVALUTE scores = est.evaluate(eval_input_fn) self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) # PREDICT predictions = np.array( [x['predictions'] for x in est.predict(predict_input_fn)]) self.assertAllEqual((prediction_length, label_dimension), predictions.shape) # EXPORT feature_spec = 
tf.compat.v1.feature_column.make_parse_example_spec( feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def test_numpy_input_fn(self): """Tests complete flow with numpy_input_fn.""" label_dimension = 2 input_dimension = label_dimension batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, label_dimension=label_dimension, prediction_length=prediction_length) @test_util.run_v1_only('Tests v1 only symbols') class BaselineEstimatorTrainingTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _mock_optimizer(self, expected_loss=None): expected_var_names = ['%s:0' % BIAS_NAME] def _minimize(loss, global_step=None, var_list=None): trainable_vars = var_list or tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self.assertItemsEqual(expected_var_names, [var.name for var in trainable_vars]) # Verify loss. We can't check the value directly, so we add an assert op. 
self.assertEquals(0, loss.shape.ndims) if expected_loss is None: if global_step is not None: return tf.compat.v1.assign_add(global_step, 1).op return tf.no_op() assert_loss = assert_close( tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32), loss, name='assert_loss') with tf.control_dependencies((assert_loss,)): if global_step is not None: return tf.compat.v1.assign_add(global_step, 1).op return tf.no_op() mock_optimizer = tf.compat.v1.test.mock.NonCallableMock( spec=tf.compat.v1.train.Optimizer, wraps=tf.compat.v1.train.Optimizer( use_locking=False, name='my_optimizer')) mock_optimizer.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize) # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. # So, return mock_optimizer itself for deepcopy. mock_optimizer.__deepcopy__ = lambda _: mock_optimizer return mock_optimizer def _assert_checkpoint(self, label_dimension, expected_global_step, expected_bias=None): shapes = { name: shape for (name, shape) in tf.train.list_variables(self._model_dir) } self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertEqual( expected_global_step, tf.train.load_variable(self._model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP)) self.assertEqual([label_dimension], shapes[BIAS_NAME]) if expected_bias is not None: self.assertEqual(expected_bias, tf.train.load_variable(self._model_dir, BIAS_NAME)) def testFromScratch(self): # Create BaselineRegressor. label = 5. age = 17 # loss = (logits - label)^2 = (0 - 5.)^2 = 25. mock_optimizer = self._mock_optimizer(expected_loss=25.) baseline_estimator = _baseline_estimator_fn( model_dir=self._model_dir, optimizer=mock_optimizer) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. 
num_steps = 10 baseline_estimator.train( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( label_dimension=1, expected_global_step=num_steps, expected_bias=[0.]) def testFromCheckpoint(self): # Create initial checkpoint. bias = 7.0 initial_global_step = 100 with tf.Graph().as_default(): tf.Variable([bias], name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # logits = bias = 6. # loss = (logits - label)^2 = (7 - 5)^2 = 4 mock_optimizer = self._mock_optimizer(expected_loss=4.) baseline_estimator = _baseline_estimator_fn( model_dir=self._model_dir, optimizer=mock_optimizer) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 baseline_estimator.train( input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( label_dimension=1, expected_global_step=initial_global_step + num_steps, expected_bias=[bias]) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/v1/baseline_test_v1.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for v1 version of baseline.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import math import os import shutil import tempfile import numpy as np import six import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.canned import baseline from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.inputs import pandas_io try: # pylint: disable=g-import-not-at-top import pandas as pd HAS_PANDAS = True except IOError: # Pandas writes a temporary file during import. If it fails, don't use pandas. HAS_PANDAS = False except ImportError: HAS_PANDAS = False # pylint rules which are disabled by default for test files. # pylint: disable=invalid-name,protected-access,missing-docstring # Names of variables created by model. 
BIAS_NAME = 'baseline/bias' def assert_close(expected, actual, rtol=1e-04, name='assert_close'): with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: expected = ops.convert_to_tensor(expected, name='expected') actual = ops.convert_to_tensor(actual, name='actual') rdiff = tf.math.abs(expected - actual, 'diff') / tf.math.abs(expected) rtol = ops.convert_to_tensor(rtol, name='rtol') return tf.compat.v1.debugging.assert_less( rdiff, rtol, data=('Condition expected =~ actual did not hold element-wise:' 'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff, 'rtol = ', rtol,), name=scope) def save_variables_to_ckpt(model_dir): init_all_op = [tf.compat.v1.initializers.global_variables()] with tf.compat.v1.Session() as sess: sess.run(init_all_op) tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) def queue_parsed_features(feature_map): tensors_to_enqueue = [] keys = [] for key, tensor in six.iteritems(feature_map): keys.append(key) tensors_to_enqueue.append(tensor) queue_dtypes = [x.dtype for x in tensors_to_enqueue] input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes) tf.compat.v1.train.queue_runner.add_queue_runner( tf.compat.v1.train.queue_runner.QueueRunner( input_queue, [input_queue.enqueue(tensors_to_enqueue)])) dequeued_tensors = input_queue.dequeue() return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))} def sorted_key_dict(unsorted_dict): return {k: unsorted_dict[k] for k in sorted(unsorted_dict)} def sigmoid(x): return 1 / (1 + np.exp(-1.0 * x)) def _baseline_regressor_fn(*args, **kwargs): return baseline.BaselineRegressor(*args, **kwargs) def _baseline_classifier_fn(*args, **kwargs): return baseline.BaselineClassifier(*args, **kwargs) # Tests for Baseline Regressor. # TODO(b/36813849): Add tests with dynamic shape inputs using placeholders. 
@test_util.run_v1_only('Tests v1 only symbols') class BaselineRegressorEvaluationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def test_evaluation_for_simple_data(self): with tf.Graph().as_default(): tf.Variable([13.0], name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) eval_metrics = baseline_regressor.evaluate( input_fn=lambda: ({ 'age': ((1,),) }, ((10.,),)), steps=1) # Logit is bias = 13, while label is 10. Loss is 3**2 = 9. self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 9., metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_batch(self): """Tests evaluation for batch_size==2.""" with tf.Graph().as_default(): tf.Variable([13.0], name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) eval_metrics = baseline_regressor.evaluate( input_fn=lambda: ({ 'age': ((1,), (1,)) }, ((10.,), (10.,))), steps=1) # Logit is bias = 13, while label is 10. # Loss per example is 3**2 = 9. 
# Training loss is the sum over batch = 9 + 9 = 18 # Average loss is the average over batch = 9 self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 18., metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_weights(self): """Tests evaluation with weights.""" with tf.Graph().as_default(): tf.Variable([13.0], name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) def _input_fn(): features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))} labels = ((10.,), (10.,)) return features, labels baseline_regressor = _baseline_regressor_fn( weight_column='weights', model_dir=self._model_dir) eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1) # Logit is bias = 13, while label is 10. # Loss per example is 3**2 = 9. # Training loss is the weighted sum over batch = 9 + 2*9 = 27 # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9 self.assertDictEqual( { metric_keys.MetricKeys.LOSS: 27., metric_keys.MetricKeys.LOSS_MEAN: 9., metric_keys.MetricKeys.PREDICTION_MEAN: 13., metric_keys.MetricKeys.LABEL_MEAN: 10., tf.compat.v1.GraphKeys.GLOBAL_STEP: 100 }, eval_metrics) def test_evaluation_for_multi_dimensions(self): label_dim = 2 with tf.Graph().as_default(): tf.Variable([46.0, 58.0], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_regressor = _baseline_regressor_fn( label_dimension=label_dim, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( x={ 'age': np.array([[2., 4., 5.]]), }, y=np.array([[46., 58.]]), batch_size=1, num_epochs=None, shuffle=False) eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1) self.assertItemsEqual( (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN, 
metric_keys.MetricKeys.PREDICTION_MEAN, metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP), eval_metrics.keys()) # Logit is bias which is [46, 58] self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS]) @test_util.run_v1_only('Tests v1 only symbols') class BaselineRegressorPredictTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def test_1d(self): """Tests predict when all variables are one-dimensional.""" with tf.Graph().as_default(): tf.Variable([.2], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) predict_input_fn = numpy_io.numpy_input_fn( x={'x': np.array([[2.]])}, y=None, batch_size=1, num_epochs=1, shuffle=False) predictions = baseline_regressor.predict(input_fn=predict_input_fn) predicted_scores = list([x['predictions'] for x in predictions]) # x * weight + bias = 2. * 10. 
+ .2 = 20.2 self.assertAllClose([[.2]], predicted_scores) def testMultiDim(self): """Tests predict when all variables are multi-dimenstional.""" batch_size = 2 label_dimension = 3 with tf.Graph().as_default(): tf.Variable( # shape=[label_dimension] [.2, .4, .6], name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) baseline_regressor = _baseline_regressor_fn( label_dimension=label_dimension, model_dir=self._model_dir) predict_input_fn = numpy_io.numpy_input_fn( # x shape=[batch_size, x_dim] x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) predictions = baseline_regressor.predict(input_fn=predict_input_fn) predicted_scores = list([x['predictions'] for x in predictions]) # score = bias, shape=[batch_size, label_dimension] self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]], predicted_scores) @test_util.run_v1_only('Tests v1 only symbols') class BaselineRegressorIntegrationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, prediction_length): feature_columns = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] est = _baseline_regressor_fn( label_dimension=label_dimension, model_dir=self._model_dir) # TRAIN # learn y = x est.train(train_input_fn, steps=200) # EVALUTE scores = est.evaluate(eval_input_fn) self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) # PREDICT predictions = np.array( [x['predictions'] for x in est.predict(predict_input_fn)]) self.assertAllEqual((prediction_length, label_dimension), predictions.shape) # EXPORT feature_spec = 
tf.compat.v1.feature_column.make_parse_example_spec( feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def test_numpy_input_fn(self): """Tests complete flow with numpy_input_fn.""" label_dimension = 2 input_dimension = label_dimension batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, label_dimension=label_dimension, prediction_length=prediction_length) def test_pandas_input_fn(self): """Tests complete flow with pandas_input_fn.""" if not HAS_PANDAS: return # Pandas DataFrame natually supports 1 dim data only. 
label_dimension = 1 input_dimension = label_dimension batch_size = 10 data = np.array([1., 2., 3., 4.], dtype=np.float32) x = pd.DataFrame({'x': data}) y = pd.Series(data) prediction_length = 4 train_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, shuffle=False) predict_input_fn = pandas_io.pandas_input_fn( x=x, batch_size=batch_size, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, label_dimension=label_dimension, prediction_length=prediction_length) def test_input_fn_from_parse_example(self): """Tests complete flow with input_fn constructed from parse_example.""" label_dimension = 2 input_dimension = label_dimension batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) serialized_examples = [] for datum in data: example = example_pb2.Example( features=feature_pb2.Features( feature={ 'x': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=datum)), 'y': feature_pb2.Feature( float_list=feature_pb2.FloatList( value=datum[:label_dimension])), })) serialized_examples.append(example.SerializeToString()) feature_spec = { 'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32), 'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32), } def _train_input_fn(): feature_map = tf.compat.v1.io.parse_example(serialized_examples, feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _eval_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def 
_predict_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = queue_parsed_features(feature_map) features.pop('y') return features, None self._test_complete_flow( train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, input_dimension=input_dimension, label_dimension=label_dimension, prediction_length=prediction_length) @test_util.run_v1_only('Tests v1 only symbols') class BaselineRegressorTrainingTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _mock_optimizer(self, expected_loss=None): expected_var_names = ['%s:0' % BIAS_NAME] def _minimize(loss, global_step=None, var_list=None): trainable_vars = var_list or tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self.assertItemsEqual(expected_var_names, [var.name for var in trainable_vars]) # Verify loss. We can't check the value directly, so we add an assert op. self.assertEquals(0, loss.shape.ndims) if expected_loss is None: if global_step is not None: return tf.compat.v1.assign_add(global_step, 1).op return tf.no_op() assert_loss = assert_close( tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32), loss, name='assert_loss') with tf.control_dependencies((assert_loss,)): if global_step is not None: return tf.compat.v1.assign_add(global_step, 1).op return tf.no_op() mock_optimizer = tf.compat.v1.test.mock.NonCallableMock( spec=tf.compat.v1.train.Optimizer, wraps=tf.compat.v1.train.Optimizer( use_locking=False, name='my_optimizer')) mock_optimizer.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize) # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. # So, return mock_optimizer itself for deepcopy. 
mock_optimizer.__deepcopy__ = lambda _: mock_optimizer return mock_optimizer def _assert_checkpoint(self, label_dimension, expected_global_step, expected_bias=None): shapes = { name: shape for (name, shape) in tf.train.list_variables(self._model_dir) } self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertEqual( expected_global_step, tf.train.load_variable(self._model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP)) self.assertEqual([label_dimension], shapes[BIAS_NAME]) if expected_bias is not None: self.assertEqual(expected_bias, tf.train.load_variable(self._model_dir, BIAS_NAME)) def testFromScratchWithDefaultOptimizer(self): # Create BaselineRegressor. label = 5. age = 17 baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir) # Train for a few steps, and validate final checkpoint. num_steps = 10 baseline_regressor.train( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps) def testTrainWithOneDimLabel(self): label_dimension = 1 batch_size = 20 est = _baseline_regressor_fn( label_dimension=label_dimension, model_dir=self._model_dir) data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) self.assertEqual((batch_size,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(label_dimension=1, expected_global_step=200) def testTrainWithOneDimWeight(self): label_dimension = 1 batch_size = 20 est = _baseline_regressor_fn( label_dimension=label_dimension, weight_column='w', model_dir=self._model_dir) data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32) self.assertEqual((batch_size,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_1 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) 
est.train(train_input_fn, steps=200)
    self._assert_checkpoint(label_dimension=1, expected_global_step=200)

  def testFromScratch(self):
    """Trains from zero bias; the mock optimizer must see loss 25."""
    # Create BaselineRegressor.
    label = 5.
    age = 17
    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
    mock_optimizer = self._mock_optimizer(expected_loss=25.)
    baseline_regressor = _baseline_regressor_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        label_dimension=1, expected_global_step=num_steps, expected_bias=[0.])

  def testFromCheckpoint(self):
    """Resumes from a checkpoint with bias 7; checks loss and global step."""
    # Create initial checkpoint.
    bias = 7.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = bias = 7.
    # loss = (logits - label)^2 = (7 - 5)^2 = 4
    mock_optimizer = self._mock_optimizer(expected_loss=4.)
    baseline_regressor = _baseline_regressor_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        label_dimension=1,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=[bias])

  def testFromCheckpointMultiBatch(self):
    # Create initial checkpoint.
bias = 5.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = bias
    # logits[0] = 5.
    # logits[1] = 5.
    # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4
    mock_optimizer = self._mock_optimizer(expected_loss=4.)
    baseline_regressor = _baseline_regressor_fn(
        model_dir=self._model_dir, optimizer=mock_optimizer)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    baseline_regressor.train(
        input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
        steps=num_steps)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        label_dimension=1,
        expected_global_step=initial_global_step + num_steps,
        expected_bias=bias)


# Tests for Baseline Classifier.


@test_util.run_v1_only('Tests v1 only symbols')
class BaselineClassifierTrainingTest(tf.test.TestCase):
  """Tests BaselineClassifier training against a mock v1 optimizer."""

  def setUp(self):
    # Fresh model dir per test so checkpoints never leak between tests.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _mock_optimizer(self, expected_loss=None):
    """Returns a mock v1 Optimizer whose minimize() validates training.

    Args:
      expected_loss: If set, minimize() adds an assert op to the graph checking
        that the incoming scalar loss is close to this value.
    """
    expected_var_names = ['%s:0' % BIAS_NAME]

    def _minimize(loss, global_step):
      trainable_vars = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
      self.assertItemsEqual(expected_var_names,
                            [var.name for var in trainable_vars])

      # Verify loss. We can't check the value directly, so we add an assert op.
self.assertEquals(0, loss.shape.ndims) if expected_loss is None: return tf.compat.v1.assign_add(global_step, 1).op assert_loss = assert_close( tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32), loss, name='assert_loss') with tf.control_dependencies((assert_loss,)): return tf.compat.v1.assign_add(global_step, 1).op mock_optimizer = tf.compat.v1.test.mock.NonCallableMock( spec=tf.compat.v1.train.Optimizer, wraps=tf.compat.v1.train.Optimizer( use_locking=False, name='my_optimizer')) mock_optimizer.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize) # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. # So, return mock_optimizer itself for deepcopy. mock_optimizer.__deepcopy__ = lambda _: mock_optimizer return mock_optimizer def _assert_checkpoint(self, n_classes, expected_global_step, expected_bias=None): logits_dimension = n_classes if n_classes > 2 else 1 shapes = { name: shape for (name, shape) in tf.train.list_variables(self._model_dir) } self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertEqual( expected_global_step, tf.train.load_variable(self._model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP)) self.assertEqual([logits_dimension], shapes[BIAS_NAME]) if expected_bias is not None: self.assertAllEqual(expected_bias, tf.train.load_variable(self._model_dir, BIAS_NAME)) def _testFromScratchWithDefaultOptimizer(self, n_classes): label = 0 age = 17 est = baseline.BaselineClassifier( n_classes=n_classes, model_dir=self._model_dir) # Train for a few steps, and validate final checkpoint. 
num_steps = 10 est.train( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) self._assert_checkpoint(n_classes, num_steps) def testBinaryClassesFromScratchWithDefaultOptimizer(self): self._testFromScratchWithDefaultOptimizer(n_classes=2) def testMultiClassesFromScratchWithDefaultOptimizer(self): self._testFromScratchWithDefaultOptimizer(n_classes=4) def _testTrainWithTwoDimsLabel(self, n_classes): batch_size = 20 est = baseline.BaselineClassifier( n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) data_rank_2 = np.array([[0], [1]]) self.assertEqual((2,), data_rank_1.shape) self.assertEqual((2, 1), data_rank_2.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_2, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithTwoDimsLabel(self): self._testTrainWithTwoDimsLabel(n_classes=2) def testMultiClassesTrainWithTwoDimsLabel(self): self._testTrainWithTwoDimsLabel(n_classes=4) def _testTrainWithOneDimLabel(self, n_classes): batch_size = 20 est = baseline.BaselineClassifier( n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) self.assertEqual((2,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithOneDimLabel(self): self._testTrainWithOneDimLabel(n_classes=2) def testMultiClassesTrainWithOneDimLabel(self): self._testTrainWithOneDimLabel(n_classes=4) def _testTrainWithTwoDimsWeight(self, n_classes): batch_size = 20 est = baseline.BaselineClassifier( weight_column='w', n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) data_rank_2 = np.array([[0], [1]]) self.assertEqual((2,), data_rank_1.shape) self.assertEqual((2, 1), 
data_rank_2.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_2 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithTwoDimsWeight(self): self._testTrainWithTwoDimsWeight(n_classes=2) def testMultiClassesTrainWithTwoDimsWeight(self): self._testTrainWithTwoDimsWeight(n_classes=4) def _testTrainWithOneDimWeight(self, n_classes): batch_size = 20 est = baseline.BaselineClassifier( weight_column='w', n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) self.assertEqual((2,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_1 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithOneDimWeight(self): self._testTrainWithOneDimWeight(n_classes=2) def testMultiClassesTrainWithOneDimWeight(self): self._testTrainWithOneDimWeight(n_classes=4) def _testFromScratch(self, n_classes): label = 1 age = 17 # For binary classifier: # loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are # all zero initially) and label = 1 so, # loss = 1 * -log ( sigmoid(logits) ) = 0.69315 # For multi class classifier: # loss = cross_entropy(logits, label) where logits are all 0s (weights are # all zero initially) and label = 1 so, # loss = 1 * -log ( 1.0 / n_classes ) # For this particular test case, as logits are same, the formula # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases. mock_optimizer = self._mock_optimizer( expected_loss=(-1 * math.log(1.0 / n_classes))) est = baseline.BaselineClassifier( n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. 
num_steps = 10 est.train( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( n_classes, expected_global_step=num_steps, expected_bias=[0.] if n_classes == 2 else [.0] * n_classes) def testBinaryClassesFromScratch(self): self._testFromScratch(n_classes=2) def testMultiClassesFromScratch(self): self._testFromScratch(n_classes=4) def _testFromCheckpoint(self, n_classes): # Create initial checkpoint. label = 1 age = 17 bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # For binary classifier: # logits = bias = -1. # loss = sigmoid_cross_entropy(logits, label) # so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133 # For multi class classifier: # loss = cross_entropy(logits, label) # where logits = bias and label = 1 # so, loss = 1 * -log ( softmax(logits)[1] ) if n_classes == 2: expected_loss = 1.3133 else: logits = bias logits_exp = np.exp(logits) softmax = logits_exp / logits_exp.sum() expected_loss = -1 * math.log(softmax[label]) mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) est = baseline.BaselineClassifier( n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. 
num_steps = 10 est.train( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( n_classes, expected_global_step=initial_global_step + num_steps, expected_bias=bias) def testBinaryClassesFromCheckpoint(self): self._testFromCheckpoint(n_classes=2) def testMultiClassesFromCheckpoint(self): self._testFromCheckpoint(n_classes=4) def _testFromCheckpointFloatLabels(self, n_classes): """Tests float labels for binary classification.""" # Create initial checkpoint. if n_classes > 2: return label = 0.8 age = 17 bias = [-1.0] initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # logits = bias = -1. # loss = sigmoid_cross_entropy(logits, label) # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617 mock_optimizer = self._mock_optimizer(expected_loss=1.1132617) est = baseline.BaselineClassifier( n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) def testBinaryClassesFromCheckpointFloatLabels(self): self._testFromCheckpointFloatLabels(n_classes=2) def testMultiClassesFromCheckpointFloatLabels(self): self._testFromCheckpointFloatLabels(n_classes=4) def _testFromCheckpointMultiBatch(self, n_classes): # Create initial checkpoint. label = [1, 0] age = [17, 18.5] # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). 
bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # For binary classifier: # logits = bias # logits[0] = -1. # logits[1] = -1. # loss = sigmoid_cross_entropy(logits, label) # so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133 # loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132 # For multi class classifier: # loss = cross_entropy(logits, label) # where logits = bias and label = [1, 0] # so, loss = 1 * -log ( softmax(logits)[label] ) if n_classes == 2: expected_loss = (1.3133 + 0.3132) else: # Expand logits since batch_size=2 logits = bias * np.ones(shape=(2, 1)) logits_exp = np.exp(logits) softmax_row_0 = logits_exp[0] / logits_exp[0].sum() softmax_row_1 = logits_exp[1] / logits_exp[1].sum() expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) expected_loss = expected_loss_0 + expected_loss_1 mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) est = baseline.BaselineClassifier( n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. 
num_steps = 10 est.train(input_fn=lambda: ({'age': (age)}, (label)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( n_classes, expected_global_step=initial_global_step + num_steps, expected_bias=bias) def testBinaryClassesFromCheckpointMultiBatch(self): self._testFromCheckpointMultiBatch(n_classes=2) def testMultiClassesFromCheckpointMultiBatch(self): self._testFromCheckpointMultiBatch(n_classes=4) @test_util.run_v1_only('Tests v1 only symbols') class BaselineClassifierEvaluationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) def _test_evaluation_for_simple_data(self, n_classes): label = 1 age = 1. bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable( 100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = _baseline_classifier_fn( n_classes=n_classes, model_dir=self._model_dir) eval_metrics = est.evaluate( input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1) if n_classes == 2: # Binary classes: loss = -log(sigmoid(-1)) / batch size = 1.3133 # Prediction = sigmoid(-1) = 0.2689 expected_metrics = { metric_keys.MetricKeys.LOSS: 1.3133, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: 1.3133, metric_keys.MetricKeys.ACCURACY: 0., metric_keys.MetricKeys.PRECISION: 0., metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, metric_keys.MetricKeys.AUC: 0., metric_keys.MetricKeys.AUC_PR: 1., } else: # Multi classes: loss = 1 * -log ( softmax(logits)[label] ) logits = bias logits_exp = np.exp(logits) softmax = logits_exp / logits_exp.sum() expected_loss = -1 * math.log(softmax[label]) expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, 
tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: expected_loss, metric_keys.MetricKeys.ACCURACY: 0., } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics), rtol=1e-3) def test_binary_classes_evaluation_for_simple_data(self): self._test_evaluation_for_simple_data(n_classes=2) def test_multi_classes_evaluation_for_simple_data(self): self._test_evaluation_for_simple_data(n_classes=4) def _test_evaluation_batch(self, n_classes): """Tests evaluation for batch_size==2.""" label = [1, 0] age = [17., 18.] bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = _baseline_classifier_fn( n_classes=n_classes, model_dir=self._model_dir) eval_metrics = est.evaluate( input_fn=lambda: ({'age': (age)}, (label)), steps=1) if n_classes == 2: # Logits are (-1., -1.) labels are (1, 0). 
# Loss is # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 # Prediction = sigmoid(-1) = 0.2689 expected_loss = 1.3133 + 0.3132 expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, metric_keys.MetricKeys.ACCURACY: 0.5, metric_keys.MetricKeys.PRECISION: 0., metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, metric_keys.MetricKeys.AUC: 0.5, metric_keys.MetricKeys.AUC_PR: 0.75, } else: # Expand logits since batch_size=2 logits = bias * np.ones(shape=(2, 1)) logits_exp = np.exp(logits) softmax_row_0 = logits_exp[0] / logits_exp[0].sum() softmax_row_1 = logits_exp[1] / logits_exp[1].sum() expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) expected_loss = expected_loss_0 + expected_loss_1 expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, metric_keys.MetricKeys.ACCURACY: 0.5, } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics), rtol=1e-3) def test_binary_classes_evaluation_batch(self): self._test_evaluation_batch(n_classes=2) def test_multi_classes_evaluation_batch(self): self._test_evaluation_batch(n_classes=4) def _test_evaluation_weights(self, n_classes): """Tests evaluation with weights.""" label = [1, 0] age = [17., 18.] weights = [1., 2.] # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). 
bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = _baseline_classifier_fn( n_classes=n_classes, weight_column='w', model_dir=self._model_dir) eval_metrics = est.evaluate( input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1) if n_classes == 2: # Logits are (-1., -1.) labels are (1, 0). # Loss is # loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133 # loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132 # weights = [1., 2.] expected_loss = 1.3133 * 1. + 0.3132 * 2. loss_mean = expected_loss / (1.0 + 2.0) label_mean = np.average(label, weights=weights) logits = [-1, -1] logistics = sigmoid(np.array(logits)) predictions_mean = np.average(logistics, weights=weights) expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: loss_mean, metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), metric_keys.MetricKeys.PRECISION: 0., metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, metric_keys.MetricKeys.LABEL_MEAN: label_mean, metric_keys.MetricKeys.ACCURACY_BASELINE: (max(label_mean, 1 - label_mean)), metric_keys.MetricKeys.AUC: 0.5, metric_keys.MetricKeys.AUC_PR: 2. / (1. 
+ 2.), } else: # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) # Expand logits since batch_size=2 logits = bias * np.ones(shape=(2, 1)) logits_exp = np.exp(logits) softmax_row_0 = logits_exp[0] / logits_exp[0].sum() softmax_row_1 = logits_exp[1] / logits_exp[1].sum() expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) loss_mean = np.average([expected_loss_0, expected_loss_1], weights=weights) expected_loss = loss_mean * np.sum(weights) expected_metrics = { metric_keys.MetricKeys.LOSS: expected_loss, tf.compat.v1.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: loss_mean, metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), } self.assertAllClose( sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics), rtol=1e-3) def test_binary_classes_evaluation_weights(self): self._test_evaluation_weights(n_classes=2) def test_multi_classes_evaluation_weights(self): self._test_evaluation_weights(n_classes=4) @test_util.run_v1_only('Tests v1 only symbols') class BaselineClassifierPredictTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) def _testPredictions(self, n_classes, label_vocabulary, label_output_fn): """Tests predict when all variables are one-dimensional.""" age = 1. 
bias = [10.0] if n_classes == 2 else [10.0] * n_classes with tf.Graph().as_default(): tf.Variable(bias, name=BIAS_NAME) tf.Variable(100, name='global_step', dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) est = _baseline_classifier_fn( label_vocabulary=label_vocabulary, n_classes=n_classes, model_dir=self._model_dir) predict_input_fn = numpy_io.numpy_input_fn( x={'age': np.array([[age]])}, y=None, batch_size=1, num_epochs=1, shuffle=False) predictions = list(est.predict(input_fn=predict_input_fn)) if n_classes == 2: scalar_logits = bias[0] two_classes_logits = [0, scalar_logits] two_classes_logits_exp = np.exp(two_classes_logits) softmax = two_classes_logits_exp / two_classes_logits_exp.sum() expected_predictions = { 'class_ids': [1], 'all_class_ids': [0, 1], 'classes': [label_output_fn(1)], 'all_classes': [label_output_fn(0), label_output_fn(1)], 'logistic': [sigmoid(np.array(scalar_logits))], 'logits': [scalar_logits], 'probabilities': softmax, } else: onedim_logits = np.array(bias) class_ids = onedim_logits.argmax() all_class_ids = list(range(len(onedim_logits))) logits_exp = np.exp(onedim_logits) softmax = logits_exp / logits_exp.sum() expected_predictions = { 'class_ids': [class_ids], 'all_class_ids': all_class_ids, 'classes': [label_output_fn(class_ids)], 'all_classes': [label_output_fn(i) for i in all_class_ids], 'logits': onedim_logits, 'probabilities': softmax, } self.assertEqual(1, len(predictions)) # assertAllClose cannot handle byte type. 
self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
    expected_predictions.pop('classes')
    predictions[0].pop('classes')
    self.assertAllEqual(expected_predictions['all_classes'],
                        predictions[0]['all_classes'])
    expected_predictions.pop('all_classes')
    predictions[0].pop('all_classes')
    self.assertAllClose(
        sorted_key_dict(expected_predictions), sorted_key_dict(predictions[0]))

  def testBinaryClassesWithoutLabelVocabulary(self):
    n_classes = 2
    self._testPredictions(
        n_classes,
        label_vocabulary=None,
        label_output_fn=lambda x: ('%s' % x).encode())

  def testBinaryClassesWithLabelVocabulary(self):
    n_classes = 2
    self._testPredictions(
        n_classes,
        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())

  def testMultiClassesWithoutLabelVocabulary(self):
    n_classes = 4
    self._testPredictions(
        n_classes,
        label_vocabulary=None,
        label_output_fn=lambda x: ('%s' % x).encode())

  def testMultiClassesWithLabelVocabulary(self):
    n_classes = 4
    self._testPredictions(
        n_classes,
        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())


@test_util.run_v1_only('Tests v1 only symbols')
class BaselineClassifierIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export test for BaselineClassifier."""

  def setUp(self):
    # Fresh model dir per test so checkpoints never leak between tests.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension,
                          prediction_length):
    """Runs train -> evaluate -> predict -> export on one classifier."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = _baseline_classifier_fn(
        n_classes=n_classes, model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['classes']
for x in est.predict(predict_input_fn)]) self.assertAllEqual((prediction_length, 1), predictions.shape) # EXPORT feature_spec = tf.compat.v1.feature_column.make_parse_example_spec( feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def _test_numpy_input_fn(self, n_classes): """Tests complete flow with numpy_input_fn.""" input_dimension = 4 batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) target = np.array([1] * batch_size) train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=target, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=target, batch_size=batch_size, num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, shuffle=False) self._test_complete_flow( n_classes=n_classes, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_numpy_input_fn(self): self._test_numpy_input_fn(n_classes=2) def test_multi_classes_numpy_input_fn(self): self._test_numpy_input_fn(n_classes=4) def _test_pandas_input_fn(self, n_classes): """Tests complete flow with pandas_input_fn.""" if not HAS_PANDAS: return # Pandas DataFrame natually supports 1 dim data only. 
input_dimension = 1 batch_size = 10 data = np.array([1., 2., 3., 4.], dtype=np.float32) target = np.array([1, 0, 1, 0], dtype=np.int32) x = pd.DataFrame({'x': data}) y = pd.Series(target) prediction_length = 4 train_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, shuffle=False) predict_input_fn = pandas_io.pandas_input_fn( x=x, batch_size=batch_size, shuffle=False) self._test_complete_flow( n_classes=n_classes, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_pandas_input_fn(self): self._test_pandas_input_fn(n_classes=2) def test_multi_classes_pandas_input_fn(self): self._test_pandas_input_fn(n_classes=4) def _test_input_fn_from_parse_example(self, n_classes): """Tests complete flow with input_fn constructed from parse_example.""" input_dimension = 2 batch_size = 10 prediction_length = batch_size data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) target = np.array([1] * batch_size, dtype=np.int64) serialized_examples = [] for x, y in zip(data, target): example = example_pb2.Example( features=feature_pb2.Features( feature={ 'x': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=x)), 'y': feature_pb2.Feature( int64_list=feature_pb2.Int64List(value=[y])), })) serialized_examples.append(example.SerializeToString()) feature_spec = { 'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32), 'y': tf.io.FixedLenFeature([1], tf.dtypes.int64), } def _train_input_fn(): feature_map = tf.compat.v1.io.parse_example(serialized_examples, feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _eval_input_fn(): feature_map = tf.compat.v1.io.parse_example( 
tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _predict_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = queue_parsed_features(feature_map) features.pop('y') return features, None self._test_complete_flow( n_classes=n_classes, train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, input_dimension=input_dimension, prediction_length=prediction_length) def test_binary_classes_input_fn_from_parse_example(self): self._test_input_fn_from_parse_example(n_classes=2) def test_multi_classes_input_fn_from_parse_example(self): self._test_input_fn_from_parse_example(n_classes=4) # Tests for Baseline logit_fn. @test_util.run_v1_only('Tests v1 only symbols') class BaselineLogitFnTest(tf.test.TestCase): def test_basic_logit_correctness(self): """baseline_logit_fn simply returns the bias variable.""" with tf.Graph().as_default(): logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2) logits = logit_fn(features={'age': [[23.], [31.]]}) with tf.compat.v1.variable_scope('baseline', reuse=True): bias_var = tf.compat.v1.get_variable('bias') with tf.compat.v1.Session() as sess: sess.run([tf.compat.v1.initializers.global_variables()]) self.assertAllClose([[0., 0.], [0., 0.]], logits.eval()) sess.run(bias_var.assign([10., 5.])) self.assertAllClose([[10., 5.], [10., 5.]], logits.eval()) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/v1/dnn_estimator_test_v1.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DNNEstimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf

from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.canned.v1 import dnn_testing_utils_v1
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io


def _dnn_estimator_fn(weight_column=None, label_dimension=1, **kwargs):
  """Returns a DNNEstimator that uses regression_head."""
  return dnn.DNNEstimator(
      head=head_lib._regression_head(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.compat.v1.losses.Reduction.SUM),
      **kwargs)


def _dnn_estimator_classifier_fn(n_classes=3, **kwargs):
  """Returns a DNNEstimator that uses a multi-class softmax head."""
  return dnn.DNNEstimator(
      head=head_lib._multi_class_head_with_softmax_cross_entropy_loss(
          n_classes=n_classes),
      **kwargs)


# Each class below mixes a shared base test suite from dnn_testing_utils_v1
# with tf.test.TestCase and binds one of the factory functions above, so the
# same tests exercise DNNEstimator.
@test_util.run_v1_only('Tests v1 only symbols')
class DNNEstimatorEvaluateTest(
    dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNEstimatorPredictTest(dnn_testing_utils_v1.BaseDNNRegressorPredictTest,
                              tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNEstimatorTrainTest(dnn_testing_utils_v1.BaseDNNRegressorTrainTest,
                            tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNEstimatorWarmStartingTest(dnn_testing_utils_v1.BaseDNNWarmStartingTest,
                                   tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    # Warm-starting tests need both a classifier and a regressor factory.
    dnn_testing_utils_v1.BaseDNNWarmStartingTest.__init__(
        self, _dnn_estimator_classifier_fn, _dnn_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNEstimatorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export test for DNNEstimator."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension, label_dimension,
                          batch_size):
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn.DNNEstimator(
        head=head_lib._regression_head(label_dimension=label_dimension),
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        model_dir=self._model_dir)

    # Train
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE:
tensorflow_estimator/python/estimator/canned/v1/dnn_linear_combined_estimator_test_v1.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DNNLinearCombinedEstimatorV1."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf

from tensorflow.python.feature_column import feature_column
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.canned.v1 import dnn_testing_utils_v1
from tensorflow_estimator.python.estimator.canned.v1 import linear_testing_utils_v1
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io


def _dnn_only_estimator_fn(hidden_units,
                           feature_columns,
                           model_dir=None,
                           label_dimension=1,
                           weight_column=None,
                           optimizer='Adagrad',
                           activation_fn=tf.nn.relu,
                           dropout=None,
                           input_layer_partitioner=None,
                           config=None):
  """Returns a DNNLinearCombinedEstimator configured as a DNN-only regressor.

  Only dnn_* arguments are populated, so the shared DNN regressor test
  suites can exercise the combined estimator's DNN half.
  """
  return dnn_linear_combined.DNNLinearCombinedEstimator(
      head=head_lib._regression_head(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.compat.v1.losses.Reduction.SUM),
      model_dir=model_dir,
      dnn_feature_columns=feature_columns,
      dnn_optimizer=optimizer,
      dnn_hidden_units=hidden_units,
      dnn_activation_fn=activation_fn,
      dnn_dropout=dropout,
      input_layer_partitioner=input_layer_partitioner,
      config=config)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyEstimatorEvaluateTest(
    dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_only_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyEstimatorPredictTest(
    dnn_testing_utils_v1.BaseDNNRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_only_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyEstimatorTrainTest(dnn_testing_utils_v1.BaseDNNRegressorTrainTest,
                                tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_only_estimator_fn)


def _linear_only_estimator_fn(feature_columns,
                              model_dir=None,
                              label_dimension=1,
                              weight_column=None,
                              optimizer='Ftrl',
                              config=None,
                              partitioner=None,
                              sparse_combiner='sum'):
  """Returns a DNNLinearCombinedEstimator configured as a linear-only regressor.

  Only linear_* arguments are populated, so the shared linear regressor test
  suites can exercise the combined estimator's linear half.
  """
  return dnn_linear_combined.DNNLinearCombinedEstimator(
      head=head_lib._regression_head(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.compat.v1.losses.Reduction.SUM),
      model_dir=model_dir,
      linear_feature_columns=feature_columns,
      linear_optimizer=optimizer,
      input_layer_partitioner=partitioner,
      config=config,
      linear_sparse_combiner=sparse_combiner)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyEstimatorEvaluateTest(
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_only_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyEstimatorPredictTest(
    linear_testing_utils_v1.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPredictTest.__init__(
        self, _linear_only_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyEstimatorTrainTest(
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_only_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNLinearCombinedEstimatorIntegrationTest(tf.test.TestCase):
  """End-to-end test for the combined (linear + DNN) estimator."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension, label_dimension,
                          batch_size):
    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    est = dnn_linear_combined.DNNLinearCombinedEstimator(
        head=head_lib._regression_head(label_dimension=label_dimension),
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns,
        dnn_hidden_units=(2, 2),
        model_dir=self._model_dir)

    # Train
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/v1/dnn_linear_combined_test_v1.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for v1 version of dnn_linear_combined.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

from absl.testing import parameterized
import numpy as np
import six
import tensorflow as tf

from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.canned.v1 import dnn_testing_utils_v1
from tensorflow_estimator.python.estimator.canned.v1 import linear_testing_utils_v1
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.inputs import pandas_io

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use
  # pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False

# This is so that we can easily switch between feature_column and
# feature_column_v2 for testing.
# Note that following V2 version of tests are for feature_column_v2, not the v2
# version of canned estimator.
feature_column.numeric_column = feature_column._numeric_column
feature_column.categorical_column_with_hash_bucket = feature_column._categorical_column_with_hash_bucket  # pylint: disable=line-too-long
feature_column.categorical_column_with_vocabulary_list = feature_column._categorical_column_with_vocabulary_list  # pylint: disable=line-too-long
feature_column.categorical_column_with_vocabulary_file = feature_column._categorical_column_with_vocabulary_file  # pylint: disable=line-too-long
feature_column.embedding_column = feature_column._embedding_column


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyModelFnTest(dnn_testing_utils_v1.BaseDNNModelFnTest,
                         tf.test.TestCase):
  """Runs the shared DNN model_fn tests against the combined model_fn."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNModelFnTest.__init__(self,
                                                     self._dnn_only_model_fn)

  def _dnn_only_model_fn(self,
                         features,
                         labels,
                         mode,
                         head,
                         hidden_units,
                         feature_columns,
                         optimizer='Adagrad',
                         activation_fn=tf.nn.relu,
                         dropout=None,
                         input_layer_partitioner=None,
                         config=None):
    # Adapts the DNN-only model_fn test signature to the combined model_fn by
    # passing empty linear feature columns.
    return dnn_linear_combined._dnn_linear_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=head,
        linear_feature_columns=[],
        dnn_hidden_units=hidden_units,
        dnn_feature_columns=feature_columns,
        dnn_optimizer=optimizer,
        dnn_activation_fn=activation_fn,
        dnn_dropout=dropout,
        input_layer_partitioner=input_layer_partitioner,
        config=config)


# A function to mimic linear-regressor init reuse same tests.
def _linear_regressor_fn(feature_columns,
                         model_dir=None,
                         label_dimension=1,
                         weight_column=None,
                         optimizer='Ftrl',
                         config=None,
                         partitioner=None,
                         sparse_combiner='sum'):
  """Builds a linear-only DNNLinearCombinedRegressor for the shared tests."""
  return dnn_linear_combined.DNNLinearCombinedRegressor(
      model_dir=model_dir,
      linear_feature_columns=feature_columns,
      linear_optimizer=optimizer,
      label_dimension=label_dimension,
      weight_column=weight_column,
      input_layer_partitioner=partitioner,
      config=config,
      linear_sparse_combiner=sparse_combiner)


# The classes below bind _linear_regressor_fn into the shared linear regressor
# test suites, once with feature_column (v1) and once with feature_column_v2.
@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorPartitionerTest(
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorPartitionerV2Test(
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorEvaluationTest(
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorEvaluationV2Test(
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorPredictTest(
    linear_testing_utils_v1.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPredictTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorPredictV2Test(
    linear_testing_utils_v1.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPredictTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorIntegrationTest(
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorIntegrationV2Test(
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorTrainingTest(
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyRegressorTrainingV2Test(
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


def _linear_classifier_fn(feature_columns,
                          model_dir=None,
                          n_classes=2,
                          weight_column=None,
                          label_vocabulary=None,
                          optimizer='Ftrl',
                          config=None,
                          partitioner=None,
                          sparse_combiner='sum'):
  """Builds a linear-only DNNLinearCombinedClassifier for the shared tests."""
  return dnn_linear_combined.DNNLinearCombinedClassifier(
      model_dir=model_dir,
      linear_feature_columns=feature_columns,
      linear_optimizer=optimizer,
      n_classes=n_classes,
      weight_column=weight_column,
      label_vocabulary=label_vocabulary,
      input_layer_partitioner=partitioner,
      config=config,
      linear_sparse_combiner=sparse_combiner)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierTrainingTest(
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierTrainingV2Test(
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierClassesEvaluationTest(
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierClassesEvaluationV2Test(
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierPredictTest(
    linear_testing_utils_v1.BaseLinearClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierPredictTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierPredictV2Test(
    linear_testing_utils_v1.BaseLinearClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierPredictTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierIntegrationTest(
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearOnlyClassifierIntegrationV2Test(
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
@parameterized.parameters((feature_column,), (feature_column_v2,))
class DNNLinearCombinedRegressorIntegrationTest(tf.test.TestCase):
  """End-to-end regressor tests, parameterized over feature-column libs."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow_helper(self, linear_feature_columns,
                                 dnn_feature_columns, feature_spec,
                                 train_input_fn, eval_input_fn,
                                 predict_input_fn, input_dimension,
                                 label_dimension, batch_size):
    est = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _test_complete_flow(self, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension, label_dimension,
                          batch_size, fc_impl):
    # Both towers built from the same feature-column implementation.
    linear_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    self._test_complete_flow_helper(linear_feature_columns,
                                    dnn_feature_columns, feature_spec,
                                    train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size)

  def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn,
                               predict_input_fn, input_dimension,
                               label_dimension, batch_size, fc_impl):
    # Mixed implementations: public tf.feature_column for the linear tower,
    # internal feature_column for the DNN tower.
    del fc_impl
    linear_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    self._test_complete_flow_helper(linear_feature_columns,
                                    dnn_feature_columns, feature_spec,
                                    train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size)

  def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
                               predict_input_fn, input_dimension,
                               label_dimension, batch_size, fc_impl):
    # Mixed implementations, reversed relative to mix1.
    del fc_impl
    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    self._test_complete_flow_helper(linear_feature_columns,
                                    dnn_feature_columns, feature_spec,
                                    train_input_fn, eval_input_fn,
                                    predict_input_fn, input_dimension,
                                    label_dimension, batch_size)

  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    fn_to_run(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_numpy_input_fn_basic(self, fc_impl):
    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)

  def test_numpy_input_fn_mix1(self, fc_impl):
    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1)

  def test_numpy_input_fn_mix2(self, fc_impl):
    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2)

  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    label_dimension = 1
    batch_size = 10
    data = np.linspace(0., 2., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    fn_to_run(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_pandas_input_fn_basic(self, fc_impl):
    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)

  def test_pandas_input_fn_mix1(self, fc_impl):
    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1)

  def test_pandas_input_fn_mix2(self, fc_impl):
    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2)

  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = linear_testing_utils_v1.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils_v1.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      # Same as eval, but drops the label column and returns no labels.
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils_v1.queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    fn_to_run(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_input_fn_from_parse_example_basic(self, fc_impl):
    self._test_input_fn_from_parse_example_helper(fc_impl,
                                                  self._test_complete_flow)

  def test_input_fn_from_parse_example_mix1(self, fc_impl):
    self._test_input_fn_from_parse_example_helper(
        fc_impl, self._test_complete_flow_mix1)

  def test_input_fn_from_parse_example_mix2(self, fc_impl):
    self._test_input_fn_from_parse_example_helper(
        fc_impl, self._test_complete_flow_mix2)


# A function to mimic dnn-classifier init reuse same tests.
def _dnn_classifier_fn(hidden_units,
                       feature_columns,
                       model_dir=None,
                       n_classes=2,
                       weight_column=None,
                       label_vocabulary=None,
                       optimizer='Adagrad',
                       config=None,
                       input_layer_partitioner=None):
  return dnn_linear_combined.DNNLinearCombinedClassifier(
      model_dir=model_dir,
      dnn_hidden_units=hidden_units,
      dnn_feature_columns=feature_columns,
      dnn_optimizer=optimizer,
      n_classes=n_classes,
      weight_column=weight_column,
      label_vocabulary=label_vocabulary,
      input_layer_partitioner=input_layer_partitioner,
      config=config)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyClassifierEvaluateTest(
    dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyClassifierEvaluateV2Test(
    dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyClassifierPredictTest(
    dnn_testing_utils_v1.BaseDNNClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierPredictTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyClassifierPredictV2Test(
    dnn_testing_utils_v1.BaseDNNClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierPredictTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyClassifierTrainTest(
    dnn_testing_utils_v1.BaseDNNClassifierTrainTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierTrainTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyClassifierTrainV2Test(
    dnn_testing_utils_v1.BaseDNNClassifierTrainTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierTrainTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


# A function to mimic dnn-regressor init reuse same tests.
def _dnn_regressor_fn(hidden_units, feature_columns, model_dir=None, label_dimension=1, weight_column=None, optimizer='Adagrad', config=None, input_layer_partitioner=None): return dnn_linear_combined.DNNLinearCombinedRegressor( model_dir=model_dir, dnn_hidden_units=hidden_units, dnn_feature_columns=feature_columns, dnn_optimizer=optimizer, label_dimension=label_dimension, weight_column=weight_column, input_layer_partitioner=input_layer_partitioner, config=config) @test_util.run_v1_only('Tests v1 only symbols') class DNNOnlyRegressorEvaluateTest( dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNOnlyRegressorEvaluateV2Test( dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column_v2) @test_util.run_v1_only('Tests v1 only symbols') class DNNOnlyRegressorPredictTest( dnn_testing_utils_v1.BaseDNNRegressorPredictTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorPredictTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNOnlyRegressorPredictV2Test( dnn_testing_utils_v1.BaseDNNRegressorPredictTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorPredictTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column_v2) 
@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyRegressorTrainTest(dnn_testing_utils_v1.BaseDNNRegressorTrainTest,
                                tf.test.TestCase):
  """Runs the shared DNN regressor train tests with feature_column v1."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNOnlyRegressorTrainV2Test(
    dnn_testing_utils_v1.BaseDNNRegressorTrainTest, tf.test.TestCase):
  """Runs the shared DNN regressor train tests with feature_column v2."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
@parameterized.parameters((feature_column,), (feature_column_v2,))
class DNNLinearCombinedClassifierIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export tests for the combined classifier."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _as_label(self, data_in_float):
    # Round float targets to the nearest integer class id.
    return np.rint(data_in_float).astype(np.int64)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, n_classes, batch_size, fc_impl):
    """Trains, evaluates, predicts with, and exports one estimator."""
    linear_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        fc_impl.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    est = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self, fc_impl):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    input_dimension = 2
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = self._as_label(np.reshape(data[:batch_size], (batch_size, 1)))
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_pandas_input_fn(self, fc_impl):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    input_dimension = 1
    n_classes = 2
    batch_size = 10
    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(self._as_label(data))
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)

  def test_input_fn_from_parse_example(self, fc_impl):
    """Tests complete flow with input_fn constructed from parse_example."""
    input_dimension = 2
    n_classes = 3
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    data = data.reshape(batch_size, input_dimension)

    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          int64_list=feature_pb2.Int64List(
                              value=self._as_label(datum[:1]))),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([1], tf.dtypes.int64),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = linear_testing_utils_v1.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils_v1.queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = linear_testing_utils_v1.queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)
@test_util.run_v1_only('Tests v1 only symbols') @parameterized.parameters((feature_column,), (feature_column_v2,)) class DNNLinearCombinedTests(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) def _mock_optimizer(self, real_optimizer, var_name_prefix): """Verifies global_step is None and var_names start with given prefix.""" def _minimize(loss, global_step=None, var_list=None): self.assertIsNone(global_step) trainable_vars = var_list or tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) var_names = [var.name for var in trainable_vars] self.assertTrue( all([name.startswith(var_name_prefix) for name in var_names])) # var is used to check this op called by training. with ops.name_scope(''): var = tf.Variable(0., name=(var_name_prefix + '_called')) with tf.control_dependencies([var.assign(100.)]): return real_optimizer.minimize(loss, global_step, var_list) optimizer_mock = tf.compat.v1.test.mock.NonCallableMagicMock( spec=tf.compat.v1.train.Optimizer, wraps=real_optimizer) optimizer_mock.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize) return optimizer_mock def test_train_op_calls_both_dnn_and_linear(self, fc_impl): opt = tf.compat.v1.train.GradientDescentOptimizer(1.) x_column = fc_impl.numeric_column('x') input_fn = numpy_io.numpy_input_fn( x={'x': np.array([[0.], [1.]])}, y=np.array([[0.], [1.]]), batch_size=1, shuffle=False) est = dnn_linear_combined.DNNLinearCombinedClassifier( linear_feature_columns=[x_column], # verifies linear_optimizer is used only for linear part. linear_optimizer=self._mock_optimizer(opt, 'linear'), dnn_hidden_units=(2, 2), dnn_feature_columns=[x_column], # verifies dnn_optimizer is used only for linear part. 
dnn_optimizer=self._mock_optimizer(opt, 'dnn'), model_dir=self._model_dir) est.train(input_fn, steps=1) # verifies train_op fires linear minimize op self.assertEqual(100., tf.train.load_variable(self._model_dir, 'linear_called')) # verifies train_op fires dnn minimize op self.assertEqual(100., tf.train.load_variable(self._model_dir, 'dnn_called')) def test_dnn_and_linear_logits_are_added(self, fc_impl): with tf.Graph().as_default(): tf.Variable([[1.0]], name='linear/linear_model/x/weights') tf.Variable([2.0], name='linear/linear_model/bias_weights') tf.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel') tf.Variable([4.0], name='dnn/hiddenlayer_0/bias') tf.Variable([[5.0]], name='dnn/logits/kernel') tf.Variable([6.0], name='dnn/logits/bias') tf.Variable(1, name='global_step', dtype=tf.dtypes.int64) linear_testing_utils_v1.save_variables_to_ckpt(self._model_dir) x_column = fc_impl.numeric_column('x') est = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=[x_column], dnn_hidden_units=[1], dnn_feature_columns=[x_column], model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( x={'x': np.array([[10.]])}, batch_size=1, shuffle=False) # linear logits = 10*1 + 2 = 12 # dnn logits = (10*3 + 4)*5 + 6 = 176 # logits = dnn + linear = 176 + 12 = 188 self.assertAllClose({ prediction_keys.PredictionKeys.PREDICTIONS: [188.], }, next(est.predict(input_fn=input_fn))) @test_util.run_v1_only('Tests v1 only symbols') @parameterized.parameters((feature_column,), (feature_column_v2,)) class DNNLinearCombinedWarmStartingTest(tf.test.TestCase): def setUp(self): # Create a directory to save our old checkpoint and vocabularies to. self._ckpt_and_vocab_dir = tempfile.mkdtemp() # Make a dummy input_fn. def _input_fn(): features = { 'age': [[23.], [31.]], 'city': [['Palo Alto'], ['Mountain View']], } return features, [0, 1] self._input_fn = _input_fn def tearDown(self): # Clean up checkpoint / vocab dir. 
tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._ckpt_and_vocab_dir) def test_classifier_basic_warm_starting(self, fc_impl): """Tests correctness of DNNLinearCombinedClassifier default warm-start.""" age = fc_impl.numeric_column('age') city = fc_impl.embedding_column( fc_impl.categorical_column_with_vocabulary_list( 'city', vocabulary_list=['Mountain View', 'Palo Alto']), dimension=5) # Create a DNNLinearCombinedClassifier and train to save a checkpoint. dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], model_dir=self._ckpt_and_vocab_dir, n_classes=4, linear_optimizer='SGD', dnn_optimizer='SGD') dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1) # Create a second DNNLinearCombinedClassifier, warm-started from the first. # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't # have accumulator values that change). warm_started_dnn_lc_classifier = ( dnn_linear_combined.DNNLinearCombinedClassifier( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], n_classes=4, linear_optimizer=tf.compat.v1.train.GradientDescentOptimizer( learning_rate=0.0), dnn_optimizer=tf.compat.v1.train.GradientDescentOptimizer( learning_rate=0.0), warm_start_from=dnn_lc_classifier.model_dir)) warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_dnn_lc_classifier.get_variable_names(): self.assertAllClose( dnn_lc_classifier.get_variable_value(variable_name), warm_started_dnn_lc_classifier.get_variable_value(variable_name)) def test_regressor_basic_warm_starting(self, fc_impl): """Tests correctness of DNNLinearCombinedRegressor default warm-start.""" age = fc_impl.numeric_column('age') city = fc_impl.embedding_column( fc_impl.categorical_column_with_vocabulary_list( 'city', vocabulary_list=['Mountain View', 'Palo Alto']), dimension=5) # Create a 
DNNLinearCombinedRegressor and train to save a checkpoint. dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], model_dir=self._ckpt_and_vocab_dir, linear_optimizer='SGD', dnn_optimizer='SGD') dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1) # Create a second DNNLinearCombinedRegressor, warm-started from the first. # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't # have accumulator values that change). warm_started_dnn_lc_regressor = ( dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], linear_optimizer=tf.compat.v1.train.GradientDescentOptimizer( learning_rate=0.0), dnn_optimizer=tf.compat.v1.train.GradientDescentOptimizer( learning_rate=0.0), warm_start_from=dnn_lc_regressor.model_dir)) warm_started_dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_dnn_lc_regressor.get_variable_names(): self.assertAllClose( dnn_lc_regressor.get_variable_value(variable_name), warm_started_dnn_lc_regressor.get_variable_value(variable_name)) def test_warm_starting_selective_variables(self, fc_impl): """Tests selecting variables to warm-start.""" age = fc_impl.numeric_column('age') city = fc_impl.embedding_column( fc_impl.categorical_column_with_vocabulary_list( 'city', vocabulary_list=['Mountain View', 'Palo Alto']), dimension=5) # Create a DNNLinearCombinedClassifier and train to save a checkpoint. dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], model_dir=self._ckpt_and_vocab_dir, n_classes=4, linear_optimizer='SGD', dnn_optimizer='SGD') dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1) # Create a second DNNLinearCombinedClassifier, warm-started from the first. 
# Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't # have accumulator values that change). warm_started_dnn_lc_classifier = ( dnn_linear_combined.DNNLinearCombinedClassifier( linear_feature_columns=[age], dnn_feature_columns=[city], dnn_hidden_units=[256, 128], n_classes=4, linear_optimizer=tf.compat.v1.train.GradientDescentOptimizer( learning_rate=0.0), dnn_optimizer=tf.compat.v1.train.GradientDescentOptimizer( learning_rate=0.0), # The provided regular expression will only warm-start the deep # portion of the model. warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=dnn_lc_classifier.model_dir, vars_to_warm_start='.*(dnn).*'))) warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1) for variable_name in warm_started_dnn_lc_classifier.get_variable_names(): if 'dnn' in variable_name: self.assertAllClose( dnn_lc_classifier.get_variable_value(variable_name), warm_started_dnn_lc_classifier.get_variable_value(variable_name)) elif 'linear' in variable_name: linear_values = warm_started_dnn_lc_classifier.get_variable_value( variable_name) # Since they're not warm-started, the linear weights will be # zero-initialized. self.assertAllClose(np.zeros_like(linear_values), linear_values) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/v1/dnn_test_fc_v1_v1.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for dnn.py with feature_column_v1.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import shutil import tempfile from absl.testing import parameterized import numpy as np import six import tensorflow as tf from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.canned import dnn from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.canned.v1 import dnn_testing_utils_v1 from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.inputs import pandas_io try: # pylint: disable=g-import-not-at-top import pandas as pd HAS_PANDAS = True except IOError: # Pandas writes a temporary file during import. If it fails, don't use pandas. HAS_PANDAS = False except ImportError: HAS_PANDAS = False # Uses feature_column_v1 for testing. 
feature_column.numeric_column = feature_column._numeric_column # pylint: disable=protected-access def _dnn_classifier_fn(*args, **kwargs): return dnn.DNNClassifier(*args, **kwargs) @test_util.run_v1_only('Tests v1 only symbols') class DNNModelFnTest(dnn_testing_utils_v1.BaseDNNModelFnTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNModelFnTest.__init__( self, dnn._dnn_model_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNLogitFnTest(dnn_testing_utils_v1.BaseDNNLogitFnTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNLogitFnTest.__init__( self, dnn.dnn_logit_fn_builder, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNWarmStartingTest(dnn_testing_utils_v1.BaseDNNWarmStartingTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNWarmStartingTest.__init__( self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNClassifierEvaluateTest( dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest.__init__( self, _dnn_classifier_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNClassifierPredictTest( dnn_testing_utils_v1.BaseDNNClassifierPredictTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNClassifierPredictTest.__init__( self, _dnn_classifier_fn, 
fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNClassifierTrainTest(dnn_testing_utils_v1.BaseDNNClassifierTrainTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNClassifierTrainTest.__init__( self, _dnn_classifier_fn, fc_impl=feature_column) def _dnn_regressor_fn(*args, **kwargs): return dnn.DNNRegressor(*args, **kwargs) @test_util.run_v1_only('Tests v1 only symbols') class DNNRegressorEvaluateTest( dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNRegressorPredictTest(dnn_testing_utils_v1.BaseDNNRegressorPredictTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorPredictTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column) @test_util.run_v1_only('Tests v1 only symbols') class DNNRegressorTrainTest(dnn_testing_utils_v1.BaseDNNRegressorTrainTest, tf.test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) dnn_testing_utils_v1.BaseDNNRegressorTrainTest.__init__( self, _dnn_regressor_fn, fc_impl=feature_column) def _queue_parsed_features(feature_map): tensors_to_enqueue = [] keys = [] for key, tensor in six.iteritems(feature_map): keys.append(key) tensors_to_enqueue.append(tensor) queue_dtypes = [x.dtype for x in tensors_to_enqueue] input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes) tf.compat.v1.train.queue_runner.add_queue_runner( tf.compat.v1.train.queue_runner.QueueRunner( input_queue, 
[input_queue.enqueue(tensors_to_enqueue)])) dequeued_tensors = input_queue.dequeue() return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))} @test_util.run_v1_only('Tests v1 only symbols') class DNNRegressorIntegrationTest(tf.test.TestCase, parameterized.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size): feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] est = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=feature_columns, label_dimension=label_dimension, model_dir=self._model_dir) # TRAIN num_steps = 10 est.train(train_input_fn, steps=num_steps) # EVALUATE scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) # PREDICT predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in est.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) # EXPORT feature_spec = tf.compat.v1.feature_column.make_parse_example_spec( feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def test_numpy_input_fn(self): """Tests complete flow with numpy_input_fn.""" label_dimension = 2 batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) # learn y = x train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, 
batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, batch_size=batch_size, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=label_dimension, label_dimension=label_dimension, batch_size=batch_size) def test_pandas_input_fn(self): """Tests complete flow with pandas_input_fn.""" if not HAS_PANDAS: return label_dimension = 1 batch_size = 10 data = np.linspace(0., 2., batch_size, dtype=np.float32) x = pd.DataFrame({'x': data}) y = pd.Series(data) train_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, shuffle=False) predict_input_fn = pandas_io.pandas_input_fn( x=x, batch_size=batch_size, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=label_dimension, label_dimension=label_dimension, batch_size=batch_size) def test_input_fn_from_parse_example(self): """Tests complete flow with input_fn constructed from parse_example.""" label_dimension = 2 batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) serialized_examples = [] for datum in data: example = example_pb2.Example( features=feature_pb2.Features( feature={ 'x': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=datum)), 'y': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=datum)), })) serialized_examples.append(example.SerializeToString()) feature_spec = { 'x': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32), 'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32), } def _train_input_fn(): feature_map = tf.compat.v1.io.parse_example(serialized_examples, feature_spec) features = _queue_parsed_features(feature_map) labels 
= features.pop('y') return features, labels def _eval_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = _queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _predict_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = _queue_parsed_features(feature_map) features.pop('y') return features, None self._test_complete_flow( train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, input_dimension=label_dimension, label_dimension=label_dimension, batch_size=batch_size) @test_util.run_v1_only('Tests v1 only symbols') class DNNClassifierIntegrationTest(tf.test.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def _as_label(self, data_in_float): return np.rint(data_in_float).astype(np.int64) def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, n_classes, batch_size): feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension,)) ] est = dnn.DNNClassifier( hidden_units=(2, 2), feature_columns=feature_columns, n_classes=n_classes, model_dir=self._model_dir) # TRAIN num_steps = 10 est.train(train_input_fn, steps=num_steps) # EVALUATE scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) # PREDICT predicted_proba = np.array([ x[prediction_keys.PredictionKeys.PROBABILITIES] for x in est.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) # EXPORT feature_spec = tf.compat.v1.feature_column.make_parse_example_spec( feature_columns) serving_input_receiver_fn = 
export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) def test_numpy_input_fn(self): """Tests complete flow with numpy_input_fn.""" n_classes = 3 input_dimension = 2 batch_size = 10 data = np.linspace( 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) x_data = data.reshape(batch_size, input_dimension) y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) # learn y = x train_input_fn = numpy_io.numpy_input_fn( x={'x': x_data}, y=y_data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': x_data}, batch_size=batch_size, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, n_classes=n_classes, batch_size=batch_size) def test_pandas_input_fn(self): """Tests complete flow with pandas_input_fn.""" if not HAS_PANDAS: return input_dimension = 1 n_classes = 3 batch_size = 10 data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32) x = pd.DataFrame({'x': data}) y = pd.Series(self._as_label(data)) train_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = pandas_io.pandas_input_fn( x=x, y=y, batch_size=batch_size, shuffle=False) predict_input_fn = pandas_io.pandas_input_fn( x=x, batch_size=batch_size, shuffle=False) self._test_complete_flow( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=input_dimension, n_classes=n_classes, batch_size=batch_size) def test_input_fn_from_parse_example(self): """Tests complete flow with input_fn constructed from parse_example.""" input_dimension = 2 
n_classes = 3 batch_size = 10 data = np.linspace( 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) data = data.reshape(batch_size, input_dimension) serialized_examples = [] for datum in data: example = example_pb2.Example( features=feature_pb2.Features( feature={ 'x': feature_pb2.Feature( float_list=feature_pb2.FloatList(value=datum)), 'y': feature_pb2.Feature( int64_list=feature_pb2.Int64List( value=self._as_label(datum[:1]))), })) serialized_examples.append(example.SerializeToString()) feature_spec = { 'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32), 'y': tf.io.FixedLenFeature([1], tf.dtypes.int64), } def _train_input_fn(): feature_map = tf.compat.v1.io.parse_example(serialized_examples, feature_spec) features = _queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _eval_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = _queue_parsed_features(feature_map) labels = features.pop('y') return features, labels def _predict_input_fn(): feature_map = tf.compat.v1.io.parse_example( tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1), feature_spec) features = _queue_parsed_features(feature_map) features.pop('y') return features, None self._test_complete_flow( train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, input_dimension=input_dimension, n_classes=n_classes, batch_size=batch_size) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/canned/v1/dnn_test_fc_v2_v1.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dnn.py with feature_column_v2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

from absl.testing import parameterized
import numpy as np
import six
import tensorflow as tf

from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.canned.v1 import dnn_testing_utils_v1
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.inputs import pandas_io

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use
  # pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


def _dnn_classifier_fn(*args, **kwargs):
  """Constructor shim so the shared test bases can build a DNNClassifier."""
  return dnn.DNNClassifier(*args, **kwargs)


# The *V2Test classes below only wire the shared test bases from
# dnn_testing_utils_v1 to the feature_column_v2 implementation; all test
# methods live on the base classes.
@test_util.run_v1_only('Tests v1 only symbols')
class DNNModelFnV2Test(dnn_testing_utils_v1.BaseDNNModelFnTest,
                       tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNModelFnTest.__init__(
        self, dnn._dnn_model_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNLogitFnV2Test(dnn_testing_utils_v1.BaseDNNLogitFnTest,
                       tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNLogitFnTest.__init__(
        self, dnn.dnn_logit_fn_builder, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNWarmStartingV2Test(dnn_testing_utils_v1.BaseDNNWarmStartingTest,
                            tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    # _dnn_regressor_fn is defined later in this module; it is resolved at
    # test-instantiation time, after the module has fully loaded.
    dnn_testing_utils_v1.BaseDNNWarmStartingTest.__init__(
        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNClassifierEvaluateV2Test(
    dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierEvaluateTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNClassifierPredictV2Test(
    dnn_testing_utils_v1.BaseDNNClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierPredictTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
@test_util.run_v1_only('Tests v1 only symbols')
class DNNClassifierTrainV2Test(dnn_testing_utils_v1.BaseDNNClassifierTrainTest,
                               tf.test.TestCase):
  # Runs the shared DNNClassifier train tests against feature_column_v2.

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNClassifierTrainTest.__init__(
        self, _dnn_classifier_fn, fc_impl=feature_column_v2)


def _dnn_regressor_fn(*args, **kwargs):
  """Constructor shim so the shared test bases can build a DNNRegressor."""
  return dnn.DNNRegressor(*args, **kwargs)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNRegressorEvaluateV2Test(
    dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest, tf.test.TestCase):
  # Runs the shared DNNRegressor evaluate tests against feature_column_v2.

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorEvaluateTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNRegressorPredictV2Test(
    dnn_testing_utils_v1.BaseDNNRegressorPredictTest, tf.test.TestCase):
  # Runs the shared DNNRegressor predict tests against feature_column_v2.

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorPredictTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNRegressorTrainV2Test(dnn_testing_utils_v1.BaseDNNRegressorTrainTest,
                              tf.test.TestCase):
  # Runs the shared DNNRegressor train tests against feature_column_v2.

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    dnn_testing_utils_v1.BaseDNNRegressorTrainTest.__init__(
        self, _dnn_regressor_fn, fc_impl=feature_column_v2)


def _queue_parsed_features(feature_map):
  # Re-batches parsed feature tensors through a FIFO queue so input_fns built
  # from tf.io.parse_example can feed the v1 (graph-mode) estimator runtime.
  tensors_to_enqueue = []
  keys = []
  for key, tensor in six.iteritems(feature_map):
    keys.append(key)
    tensors_to_enqueue.append(tensor)
  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
  input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes)
  tf.compat.v1.train.queue_runner.add_queue_runner(
      tf.compat.v1.train.queue_runner.QueueRunner(
          input_queue,
          [input_queue.enqueue(tensors_to_enqueue)]))
  dequeued_tensors = input_queue.dequeue()
  # dequeue() preserves enqueue order, so index i still pairs with keys[i].
  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}


@test_util.run_v1_only('Tests v1 only symbols')
class DNNRegressorIntegrationTest(tf.test.TestCase, parameterized.TestCase):
  """End-to-end train/evaluate/predict/export flow tests for DNNRegressor."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      # Flush cached summary writers before removing the directory they
      # write into.
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension, label_dimension,
                          batch_size):
    # Exercises the full estimator life cycle: train, evaluate, predict and
    # SavedModel export, asserting only coarse invariants (step count,
    # prediction shape, export existence).
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    label_dimension = 1
    batch_size = 10
    data = np.linspace(0., 2., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # Serialize each row as a tf.Example with identical 'x' and 'y' features
    # (the regressor learns the identity mapping).
    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      # limit_epochs bounds eval/predict to a single pass over the data.
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)


@test_util.run_v1_only('Tests v1 only symbols')
class DNNClassifierIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow tests for DNNClassifier."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _as_label(self, data_in_float):
    # Rounds float data to the nearest integer class id.
    return np.rint(data_in_float).astype(np.int64)

  def _test_complete_flow(self, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension, n_classes,
                          batch_size):
    # Same life-cycle walk-through as the regressor test above, but checks
    # class-probability shape instead of regression output shape.
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    # EXPORT
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    input_dimension = 2
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return
    input_dimension = 1
    n_classes = 3
    batch_size = 10
    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(self._as_label(data))
    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    input_dimension = 2
    n_classes = 3
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    data = data.reshape(batch_size, input_dimension)

    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          int64_list=feature_pb2.Int64List(
                              value=self._as_label(datum[:1]))),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([1], tf.dtypes.int64),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = _queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/v1/dnn_testing_utils_v1.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Utils to be used in testing DNN estimators.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import shutil import tempfile import numpy as np import six import tensorflow as tf from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.canned import head as head_lib from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.mode_keys import ModeKeys # pylint rules which are disabled by default for test files. # pylint: disable=invalid-name,protected-access,missing-docstring # Names of variables created by model. 
# Names of variables created by the model. The patterns below are used to
# address layer weights in checkpoints written by create_checkpoint().
LEARNING_RATE_NAME = 'dnn/regression_head/dnn/learning_rate'
HIDDEN_WEIGHTS_NAME_PATTERN = 'dnn/hiddenlayer_%d/kernel'
HIDDEN_BIASES_NAME_PATTERN = 'dnn/hiddenlayer_%d/bias'
BATCH_NORM_BETA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/beta'
BATCH_NORM_GAMMA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/gamma'
BATCH_NORM_MEAN_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/moving_mean'
BATCH_NORM_VARIANCE_NAME_PATTERN = (
    'dnn/hiddenlayer_%d/batchnorm_%d/moving_variance')
LOGITS_WEIGHTS_NAME = 'dnn/logits/kernel'
LOGITS_BIASES_NAME = 'dnn/logits/bias'
OCCUPATION_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
                             'occupation_embedding/embedding_weights')
CITY_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
                       'city_embedding/embedding_weights')

# This is so that we can easily switch between feature_column and
# feature_column_v2 for testing.
# NOTE(review): this monkey-patches the imported feature_column module at
# import time, aliasing its private constructors under public names.
feature_column.numeric_column = feature_column._numeric_column
feature_column.categorical_column_with_hash_bucket = feature_column._categorical_column_with_hash_bucket  # pylint: disable=line-too-long
feature_column.categorical_column_with_vocabulary_list = feature_column._categorical_column_with_vocabulary_list  # pylint: disable=line-too-long
feature_column.categorical_column_with_vocabulary_file = feature_column._categorical_column_with_vocabulary_file  # pylint: disable=line-too-long
feature_column.embedding_column = feature_column._embedding_column


def assert_close(expected, actual, rtol=1e-04, message='', name='assert_close'):
  """Returns an op asserting elementwise |expected - actual|/|expected| < rtol.

  Args:
    expected: Tensor (or convertible) of expected values.
    actual: Tensor (or convertible) of actual values.
    rtol: Relative tolerance.
    message: Prefix added to the assertion's failure data.
    name: Name scope for the returned op.
  """
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    # Relative difference divides by `expected`; an expected value of exactly
    # 0 would yield inf/nan here rather than a clean assertion failure.
    rdiff = tf.math.abs((expected - actual) / expected, 'diff')
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return tf.compat.v1.debugging.assert_less(
        rdiff,
        rtol,
        data=(message,
              'Condition expected =~ actual did not hold element-wise:'
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        summarize=expected.get_shape().num_elements(),
        name=scope)


def create_checkpoint(weights_and_biases,
                      global_step,
                      model_dir,
                      batch_norm_vars=None):
  """Create checkpoint file with provided model weights.

  Args:
    weights_and_biases: Iterable of tuples of weight and bias values.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
    batch_norm_vars: Variables used for batch normalization.
  """
  weights, biases = zip(*weights_and_biases)
  if batch_norm_vars:
    # Batch norm applies to hidden layers only, hence one fewer entry than
    # weights_and_biases (which includes the logits layer).
    assert len(batch_norm_vars) == len(weights_and_biases) - 1
    (bn_betas, bn_gammas, bn_means, bn_variances) = zip(*batch_norm_vars)
  model_weights = {}

  # Hidden layer weights.
  for i in range(0, len(weights) - 1):
    model_weights[HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
    model_weights[HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]
    if batch_norm_vars:
      model_weights[BATCH_NORM_BETA_NAME_PATTERN % (i, i)] = bn_betas[i]
      model_weights[BATCH_NORM_GAMMA_NAME_PATTERN % (i, i)] = bn_gammas[i]
      model_weights[BATCH_NORM_MEAN_NAME_PATTERN % (i, i)] = bn_means[i]
      model_weights[BATCH_NORM_VARIANCE_NAME_PATTERN % (i, i)] = bn_variances[i]

  # Output layer weights.
  model_weights[LOGITS_WEIGHTS_NAME] = weights[-1]
  model_weights[LOGITS_BIASES_NAME] = biases[-1]

  with tf.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      tf.Variable(v, name=k, dtype=tf.dtypes.float32)

    # Create non-model variables.
    global_step_var = tf.compat.v1.train.create_global_step()

    # Initialize vars and save checkpoint.
    with tf.compat.v1.Session() as sess:
      tf.compat.v1.initializers.global_variables().run()
      global_step_var.assign(global_step).eval()
      tf.compat.v1.train.Saver().save(sess,
                                      os.path.join(model_dir, 'model.ckpt'))


def mock_head(testcase, hidden_units, logits_dimension, expected_logits):
  """Returns a mock head that validates logits values and variable names."""
  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + '/part_0:0') % i
                          for i in range(len(hidden_units))]
  hidden_biases_names = [(HIDDEN_BIASES_NAME_PATTERN + '/part_0:0') % i
                         for i in range(len(hidden_units))]
  expected_var_names = (
      hidden_weights_names + hidden_biases_names +
      [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0'])

  def _create_tpu_estimator_spec(features,
                                 mode,
                                 logits,
                                 labels,
                                 train_op_fn=None,
                                 optimizer=None):
    # Validates the trainable-variable set and the logits values, then
    # returns a minimal _TPUEstimatorSpec for the requested mode.
    del features, labels  # Not used.
    trainable_vars = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
    # NOTE(review): assertItemsEqual/assertEquals below are deprecated
    # unittest aliases (assertCountEqual/assertEqual in Python 3).
    testcase.assertItemsEqual(expected_var_names,
                              [var.name for var in trainable_vars])
    loss = tf.constant(1.)
    assert_logits = assert_close(
        expected_logits, logits, message='Failed for mode={}. '.format(mode))
    with tf.control_dependencies([assert_logits]):
      if mode == ModeKeys.TRAIN:
        # NOTE(review): if neither train_op_fn nor optimizer is provided,
        # `train_op` stays unbound and the return below raises NameError;
        # the tests in this file always pass exactly one of the two.
        if train_op_fn is not None:
          train_op = train_op_fn(loss)
        elif optimizer is not None:
          train_op = optimizer.minimize(loss, global_step=None)
        return model_fn._TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)
      elif mode == ModeKeys.EVAL:
        return model_fn._TPUEstimatorSpec(mode=mode, loss=tf.identity(loss))
      elif mode == ModeKeys.PREDICT:
        return model_fn._TPUEstimatorSpec(
            mode=mode, predictions={'logits': tf.identity(logits)})
      else:
        testcase.fail('Invalid mode: {}'.format(mode))

  def _create_estimator_spec(features,
                             mode,
                             logits,
                             labels,
                             train_op_fn=None,
                             optimizer=None):
    # Non-TPU variant: delegates to the TPU spec and converts it.
    tpu_spec = _create_tpu_estimator_spec(features, mode, logits, labels,
                                          train_op_fn, optimizer)
    return tpu_spec.as_estimator_spec()

  head = tf.compat.v1.test.mock.NonCallableMagicMock(spec=head_lib._Head)
  head.logits_dimension = logits_dimension
  head._create_tpu_estimator_spec = tf.compat.v1.test.mock.MagicMock(
      wraps=_create_tpu_estimator_spec)
  head.create_estimator_spec = tf.compat.v1.test.mock.MagicMock(
      wraps=_create_estimator_spec)

  return head


def mock_optimizer(testcase, hidden_units, expected_loss=None):
  """Creates a mock optimizer to test the train method.

  Args:
    testcase: A TestCase instance.
    hidden_units: Iterable of integer sizes for the hidden layers.
    expected_loss: If given, will assert the loss value.

  Returns:
    A mock Optimizer.
  """
  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + '/part_0:0') % i
                          for i in range(len(hidden_units))]
  hidden_biases_names = [(HIDDEN_BIASES_NAME_PATTERN + '/part_0:0') % i
                         for i in range(len(hidden_units))]
  expected_var_names = (
      hidden_weights_names + hidden_biases_names +
      [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0'])

  def _minimize(loss, global_step=None, var_list=None):
    """Mock of optimizer.minimize."""
    trainable_vars = var_list or tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
    testcase.assertItemsEqual(expected_var_names,
                              [var.name for var in trainable_vars])

    # Verify loss. We can't check the value directly, so we add an assert op.
    testcase.assertEquals(0, loss.shape.ndims)
    if expected_loss is None:
      if global_step is not None:
        return tf.compat.v1.assign_add(global_step, 1).op
      return tf.no_op()
    assert_loss = assert_close(
        tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32),
        loss,
        name='assert_loss')
    with tf.control_dependencies((assert_loss,)):
      if global_step is not None:
        return tf.compat.v1.assign_add(global_step, 1).op
      return tf.no_op()

  optimizer_mock = tf.compat.v1.test.mock.NonCallableMagicMock(
      spec=tf.compat.v1.train.Optimizer,
      wraps=tf.compat.v1.train.Optimizer(
          use_locking=False, name='my_optimizer'))
  optimizer_mock.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize)

  return optimizer_mock


class BaseDNNModelFnTest(object):
  """Tests that _dnn_model_fn passes expected logits to mock head."""

  def __init__(self, dnn_model_fn, fc_impl=feature_column):
    # dnn_model_fn: the model_fn under test; fc_impl: feature_column module
    # variant (v1 or v2) used to build columns.
    self._dnn_model_fn = dnn_model_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
                   expected_logits):
    """Tests that the expected logits are passed to mock head."""
    with tf.Graph().as_default():
      tf.compat.v1.train.create_global_step()
      head = mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      estimator_spec = self._dnn_model_fn(
          features={'age': tf.constant(inputs)},
          labels=tf.constant([[1]]),
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=[
              self._fc_impl.numeric_column(
                  'age', shape=np.array(inputs).shape[1:])
          ],
          optimizer=mock_optimizer(self, hidden_units))
      # MonitoredTrainingSession restores the weights written by
      # create_checkpoint() from self._model_dir before running the op.
      with tf.compat.v1.train.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as sess:
        if mode == ModeKeys.TRAIN:
          sess.run(estimator_spec.train_op)
        elif mode == ModeKeys.EVAL:
          sess.run(estimator_spec.loss)
        elif mode == ModeKeys.PREDICT:
          sess.run(estimator_spec.predictions)
        else:
          self.fail('Invalid mode: {}'.format(mode))

  def test_one_dim_logits(self):
    """Tests one-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=1,
          inputs=[[10.]],
          expected_logits=[[-2.08]])

  def test_multi_dim_logits(self):
    """Tests multi-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]] = [[-2.08, 2.08, 1.19]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10.]],
          expected_logits=[[-2.08, 2.08, 1.19]])

  def test_multi_example_multi_dim_logits(self):
    """Tests multiple examples and multi-dimensional logits.

    input_layer = [[10], [5]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
                   = [[6.1, 4.9], [3.1, 2.4]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
                   = [[2.38, 0], [1.38, 0]]
    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10.], [5.]],
          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])

  def test_multi_dim_input_one_dim_logits(self):
    """Tests multi-dimensional inputs and one-dimensional logits.

    input_layer = [[10, 8]]
    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
                   = [[1.3, 0.9]]
    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
    logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=1,
          inputs=[[10., 8.]],
          expected_logits=[[-0.48]])

  def test_multi_dim_input_multi_dim_logits(self):
    """Tests multi-dimensional inputs and multi-dimensional logits.

    input_layer = [[10, 8]]
    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
                   = [[1.3, 0.9]]
    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
    logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10., 8.]],
          expected_logits=[[-0.48, 0.48, 0.39]])

  def test_multi_feature_column_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)

    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      with tf.Graph().as_default():
        tf.compat.v1.train.create_global_step()
        head = mock_head(
            self,
            hidden_units=hidden_units,
            logits_dimension=logits_dimension,
            expected_logits=expected_logits)
        estimator_spec = self._dnn_model_fn(
            features={
                'age': tf.constant(inputs[0]),
                'height': tf.constant(inputs[1])
            },
            labels=tf.constant([[1]]),
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                self._fc_impl.numeric_column('age'),
                self._fc_impl.numeric_column('height')
            ],
            optimizer=mock_optimizer(self, hidden_units))
        with tf.compat.v1.train.MonitoredTrainingSession(
            checkpoint_dir=self._model_dir) as sess:
          if mode == ModeKeys.TRAIN:
            sess.run(estimator_spec.train_op)
          elif mode == ModeKeys.EVAL:
            sess.run(estimator_spec.loss)
          elif mode == ModeKeys.PREDICT:
            sess.run(estimator_spec.predictions)
          else:
            self.fail('Invalid mode: {}'.format(mode))

  def test_multi_feature_column_mix_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)

    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      with tf.Graph().as_default():
        tf.compat.v1.train.create_global_step()
        head = mock_head(
            self,
            hidden_units=hidden_units,
            logits_dimension=logits_dimension,
            expected_logits=expected_logits)
        # Deliberately mixes the monkey-patched v1 feature_column module with
        # the public tf.feature_column API for the two columns.
        estimator_spec = self._dnn_model_fn(
            features={
                'age': tf.constant(inputs[0]),
                'height': tf.constant(inputs[1])
            },
            labels=tf.constant([[1]]),
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                feature_column.numeric_column('age'),
                tf.feature_column.numeric_column('height')
            ],
            optimizer=mock_optimizer(self, hidden_units))
        with tf.compat.v1.train.MonitoredTrainingSession(
            checkpoint_dir=self._model_dir) as sess:
          if mode == ModeKeys.TRAIN:
            sess.run(estimator_spec.train_op)
          elif mode == ModeKeys.EVAL:
            sess.run(estimator_spec.loss)
          elif mode == ModeKeys.PREDICT:
            sess.run(estimator_spec.predictions)
          else:
            self.fail('Invalid mode: {}'.format(mode))

  def test_features_tensor_raises_value_error(self):
    """Tests that passing a Tensor for features raises a ValueError."""
    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[0, 0, 0]]

    with tf.Graph().as_default():
      tf.compat.v1.train.create_global_step()
      head = mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      with self.assertRaisesRegexp(ValueError, 'features should be a dict'):
        self._dnn_model_fn(
            features=tf.constant(inputs),
            labels=tf.constant([[1]]),
            mode=ModeKeys.TRAIN,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                self._fc_impl.numeric_column(
                    'age', shape=np.array(inputs).shape[1:])
            ],
            optimizer=mock_optimizer(self, hidden_units))


class
BaseDNNLogitFnTest(object):
  """Tests correctness of logits calculated from _dnn_logit_fn_builder."""

  # NOTE(review): the `class` keyword for this definition sits on the previous
  # chunk boundary; this span begins with the class name fragment.

  def __init__(self, dnn_logit_fn_builder, fc_impl=feature_column):
    # dnn_logit_fn_builder: builder under test, producing a logit_fn.
    # fc_impl: feature-column implementation (v1 module by default) so the
    # same suite can run against both feature-column APIs.
    self._dnn_logit_fn_builder = dnn_logit_fn_builder
    self._fc_impl = fc_impl

  def setUp(self):
    # Fresh model dir per test; checkpoints with hand-set weights go here.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      # Flush cached summary writers before deleting the directory.
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_logits(self,
                   mode,
                   hidden_units,
                   logits_dimension,
                   inputs,
                   expected_logits,
                   batch_norm=False):
    """Tests that the expected logits are calculated.

    Builds a logit_fn under variable scope 'dnn', restores hand-written
    weights from the checkpoint in self._model_dir via MonitoredTrainingSession,
    and asserts the computed logits match `expected_logits`.
    """
    with tf.Graph().as_default():
      # Global step needed for MonitoredSession, which is in turn used to
      # explicitly set variable weights through a checkpoint.
      tf.compat.v1.train.create_global_step()
      # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
      # the checkpoint naming is shared.
      with tf.compat.v1.variable_scope('dnn'):
        input_layer_partitioner = (
            tf.compat.v1.min_max_variable_partitioner(
                max_partitions=0, min_slice_size=64 << 20))
        logit_fn = self._dnn_logit_fn_builder(
            units=logits_dimension,
            hidden_units=hidden_units,
            feature_columns=[
                self._fc_impl.numeric_column(
                    'age', shape=np.array(inputs).shape[1:])
            ],
            activation_fn=tf.nn.relu,
            dropout=None,
            input_layer_partitioner=input_layer_partitioner,
            batch_norm=batch_norm)
        logits = logit_fn(features={'age': tf.constant(inputs)}, mode=mode)
        with tf.compat.v1.train.MonitoredTrainingSession(
            checkpoint_dir=self._model_dir) as sess:
          self.assertAllClose(expected_logits, sess.run(logits))

  def test_one_dim_logits(self):
    """Tests one-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=1,
          inputs=[[10.]],
          expected_logits=[[-2.08]])

  def test_one_dim_logits_with_batch_norm(self):
    """Tests one-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +1), relu(0.5*10 -1)]] = [[7, 4]]
    hidden_layer_0 = [[relu(0.6*20 +1), relu(0.5*20 -1)]] = [[13, 9]]

    batch_norm_0, training (epsilon = 0.001):
    mean1 = 1/2*(7+13) = 10, variance1 = 1/2*(3^2+3^2) = 9
    x11 = (7-10)/sqrt(9+0.001) = -0.999944449,
    x21 = (13-10)/sqrt(9+0.001) = 0.999944449,
    mean2 = 1/2*(4+9) = 6.5, variance2 = 1/2*(2.5^2+.2.5^2) = 6.25
    x12 = (4-6.5)/sqrt(6.25+0.001) = -0.99992001,
    x22 = (9-6.5)/sqrt(6.25+0.001) = 0.99992001,

    logits = [[-1*(-0.999944449) + 2*(-0.99992001) + 0.3],
              [-1*0.999944449 + 2*0.99992001 + 0.3]]
           = [[-0.699895571],[1.299895571]]

    batch_norm_0, not training (epsilon = 0.001):
    moving_mean1 = 0, moving_variance1 = 1
    x11 = (7-0)/sqrt(1+0.001) = 6.996502623,
    x21 = (13-0)/sqrt(1+0.001) = 12.993504871,
    moving_mean2 = 0, moving_variance2 = 1
    x12 = (4-0)/sqrt(1+0.001) = 3.998001499,
    x22 = (9-0)/sqrt(1+0.001) = 8.995503372,

    logits = [[-1*6.996502623 + 2*3.998001499 + 0.3],
              [-1*12.993504871 + 2*8.995503372 + 0.3]]
           = [[1.299500375],[5.297501873]]
    """
    base_global_step = 100
    create_checkpoint(
        (
            ([[.6, .5]], [1., -1.]),
            ([[-1.], [2.]], [.3]),
        ),
        base_global_step,
        self._model_dir,
        batch_norm_vars=([
            [0, 0],  # beta.
            [1, 1],  # gamma.
            [0, 0],  # moving mean.
            [1, 1],  # moving variance.
        ],))
    # TRAIN uses batch statistics; EVAL/PREDICT use the (identity) moving
    # averages written above, hence the two expected-logit sets.
    self._test_logits(
        ModeKeys.TRAIN,
        hidden_units=[2],
        logits_dimension=1,
        inputs=[[10.], [20.]],
        expected_logits=[[-0.699895571], [1.299895571]],
        batch_norm=True)
    for mode in [ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=[2],
          logits_dimension=1,
          inputs=[[10.], [20.]],
          expected_logits=[[1.299500375], [5.297501873]],
          batch_norm=True)

  def test_multi_dim_logits(self):
    """Tests multi-dimensional logits.

    input_layer = [[10]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]] = [[-2.08, 2.08, 1.19]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10.]],
          expected_logits=[[-2.08, 2.08, 1.19]])

  def test_multi_example_multi_dim_logits(self):
    """Tests multiple examples and multi-dimensional logits.

    input_layer = [[10], [5]]
    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
                   = [[6.1, 4.9], [3.1, 2.4]]
    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
                   = [[2.38, 0], [1.38, 0]]
    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10.], [5.]],
          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])

  def test_multi_dim_input_one_dim_logits(self):
    """Tests multi-dimensional inputs and one-dimensional logits.

    input_layer = [[10, 8]]
    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
                   = [[1.3, 0.9]]
    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
    logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=1,
          inputs=[[10., 8.]],
          expected_logits=[[-0.48]])

  def test_multi_dim_input_multi_dim_logits(self):
    """Tests multi-dimensional inputs and multi-dimensional logits.

    input_layer = [[10, 8]]
    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
                   = [[1.3, 0.9]]
    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
    logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]]
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      self._test_logits(
          mode,
          hidden_units=(2, 2),
          logits_dimension=3,
          inputs=[[10., 8.]],
          expected_logits=[[-0.48, 0.48, 0.39]])

  def test_multi_feature_column_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      with tf.Graph().as_default():
        # Global step needed for MonitoredSession, which is in turn used to
        # explicitly set variable weights through a checkpoint.
        tf.compat.v1.train.create_global_step()
        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
        # the checkpoint naming is shared.
        with tf.compat.v1.variable_scope('dnn'):
          input_layer_partitioner = (
              tf.compat.v1.min_max_variable_partitioner(
                  max_partitions=0, min_slice_size=64 << 20))
          logit_fn = self._dnn_logit_fn_builder(
              units=logits_dimension,
              hidden_units=hidden_units,
              feature_columns=[
                  self._fc_impl.numeric_column('age'),
                  self._fc_impl.numeric_column('height')
              ],
              activation_fn=tf.nn.relu,
              dropout=None,
              input_layer_partitioner=input_layer_partitioner,
              batch_norm=False)
          logits = logit_fn(
              features={
                  'age': tf.constant(inputs[0]),
                  'height': tf.constant(inputs[1])
              },
              mode=mode)
          with tf.compat.v1.train.MonitoredTrainingSession(
              checkpoint_dir=self._model_dir) as sess:
            self.assertAllClose(expected_logits, sess.run(logits))

  def test_multi_feature_column_mix_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    for mode in [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT]:
      with tf.Graph().as_default():
        # Global step needed for MonitoredSession, which is in turn used to
        # explicitly set variable weights through a checkpoint.
        tf.compat.v1.train.create_global_step()
        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
        # the checkpoint naming is shared.
        # NOTE(review): this method continues past the chunk boundary; the
        # remainder of its body follows in the next span.
        with tf.compat.v1.variable_scope('dnn'):
          # NOTE(review): this span begins mid-method
          # (test_multi_feature_column_mix_multi_dim_logits); the `for mode`
          # loop and graph setup precede the chunk boundary.
          input_layer_partitioner = (
              tf.compat.v1.min_max_variable_partitioner(
                  max_partitions=0, min_slice_size=64 << 20))
          logit_fn = self._dnn_logit_fn_builder(
              units=logits_dimension,
              hidden_units=hidden_units,
              feature_columns=[
                  # Deliberately mixes the two feature-column APIs
                  # (module-level `feature_column` and `tf.feature_column`).
                  feature_column.numeric_column('age'),
                  tf.feature_column.numeric_column('height')
              ],
              activation_fn=tf.nn.relu,
              dropout=None,
              input_layer_partitioner=input_layer_partitioner,
              batch_norm=False)
          logits = logit_fn(
              features={
                  'age': tf.constant(inputs[0]),
                  'height': tf.constant(inputs[1])
              },
              mode=mode)
          with tf.compat.v1.train.MonitoredTrainingSession(
              checkpoint_dir=self._model_dir) as sess:
            self.assertAllClose(expected_logits, sess.run(logits))


class BaseDNNWarmStartingTest(object):
  """Tests warm-starting behavior for DNN classifier/regressor estimators."""

  def __init__(self,
               _dnn_classifier_fn,
               _dnn_regressor_fn,
               fc_impl=feature_column):
    # Factories for the estimators under test; fc_impl selects which
    # feature-column implementation the suite exercises.
    self._dnn_classifier_fn = _dnn_classifier_fn
    self._dnn_regressor_fn = _dnn_regressor_fn
    self._fc_impl = fc_impl

  def setUp(self):
    # Create a directory to save our old checkpoint and vocabularies to.
    self._ckpt_and_vocab_dir = tempfile.mkdtemp()

    # Make a dummy input_fn.
    def _input_fn():
      features = {
          'city': [['Palo Alto'], ['Mountain View']],
          'locality': [['Palo Alto'], ['Mountain View']],
          'occupation': [['doctor'], ['consultant']]
      }
      return features, [0, 1]

    self._input_fn = _input_fn

  def tearDown(self):
    # Clean up checkpoint / vocab dir.
    tf.compat.v1.summary.FileWriterCache.clear()
    shutil.rmtree(self._ckpt_and_vocab_dir)

  def assertAllNotClose(self, t1, t2):
    """Helper assert for arrays."""
    # Accumulates absolute element-wise differences; falls back to scalar
    # subtraction when the rows are not iterable.
    sum_of_abs_diff = 0.0
    for x, y in zip(t1, t2):
      try:
        for a, b in zip(x, y):
          sum_of_abs_diff += abs(b - a)
      except TypeError:
        sum_of_abs_diff += abs(y - x)
    self.assertGreater(sum_of_abs_diff, 0)

  def test_classifier_basic_warm_starting(self):
    """Tests correctness of DNNClassifier default warm-start."""
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNClassifier and train to save a checkpoint.
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        warm_start_from=dnn_classifier.model_dir)
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    for variable_name in warm_started_dnn_classifier.get_variable_names():
      self.assertAllClose(
          dnn_classifier.get_variable_value(variable_name),
          warm_started_dnn_classifier.get_variable_value(variable_name))

  def test_regressor_basic_warm_starting(self):
    """Tests correctness of DNNRegressor default warm-start."""
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNRegressor and train to save a checkpoint.
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        model_dir=self._ckpt_and_vocab_dir,
        optimizer='SGD')
    dnn_regressor.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNRegressor, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_dnn_regressor = self._dnn_regressor_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        warm_start_from=dnn_regressor.model_dir)
    warm_started_dnn_regressor.train(input_fn=self._input_fn, max_steps=1)

    for variable_name in warm_started_dnn_regressor.get_variable_names():
      self.assertAllClose(
          dnn_regressor.get_variable_value(variable_name),
          warm_started_dnn_regressor.get_variable_value(variable_name))

  def test_warm_starting_selective_variables(self):
    """Tests selecting variables to warm-start."""
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNClassifier and train to save a checkpoint.
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        # The provided regular expression will only warm-start the city
        # embedding, not the kernels and biases of the hidden weights.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=dnn_classifier.model_dir,
            vars_to_warm_start='.*(city).*'))
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    for variable_name in warm_started_dnn_classifier.get_variable_names():
      if 'city' in variable_name:
        self.assertAllClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))
      elif 'bias' in variable_name:
        # Hidden layer biases are zero-initialized.
        bias_values = warm_started_dnn_classifier.get_variable_value(
            variable_name)
        self.assertAllClose(np.zeros_like(bias_values), bias_values)
      elif 'kernel' in variable_name:
        # We can't override the glorot uniform initializer used for the kernels
        # in the dense layers, so just make sure we're not getting the same
        # values from the old checkpoint.
        self.assertAllNotClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))

  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
    """Tests warm-starting with vocab remapping and partitioning."""
    vocab_list = ['doctor', 'lawyer', 'consultant']
    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
    with open(vocab_file, 'w') as f:
      f.write('\n'.join(vocab_list))
    occupation = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_file(
            'occupation',
            vocabulary_file=vocab_file,
            vocabulary_size=len(vocab_list)),
        dimension=2)

    # Create a DNNClassifier and train to save a checkpoint.
    partitioner = tf.compat.v1.fixed_size_partitioner(num_shards=2)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[occupation],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD',
        input_layer_partitioner=partitioner)
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change). Use a new FeatureColumn with a
    # different vocabulary for occupation.
    new_vocab_list = ['doctor', 'consultant', 'engineer']
    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
                                  'new_occupation_vocab')
    with open(new_vocab_file, 'w') as f:
      f.write('\n'.join(new_vocab_list))
    new_occupation = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_file(
            'occupation',
            vocabulary_file=new_vocab_file,
            vocabulary_size=len(new_vocab_list)),
        dimension=2)
    # We can create our VocabInfo object from the new and old occupation
    # FeatureColumn's.
    occupation_vocab_info = estimator.VocabInfo(
        new_vocab=new_occupation.categorical_column.vocabulary_file,
        new_vocab_size=new_occupation.categorical_column.vocabulary_size,
        num_oov_buckets=new_occupation.categorical_column.num_oov_buckets,
        old_vocab=occupation.categorical_column.vocabulary_file,
        old_vocab_size=occupation.categorical_column.vocabulary_size,
        # Can't use constant_initializer with load_and_remap. In practice,
        # use a truncated normal initializer.
        backup_initializer=tf.compat.v1.initializers.random_uniform(
            minval=0.39, maxval=0.39))
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[occupation],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=dnn_classifier.model_dir,
            var_name_to_vocab_info={
                OCCUPATION_EMBEDDING_NAME: occupation_vocab_info
            },
            # Explicitly providing None here will only warm-start variables
            # referenced in var_name_to_vocab_info (no hidden weights will be
            # warmstarted).
            vars_to_warm_start=None),
        input_layer_partitioner=partitioner)
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # 'doctor' was ID-0 and still ID-0.
    self.assertAllClose(
        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[0, :],
        warm_started_dnn_classifier.get_variable_value(
            OCCUPATION_EMBEDDING_NAME)[0, :])
    # 'consultant' was ID-2 and now ID-1.
    self.assertAllClose(
        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[2, :],
        warm_started_dnn_classifier.get_variable_value(
            OCCUPATION_EMBEDDING_NAME)[1, :])
    # 'engineer' is a new entry and should be initialized with the
    # backup_initializer in VocabInfo.
    self.assertAllClose([0.39] * 2,
                        warm_started_dnn_classifier.get_variable_value(
                            OCCUPATION_EMBEDDING_NAME)[2, :])
    for variable_name in warm_started_dnn_classifier.get_variable_names():
      if 'bias' in variable_name:
        # Hidden layer biases are zero-initialized.
        bias_values = warm_started_dnn_classifier.get_variable_value(
            variable_name)
        self.assertAllClose(np.zeros_like(bias_values), bias_values)
      elif 'kernel' in variable_name:
        # We can't override the glorot uniform initializer used for the kernels
        # in the dense layers, so just make sure we're not getting the same
        # values from the old checkpoint.
        self.assertAllNotClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))

  def test_warm_starting_with_naming_change(self):
    """Tests warm-starting with a Tensor name remapping."""
    locality = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)

    # Create a DNNClassifier and train to save a checkpoint.
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[locality],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second DNNClassifier, warm-started from the first. Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    # NOTE(review): this method continues past the chunk boundary; the second
    # classifier construction follows in the next span.
    city = self._fc_impl.embedding_column(
        self._fc_impl.categorical_column_with_vocabulary_list(
            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
        dimension=5)
    # NOTE(review): this span begins mid-method
    # (test_warm_starting_with_naming_change); the first classifier and its
    # training step precede the chunk boundary.
    warm_started_dnn_classifier = self._dnn_classifier_fn(
        hidden_units=[256, 128],
        feature_columns=[city],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        # The 'city' variable correspond to the 'locality' variable in the
        # previous model.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=dnn_classifier.model_dir,
            var_name_to_prev_var_name={
                CITY_EMBEDDING_NAME:
                    CITY_EMBEDDING_NAME.replace('city', 'locality')
            }))
    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)

    for variable_name in warm_started_dnn_classifier.get_variable_names():
      if 'city' in variable_name:
        # The remapped embedding must carry the old 'locality' values.
        self.assertAllClose(
            dnn_classifier.get_variable_value(
                CITY_EMBEDDING_NAME.replace('city', 'locality')),
            warm_started_dnn_classifier.get_variable_value(CITY_EMBEDDING_NAME))
      else:
        self.assertAllClose(
            dnn_classifier.get_variable_value(variable_name),
            warm_started_dnn_classifier.get_variable_value(variable_name))


class BaseDNNClassifierEvaluateTest(object):
  """Tests DNNClassifier.evaluate() against hand-computed metric values."""

  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
    self._dnn_classifier_fn = dnn_classifier_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_one_dim(self):
    """Asserts evaluation metrics for one-dimensional input and logits."""
    global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), global_step, self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10.], [10.]]}, [[1], [0]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08], [-2.08]] =>
    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
    # loss = -1. * log(0.111) -1. * log(0.889) = 2.31544200
    expected_loss = 2.31544200
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2.,
            metric_keys.MetricKeys.ACCURACY: 0.5,
            metric_keys.MetricKeys.PRECISION: 0.0,
            metric_keys.MetricKeys.RECALL: 0.0,
            metric_keys.MetricKeys.PREDICTION_MEAN: 0.11105597,
            metric_keys.MetricKeys.LABEL_MEAN: 0.5,
            metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
            # There is no good way to calculate AUC for only two data points.
            # But that is what the algorithm returns.
            metric_keys.MetricKeys.AUC: 0.5,
            metric_keys.MetricKeys.AUC_PR: 0.75,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))

  def test_multi_dim(self):
    """Asserts evaluation metrics for multi-dimensional input and logits."""
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    n_classes = 3
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        n_classes=n_classes,
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10., 8.], [10., 8.]]}, [[1], [0]]

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39], [-0.48, 0.48, 0.39]]
    # probabilities = exp(logits)/sum(exp(logits))
    #               = [[0.16670536, 0.43538380, 0.39791084],
    #                  [0.16670536, 0.43538380, 0.39791084]]
    # loss = -log(0.43538380) - log(0.16670536)
    expected_loss = 2.62305466
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
            metric_keys.MetricKeys.ACCURACY: 0.5,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))

  def test_float_labels(self):
    """Asserts evaluation metrics for float labels in binary classification."""
    global_step = 100
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), global_step, self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10.], [10.]]}, [[0.8], [0.4]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08], [-2.08]] =>
    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
    # loss = -0.8 * log(0.111) -0.2 * log(0.889)
    #        -0.4 * log(0.111) -0.6 * log(0.889) = 2.7314420
    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
    self.assertAlmostEqual(2.7314420, metrics[metric_keys.MetricKeys.LOSS])

  def test_multi_dim_weights(self):
    """Tests evaluation with weights."""
    # Uses same checkpoint with test_multi_dims
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    n_classes = 3
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        n_classes=n_classes,
        weight_column='w',
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10., 8.], [10., 8.]], 'w': [[10.], [100.]]}, [[1], [0]]

    # Uses identical numbers as test_multi_dims
    # See that test for calculation of logits.
    # loss = -log(0.43538380)*10 - log(0.16670536)*100
    expected_loss = 187.468007
    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
    self.assertAlmostEqual(
        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)


class BaseDNNRegressorEvaluateTest(object):
  """Tests DNNRegressor.evaluate() against hand-computed metric values."""

  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
    self._dnn_regressor_fn = dnn_regressor_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_one_dim(self):
    """Asserts evaluation metrics for one-dimensional input and logits."""
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    # NOTE(review): this method continues past the chunk boundary; its body
    # follows in the next span.
    global_step = 100
    # NOTE(review): this span begins mid-method
    # (BaseDNNRegressorEvaluateTest.test_one_dim); its `def` and docstring
    # precede the chunk boundary.
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), global_step, self._model_dir)

    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      return {'age': [[10.]]}, [[1.]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08]] => predictions = [-2.08].
    # loss = (1+2.08)^2 = 9.4864
    expected_loss = 9.4864
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
            metric_keys.MetricKeys.PREDICTION_MEAN: -2.08,
            metric_keys.MetricKeys.LABEL_MEAN: 1.0,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))

  def test_multi_dim(self):
    """Asserts evaluation metrics for multi-dimensional input and logits."""
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    label_dimension = 3
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    def _input_fn():
      return {'age': [[10., 8.]]}, [[1., -1., 0.5]]

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]]
    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
    expected_loss = 4.3929
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension,
            metric_keys.MetricKeys.PREDICTION_MEAN: 0.39 / 3.0,
            metric_keys.MetricKeys.LABEL_MEAN: 0.5 / 3.0,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))

  def test_multi_dim_weights(self):
    """Asserts evaluation metrics for multi-dimensional input and logits."""
    # same checkpoint with test_multi_dim.
    global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), global_step, self._model_dir)
    label_dimension = 3
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=(2, 2),
        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
        label_dimension=label_dimension,
        weight_column='w',
        model_dir=self._model_dir)

    def _input_fn():
      return {'age': [[10., 8.]], 'w': [10.]}, [[1., -1., 0.5]]

    # Uses identical numbers as test_multi_dim.
    # See that test for calculation of logits.
    # loss = 4.3929*10
    expected_loss = 43.929
    metrics = dnn_regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertAlmostEqual(
        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)


class BaseDNNClassifierPredictTest(object):
  """Tests DNNClassifier.predict() output keys and values."""

  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
    self._dnn_classifier_fn = dnn_classifier_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_one_dim(self, label_vocabulary, label_output_fn):
    """Asserts predictions for one-dimensional input and logits."""
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ),
                      global_step=0,
                      model_dir=self._model_dir)

    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=(2, 2),
        label_vocabulary=label_vocabulary,
        feature_columns=(self._fc_impl.numeric_column('x'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # NOTE(review): this method continues past the chunk boundary; the
    # prediction assertions follow in the next span.
# logits = [-2.08] => # logistic = exp(-2.08)/(1 + exp(-2.08)) = 0.11105597 # probabilities = [1-logistic, logistic] = [0.88894403, 0.11105597] # class_ids = argmax(probabilities) = [0] predictions = next(dnn_classifier.predict(input_fn=input_fn)) self.assertAllClose([-2.08], predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose([0.11105597], predictions[prediction_keys.PredictionKeys.LOGISTIC]) self.assertAllClose( [0.88894403, 0.11105597], predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose([0], predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual([label_output_fn(0)], predictions[prediction_keys.PredictionKeys.CLASSES]) def test_one_dim_without_label_vocabulary(self): self._test_one_dim( label_vocabulary=None, label_output_fn=lambda x: ('%s' % x).encode()) def test_one_dim_with_label_vocabulary(self): n_classes = 2 self._test_one_dim( label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)], label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) def _test_multi_dim_with_3_classes(self, label_vocabulary, label_output_fn): """Asserts predictions for multi-dimensional input and logits.""" create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), global_step=0, model_dir=self._model_dir) dnn_classifier = self._dnn_classifier_fn( hidden_units=(2, 2), feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),), label_vocabulary=label_vocabulary, n_classes=3, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( # Inputs shape is (batch_size, num_inputs). x={'x': np.array([[10., 8.]])}, batch_size=1, shuffle=False) # Uses identical numbers as # DNNModelFnTest.test_multi_dim_input_multi_dim_logits. # See that test for calculation of logits. 
# logits = [-0.48, 0.48, 0.39] => # probabilities[i] = exp(logits[i]) / sum_j exp(logits[j]) => # probabilities = [0.16670536, 0.43538380, 0.39791084] # class_ids = argmax(probabilities) = [1] predictions = next(dnn_classifier.predict(input_fn=input_fn)) self.assertItemsEqual([ prediction_keys.PredictionKeys.LOGITS, prediction_keys.PredictionKeys.PROBABILITIES, prediction_keys.PredictionKeys.CLASS_IDS, prediction_keys.PredictionKeys.CLASSES, prediction_keys.PredictionKeys.ALL_CLASS_IDS, prediction_keys.PredictionKeys.ALL_CLASSES ], six.iterkeys(predictions)) self.assertAllClose([-0.48, 0.48, 0.39], predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose( [0.16670536, 0.43538380, 0.39791084], predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllEqual([1], predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual([label_output_fn(1)], predictions[prediction_keys.PredictionKeys.CLASSES]) def test_multi_dim_with_3_classes_but_no_label_vocab(self): self._test_multi_dim_with_3_classes( label_vocabulary=None, label_output_fn=lambda x: ('%s' % x).encode()) def test_multi_dim_with_3_classes_and_label_vocab(self): n_classes = 3 self._test_multi_dim_with_3_classes( label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)], label_output_fn=lambda x: ('class_vocab_%s' % x).encode()) class BaseDNNRegressorPredictTest(object): def __init__(self, dnn_regressor_fn, fc_impl=feature_column): self._dnn_regressor_fn = dnn_regressor_fn self._fc_impl = fc_impl def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._model_dir) def test_one_dim(self): """Asserts predictions for one-dimensional input and logits.""" # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1. 
create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), global_step=0, model_dir=self._model_dir) dnn_regressor = self._dnn_regressor_fn( hidden_units=(2, 2), feature_columns=(self._fc_impl.numeric_column('x'),), model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( x={'x': np.array([[10.]])}, batch_size=1, shuffle=False) # Uses identical numbers as DNNModelTest.test_one_dim_logits. # See that test for calculation of logits. # logits = [[-2.08]] => predictions = [-2.08]. self.assertAllClose({ prediction_keys.PredictionKeys.PREDICTIONS: [-2.08], }, next(dnn_regressor.predict(input_fn=input_fn))) def test_multi_dim(self): """Asserts predictions for multi-dimensional input and logits.""" # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3. create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), 100, self._model_dir) dnn_regressor = self._dnn_regressor_fn( hidden_units=(2, 2), feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),), label_dimension=3, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( # Inputs shape is (batch_size, num_inputs). x={'x': np.array([[10., 8.]])}, batch_size=1, shuffle=False) # Uses identical numbers as # DNNModelFnTest.test_multi_dim_input_multi_dim_logits. # See that test for calculation of logits. 
# logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39] self.assertAllClose( { prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39], }, next(dnn_regressor.predict(input_fn=input_fn))) class _SummaryHook(tf.compat.v1.train.SessionRunHook): """Saves summaries every N steps.""" def __init__(self): self._summaries = [] def begin(self): self._summary_op = tf.compat.v1.summary.merge_all() def before_run(self, run_context): return tf.compat.v1.train.SessionRunArgs({'summary': self._summary_op}) def after_run(self, run_context, run_values): s = tf.compat.v1.summary.Summary() s.ParseFromString(run_values.results['summary']) self._summaries.append(s) def summaries(self): return tuple(self._summaries) def _assert_checkpoint(testcase, global_step, input_units, hidden_units, output_units, model_dir): """Asserts checkpoint contains expected variables with proper shapes. Args: testcase: A TestCase instance. global_step: Expected global step value. input_units: The dimension of input layer. hidden_units: Iterable of integer sizes for the hidden layers. output_units: The dimension of output layer (logits). model_dir: The model directory. """ shapes = {name: shape for (name, shape) in tf.train.list_variables(model_dir)} # Global step. testcase.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) testcase.assertEqual( global_step, tf.train.load_variable(model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP)) # Hidden layer weights. prev_layer_units = input_units for i in range(len(hidden_units)): layer_units = hidden_units[i] testcase.assertAllEqual((prev_layer_units, layer_units), shapes[HIDDEN_WEIGHTS_NAME_PATTERN % i]) testcase.assertAllEqual((layer_units,), shapes[HIDDEN_BIASES_NAME_PATTERN % i]) prev_layer_units = layer_units # Output layer weights. 
  testcase.assertAllEqual((prev_layer_units, output_units),
                          shapes[LOGITS_WEIGHTS_NAME])
  testcase.assertAllEqual((output_units,), shapes[LOGITS_BIASES_NAME])


def _assert_simple_summary(testcase, expected_values, actual_summary):
  """Asserts that the summary contains the specified simple values.

  Args:
    testcase: A TestCase instance.
    expected_values: Dict of expected tags and simple values.
    actual_summary: `summary_pb2.Summary`.
  """
  # Only compare tags that the caller asked about; ignore extra summary values.
  testcase.assertAllClose(
      expected_values, {
          v.tag: v.simple_value
          for v in actual_summary.value
          if (v.tag in expected_values)
      })


class BaseDNNClassifierTrainTest(object):
  """Training tests for DNN classifiers (mix into a `tf.test.TestCase`)."""

  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
    # dnn_classifier_fn: factory producing the classifier under test.
    self._dnn_classifier_fn = dnn_classifier_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      # Clear cached writers before removing the directory they write to.
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_from_scratch_with_default_optimizer_binary(self):
    hidden_units = (2, 2)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]), steps=num_steps)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_from_scratch_with_default_optimizer_multi_class(self):
    hidden_units = (2, 2)
    n_classes = 3
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        n_classes=n_classes,
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[2]]), steps=num_steps)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=n_classes,
        model_dir=self._model_dir)

  def test_from_scratch_validate_summary(self):
    hidden_units = (2, 2)
    opt = mock_optimizer(self, hidden_units=hidden_units)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]),
        steps=num_steps,
        hooks=(summary_hook,))
    # The mock optimizer's minimize() is graph construction; built exactly once.
    self.assertEqual(1, opt.minimize.call_count)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      summary_keys = [v.tag for v in summary.value]
      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
      self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)

  def test_binary_classification(self):
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] => probabilities = [0.889, 0.111]
    # loss = -1. * log(0.111) = 2.19772100
    expected_loss = 2.19772100
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(1, opt.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': .5,
              'dnn/dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_binary_classification_float_labels(self):
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] => probabilities = [0.889, 0.111]
    # loss = -0.8 * log(0.111) -0.2 * log(0.889) = 1.7817210
    expected_loss = 1.7817210
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_classifier = self._dnn_classifier_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[0.8]]), steps=num_steps)
    self.assertEqual(1, opt.minimize.call_count)

  def test_multi_class(self):
    n_classes = 3
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08, 2.08, 1.19] => probabilities = [0.0109, 0.7011, 0.2879]
    # loss = -1. * log(0.7011) = 0.35505795
    expected_loss = 0.35505795
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_classifier = self._dnn_classifier_fn(
        n_classes=n_classes,
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_classifier.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1]]),
        steps=num_steps,
        hooks=(summary_hook,))
    # The mock optimizer's minimize() is graph construction; built exactly once.
    self.assertEqual(1, opt.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': .5,
              'dnn/dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=n_classes,
        model_dir=self._model_dir)


class BaseDNNRegressorTrainTest(object):
  """Training tests for DNN regressors (mix into a `tf.test.TestCase`)."""

  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
    # dnn_regressor_fn: factory producing the regressor under test.
    self._dnn_regressor_fn = dnn_regressor_fn
    self._fc_impl = fc_impl

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      # Clear cached writers before removing the directory they write to.
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_from_scratch_with_default_optimizer(self):
    hidden_units = (2, 2)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': ((1,),)
        }, ((10,),)), steps=num_steps)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_from_scratch(self):
    hidden_units = (2, 2)
    opt = mock_optimizer(self, hidden_units=hidden_units)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': ((1,),)
        }, ((5.,),)),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(1, opt.minimize.call_count)
    _assert_checkpoint(
        self,
        num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      summary_keys = [v.tag for v in summary.value]
      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
      self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)

  def test_one_dim(self):
    """Asserts train loss for one-dimensional input and logits."""
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1.], [1.]], [.3]),
    ), base_global_step, self._model_dir)

    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] => predictions = [-2.08]
    # loss = (1 + 2.08)^2 = 9.4864
    expected_loss = 9.4864
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=(self._fc_impl.numeric_column('age'),),
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': [[10.]]
        }, [[1.]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(1, opt.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
              'dnn/dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=1,
        hidden_units=hidden_units,
        output_units=1,
        model_dir=self._model_dir)

  def test_multi_dim(self):
    """Asserts train loss for multi-dimensional input and logits."""
    base_global_step = 100
    hidden_units = (2, 2)
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    input_dimension = 2
    label_dimension = 3

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]]
    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
    expected_loss = 4.3929
    opt = mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = self._dnn_regressor_fn(
        hidden_units=hidden_units,
        feature_columns=[
            self._fc_impl.numeric_column('age', shape=[input_dimension])
        ],
        label_dimension=label_dimension,
        optimizer=opt,
        model_dir=self._model_dir)
    self.assertEqual(0, opt.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({
            'age': [[10., 8.]]
        }, [[1., -1., 0.5]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(1, opt.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self,
          {
              # Mean loss is the sum loss averaged over the label dimension.
              metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension,
              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
              'dnn/dnn/logits/fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          },
          summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=input_dimension,
        hidden_units=hidden_units,
        output_units=label_dimension,
        model_dir=self._model_dir)


================================================
FILE: tensorflow_estimator/python/estimator/canned/v1/linear_estimator_test_v1.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for LinearEstimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.canned import head as head_lib
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.canned.v1 import linear_testing_utils_v1
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io


def _linear_estimator_fn(weight_column=None, label_dimension=1, **kwargs):
  """Returns a LinearEstimator that uses regression_head."""
  return linear.LinearEstimator(
      head=head_lib._regression_head(
          weight_column=weight_column,
          label_dimension=label_dimension,
          # Tests in core (from which this test inherits) test the sum loss.
          loss_reduction=tf.compat.v1.losses.Reduction.SUM),
      **kwargs)


# Each test class below mixes a shared base test kit with tf.test.TestCase,
# binding the kit to the LinearEstimator factory above.
@test_util.run_v1_only('Tests v1 only symbols')
class LinearEstimatorEvaluateTest(
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearEstimatorPredictTest(
    linear_testing_utils_v1.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPredictTest.__init__(
        self, _linear_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearEstimatorTrainTest(
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_estimator_fn)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearEstimatorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow for LinearEstimator."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      # Clear cached writers before removing the directory they write to.
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, batch_size):
    """Runs train -> evaluate -> predict -> export and checks each stage."""
    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est = linear.LinearEstimator(
        head=head_lib._regression_head(label_dimension=label_dimension),
        feature_columns=feature_columns,
        model_dir=self._model_dir)

    # Train
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/canned/v1/linear_test_v1.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for linear.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned.v1 import linear_testing_utils_v1


def _linear_regressor_fn(*args, **kwargs):
  """Factory wrapper so base test kits can construct a LinearRegressor."""
  return linear.LinearRegressor(*args, **kwargs)


def _linear_classifier_fn(*args, **kwargs):
  """Factory wrapper so base test kits can construct a LinearClassifier."""
  return linear.LinearClassifier(*args, **kwargs)


# Tests for Linear Regressor.
# Each class below mixes a shared base test kit with tf.test.TestCase; the
# plain variant uses the v1 feature-column implementation, the "V2" variant
# uses feature_column_v2.


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorPartitionerTest(
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorPartitionerV2Test(
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPartitionerTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorEvaluationTest(
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorEvaluationV2Test(
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorEvaluationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorPredictTest(
    linear_testing_utils_v1.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPredictTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorPredictV2Test(
    linear_testing_utils_v1.BaseLinearRegressorPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorPredictTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorIntegrationTest(
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorIntegrationV2Test(
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorIntegrationTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorTrainingTest(
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearRegressorTrainingV2Test(
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearRegressorTrainingTest.__init__(
        self, _linear_regressor_fn, fc_lib=feature_column_v2)


# Tests for Linear Classifier.


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierTrainingTest(
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierTrainingV2Test(
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierTrainingTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierEvaluationTest(
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierEvaluationV2Test(
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierEvaluationTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierPredictTest(
    linear_testing_utils_v1.BaseLinearClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierPredictTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierPredictV2Test(
    linear_testing_utils_v1.BaseLinearClassifierPredictTest, tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierPredictTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierIntegrationTest(
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest.__init__(
        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearClassifierIntegrationV2Test(
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest,
    tf.test.TestCase):

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearClassifierIntegrationTest.__init__(
        self,
        linear_classifier_fn=_linear_classifier_fn,
        fc_lib=feature_column_v2)


# Tests for Linear logit_fn.
@test_util.run_v1_only('Tests v1 only symbols')
class LinearLogitFnTest(linear_testing_utils_v1.BaseLinearLogitFnTest,
                        tf.test.TestCase):
  """Linear logit_fn suite bound to v1 feature columns."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearLogitFnTest.__init__(
        self, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearLogitFnV2Test(linear_testing_utils_v1.BaseLinearLogitFnTest,
                          tf.test.TestCase):
  """Linear logit_fn suite bound to feature_column_v2."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearLogitFnTest.__init__(
        self, fc_lib=feature_column_v2)


# Tests for warm-starting with Linear logit_fn.


@test_util.run_v1_only('Tests v1 only symbols')
class LinearWarmStartingTest(linear_testing_utils_v1.BaseLinearWarmStartingTest,
                             tf.test.TestCase):
  """Warm-starting suite bound to v1 feature columns."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearWarmStartingTest.__init__(
        self, _linear_classifier_fn, _linear_regressor_fn, fc_lib=feature_column)


@test_util.run_v1_only('Tests v1 only symbols')
class LinearWarmStartingV2Test(
    linear_testing_utils_v1.BaseLinearWarmStartingTest, tf.test.TestCase):
  """Warm-starting suite bound to feature_column_v2."""

  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
    tf.test.TestCase.__init__(self, methodName)
    linear_testing_utils_v1.BaseLinearWarmStartingTest.__init__(
        self,
        _linear_classifier_fn,
        _linear_regressor_fn,
        fc_lib=feature_column_v2)


@test_util.run_v1_only('Tests v1 only symbols')
class ComputeFractionOfZeroTest(tf.test.TestCase):
  """Tests linear._compute_fraction_of_zero across dtypes, shapes and sizes."""

  def _assertSparsity(self, expected_sparsity, tensor):
    # Wraps `tensor` in a single-element list; nested-list cases pass a list
    # of tensors here, producing a list-of-lists argument.
    sparsity = linear._compute_fraction_of_zero([tensor])
    with self.test_session() as sess:
      self.assertAllClose(expected_sparsity, sess.run(sparsity))

  def test_small_float32(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.float32))
    self._assertSparsity(
        0.5, ops.convert_to_tensor([0, 1, 0, 1], dtype=tf.dtypes.float32))

  def test_small_int32(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.int32))

  def test_small_float64(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.float64))

  def test_small_int64(self):
    self._assertSparsity(
        0.75, ops.convert_to_tensor([0, 0, 0, 1], dtype=tf.dtypes.int64))

  def test_nested(self):
    self._assertSparsity(
        0.75, [ops.convert_to_tensor([0, 0]),
               ops.convert_to_tensor([0, 1])])

  def test_none(self):
    # An empty list of variables is invalid input.
    with self.assertRaises(ValueError):
      linear._compute_fraction_of_zero([])

  def test_empty(self):
    # A zero-element tensor yields 0/0 -> nan.
    sparsity = linear._compute_fraction_of_zero([ops.convert_to_tensor([])])
    with self.test_session() as sess:
      sparsity_np = sess.run(sparsity)
      self.assertTrue(
          np.isnan(sparsity_np), 'Expected sparsity=nan, got %s' % sparsity_np)

  def test_multiple_empty(self):
    sparsity = linear._compute_fraction_of_zero([
        ops.convert_to_tensor([]),
        ops.convert_to_tensor([]),
    ])
    with self.test_session() as sess:
      sparsity_np = sess.run(sparsity)
      self.assertTrue(
          np.isnan(sparsity_np), 'Expected sparsity=nan, got %s' % sparsity_np)

  def test_some_empty(self):
    with self.test_session():
      self._assertSparsity(0.5, [
          ops.convert_to_tensor([]),
          ops.convert_to_tensor([0.]),
          ops.convert_to_tensor([1.]),
      ])

  def test_mixed_types(self):
    with self.test_session():
      self._assertSparsity(0.6, [
          ops.convert_to_tensor([0, 0, 1, 1, 1], dtype=tf.dtypes.float32),
          ops.convert_to_tensor([0, 0, 0, 0, 1], dtype=tf.dtypes.int32),
      ])

  def test_2_27_zeros__using_512_MiB_of_ram(self):
    # Large-input check: 2**27 int8 elements (~128 MiB tensor) must not
    # overflow intermediate counts.
    self._assertSparsity(1.,
                         tf.zeros([int(2**27 * 1.01)], dtype=tf.dtypes.int8))

  def test_2_27_ones__using_512_MiB_of_ram(self):
    self._assertSparsity(0.,
                         tf.ones([int(2**27 * 1.01)], dtype=tf.dtypes.int8))


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/canned/v1/linear_testing_utils_v1.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for testing linear estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os
import shutil
import tempfile

import numpy as np
import six
import tensorflow as tf
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.framework import ops
from tensorflow.python.ops import variables as variables_lib
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator import run_config
from tensorflow_estimator.python.estimator.canned import linear
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.inputs import pandas_io

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use
  # pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False

# pylint rules which are disabled by default for test files.
# pylint: disable=invalid-name,protected-access,missing-docstring

# Names of variables created by model.
AGE_WEIGHT_NAME = 'linear/linear_model/age/weights'
HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights'
OCCUPATION_WEIGHT_NAME = 'linear/linear_model/occupation/weights'
BIAS_NAME = 'linear/linear_model/bias_weights'
LANGUAGE_WEIGHT_NAME = 'linear/linear_model/language/weights'

# This is so that we can easily switch between feature_column and
# feature_column_v2 for testing. The private v1 constructors are aliased under
# the public names so both libraries expose the same call surface to tests.
feature_column.numeric_column = feature_column._numeric_column
feature_column.categorical_column_with_hash_bucket = feature_column._categorical_column_with_hash_bucket  # pylint: disable=line-too-long
feature_column.categorical_column_with_vocabulary_list = feature_column._categorical_column_with_vocabulary_list  # pylint: disable=line-too-long
feature_column.categorical_column_with_vocabulary_file = feature_column._categorical_column_with_vocabulary_file  # pylint: disable=line-too-long
feature_column.embedding_column = feature_column._embedding_column


def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
  """Returns an op asserting |expected - actual| / |expected| < rtol."""
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    # Relative difference; note this divides by |expected|, so expected must
    # be nonzero.
    rdiff = tf.math.abs(expected - actual, 'diff') / tf.math.abs(expected)
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return tf.compat.v1.debugging.assert_less(
        rdiff,
        rtol,
        data=('Condition expected =~ actual did not hold element-wise:'
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        name=scope)


def save_variables_to_ckpt(model_dir):
  """Initializes all global variables and saves them to a v1 checkpoint."""
  init_all_op = [tf.compat.v1.initializers.global_variables()]
  with tf.compat.v1.Session() as sess:
    sess.run(init_all_op)
    tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
def queue_parsed_features(feature_map):
  """Re-reads `feature_map` tensors through a FIFOQueue.

  Enqueues all feature tensors onto a single queue (serviced by a
  QueueRunner) and returns the dequeued tensors keyed by their original
  names, preserving key/tensor pairing.
  """
  tensors_to_enqueue = []
  keys = []
  for key, tensor in six.iteritems(feature_map):
    keys.append(key)
    tensors_to_enqueue.append(tensor)
  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
  input_queue = tf.queue.FIFOQueue(capacity=100, dtypes=queue_dtypes)
  tf.compat.v1.train.queue_runner.add_queue_runner(
      tf.compat.v1.train.queue_runner.QueueRunner(
          input_queue, [input_queue.enqueue(tensors_to_enqueue)]))
  dequeued_tensors = input_queue.dequeue()
  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}


def sorted_key_dict(unsorted_dict):
  """Returns a copy of `unsorted_dict` built in sorted-key order."""
  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}


def sigmoid(x):
  """Numpy logistic sigmoid: 1 / (1 + exp(-x))."""
  return 1 / (1 + np.exp(-1.0 * x))


class CheckPartitionerVarHook(tf.compat.v1.train.SessionRunHook):
  """A `SessionRunHook` to check a partitioned variable."""

  def __init__(self, test_case, var_name, var_dim, partitions):
    self._test_case = test_case
    self._var_name = var_name
    self._var_dim = var_dim
    self._partitions = partitions

  def begin(self):
    # At graph-build time, re-fetch the variable by name (with reuse) and
    # assert it was partitioned evenly along dimension 0.
    with tf.compat.v1.variable_scope(
        tf.compat.v1.get_variable_scope()) as scope:
      scope.reuse_variables()
      partitioned_weight = tf.compat.v1.get_variable(
          self._var_name, shape=(self._var_dim, 1))
      self._test_case.assertTrue(
          isinstance(partitioned_weight, variables_lib.PartitionedVariable))
      for part in partitioned_weight:
        self._test_case.assertEqual(self._var_dim // self._partitions,
                                    part.get_shape()[0])


class BaseLinearRegressorPartitionerTest(object):
  """Shared tests for variable partitioning in linear regressors."""

  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def testPartitioner(self):
    x_dim = 64
    partitions = 4

    def _partitioner(shape, dtype):
      del dtype  # unused; required by Fn signature.
      # Only partition the embedding tensor.
      return [partitions, 1] if shape[0] == x_dim else [1]

    regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
            'language', hash_bucket_size=x_dim),),
        partitioner=_partitioner,
        model_dir=self._model_dir)

    def _input_fn():
      return {
          'language':
              tf.sparse.SparseTensor(
                  values=['english', 'spanish'],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2])
      }, [[10.]]

    hook = CheckPartitionerVarHook(self, LANGUAGE_WEIGHT_NAME, x_dim,
                                   partitions)
    regressor.train(input_fn=_input_fn, steps=1, hooks=[hook])

  def testDefaultPartitionerWithMultiplePsReplicas(self):
    partitions = 2
    # This results in weights larger than the default partition size of 64M,
    # so partitioned weights are created (each weight uses 4 bytes).
    x_dim = 32 << 20

    class FakeRunConfig(run_config.RunConfig):

      @property
      def num_ps_replicas(self):
        return partitions

    # Mock the device setter as ps is not available on test machines.
    with tf.compat.v1.test.mock.patch.object(
        estimator,
        '_get_replica_device_setter',
        return_value=lambda _: '/cpu:0'):
      linear_regressor = self._linear_regressor_fn(
          feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
              'language', hash_bucket_size=x_dim),),
          config=FakeRunConfig(),
          model_dir=self._model_dir)

      def _input_fn():
        return {
            'language':
                tf.sparse.SparseTensor(
                    values=['english', 'spanish'],
                    indices=[[0, 0], [0, 1]],
                    dense_shape=[1, 2])
        }, [[10.]]

      hook = CheckPartitionerVarHook(self, LANGUAGE_WEIGHT_NAME, x_dim,
                                     partitions)
      linear_regressor.train(input_fn=_input_fn, steps=1, hooks=[hook])


# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
class BaseLinearRegressorEvaluationTest(object):
  """Shared evaluation tests; checkpoints are hand-built so metrics are exact."""

  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_evaluation_for_simple_data(self):
    with tf.Graph().as_default():
      tf.Variable([[11.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([2.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir)
    eval_metrics = linear_regressor.evaluate(
        input_fn=lambda: ({
            'age': ((1,),)
        }, ((10.,),)), steps=1)

    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10. Loss is 3**2 = 9.
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 9.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_batch(self):
    """Tests evaluation for batch_size==2."""
    with tf.Graph().as_default():
      tf.Variable([[11.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([2.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir)
    eval_metrics = linear_regressor.evaluate(
        input_fn=lambda: ({
            'age': ((1,), (1,))
        }, ((10.,), (10.,))), steps=1)

    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
    # Loss per example is 3**2 = 9.
    # Training loss is the sum over batch = 9 + 9 = 18
    # Average loss is the average over batch = 9
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 18.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_weights(self):
    """Tests evaluation with weights."""
    with tf.Graph().as_default():
      tf.Variable([[11.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([2.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    def _input_fn():
      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
      labels = ((10.,), (10.,))
      return features, labels

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        weight_column='weights',
        model_dir=self._model_dir)
    eval_metrics = linear_regressor.evaluate(input_fn=_input_fn, steps=1)

    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
    # Loss per example is 3**2 = 9.
    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
    # Average loss is the weighted average = (9 + 2*9) / (1 + 2) = 9
    self.assertDictEqual(
        {
            metric_keys.MetricKeys.LOSS: 27.,
            metric_keys.MetricKeys.LOSS_MEAN: 9.,
            metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
            metric_keys.MetricKeys.LABEL_MEAN: 10.,
            tf.compat.v1.GraphKeys.GLOBAL_STEP: 100
        }, eval_metrics)

  def test_evaluation_for_multi_dimensions(self):
    x_dim = 3
    label_dim = 2
    with tf.Graph().as_default():
      tf.Variable([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([7.0, 8.0], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age', shape=(x_dim,)),),
        label_dimension=label_dim,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={
            'age': np.array([[2., 4., 5.]]),
        },
        y=np.array([[46., 58.]]),
        batch_size=1,
        num_epochs=None,
        shuffle=False)
    eval_metrics = linear_regressor.evaluate(input_fn=input_fn, steps=1)

    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is
    #   [2., 4., 5.] * [1.0, 2.0] + [7.0, 8.0] = [39, 50] + [7.0, 8.0]
    #                  [3.0, 4.0]
    #                  [5.0, 6.0]
    # which is [46, 58]
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])

  def test_evaluation_for_multiple_feature_columns(self):
    with tf.Graph().as_default():
      tf.Variable([[10.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
      tf.Variable([5.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    batch_size = 2
    feature_columns = [
        self._fc_lib.numeric_column('age'),
        self._fc_lib.numeric_column('height')
    ]
    input_fn = numpy_io.numpy_input_fn(
        x={
            'age': np.array([20, 40]),
            'height': np.array([4, 8])
        },
        y=np.array([[213.], [421.]]),
        batch_size=batch_size,
        num_epochs=None,
        shuffle=False)

    est = self._linear_regressor_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)

    eval_metrics = est.evaluate(input_fn=input_fn, steps=1)
    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])

  def test_evaluation_for_multiple_feature_columns_mix(self):
    # Mixes a v1 feature column with a tf.feature_column (v2) column.
    with tf.Graph().as_default():
      tf.Variable([[10.0]], name=AGE_WEIGHT_NAME)
      tf.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
      tf.Variable([5.0], name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    batch_size = 2
    feature_columns = [
        feature_column.numeric_column('age'),
        tf.feature_column.numeric_column('height')
    ]

    def _input_fn():
      features_ds = tf.compat.v1.data.Dataset.from_tensor_slices({
          'age': np.array([20, 40]),
          'height': np.array([4, 8])
      })
      labels_ds = tf.compat.v1.data.Dataset.from_tensor_slices(
          np.array([[213.], [421.]]))
      return (tf.compat.v1.data.Dataset.zip(
          (features_ds, labels_ds)).batch(batch_size).repeat(None))

    est = self._linear_regressor_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)

    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, tf.compat.v1.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])


class BaseLinearRegressorPredictTest(object):
  """Shared predict() tests against hand-built checkpoints."""

  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def test_1d(self):
    """Tests predict when all variables are one-dimensional."""
    with tf.Graph().as_default():
      tf.Variable([[10.]], name='linear/linear_model/x/weights')
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('x'),),
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[2.]])},
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # x * weight + bias = 2. * 10. + .2 = 20.2
    self.assertAllClose([[20.2]], predicted_scores)

  def testMultiDim(self):
    """Tests predict when all variables are multi-dimensional."""
    batch_size = 2
    label_dimension = 3
    x_dim = 4
    feature_columns = (self._fc_lib.numeric_column('x', shape=(x_dim,)),)
    with tf.Graph().as_default():
      tf.Variable(  # shape=[x_dim, label_dimension]
          [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
          name='linear/linear_model/x/weights')
      tf.Variable(  # shape=[label_dimension]
          [.2, .4, .6], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        # x shape=[batch_size, x_dim]
        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # score = x * weight + bias, shape=[batch_size, label_dimension]
    self.assertAllClose([[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]],
                        predicted_scores)

  def testTwoFeatureColumns(self):
    """Tests predict with two feature columns."""
    with tf.Graph().as_default():
      tf.Variable([[10.]], name='linear/linear_model/x0/weights')
      tf.Variable([[20.]], name='linear/linear_model/x1/weights')
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('x0'),
                         self._fc_lib.numeric_column('x1')),
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={
            'x0': np.array([[2.]]),
            'x1': np.array([[3.]])
        },
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
    self.assertAllClose([[80.2]], predicted_scores)

  def testTwoFeatureColumnsMix(self):
    """Tests predict with two feature columns."""
    with tf.Graph().as_default():
      tf.Variable([[10.]], name='linear/linear_model/x0/weights')
      tf.Variable([[20.]], name='linear/linear_model/x1/weights')
      tf.Variable([.2], name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=(feature_column.numeric_column('x0'),
                         tf.feature_column.numeric_column('x1')),
        model_dir=self._model_dir)

    def _predict_input_fn():
      return tf.compat.v1.data.Dataset.from_tensor_slices({
          'x0': np.array([[2.]]),
          'x1': np.array([[3.]])
      }).batch(1)

    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
    self.assertAllClose([[80.2]], predicted_scores)

  def testSparseCombiner(self):
    w_a = 2.0
    w_b = 3.0
    w_c = 5.0
    bias = 5.0
    with tf.Graph().as_default():
      tf.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          1, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    def _input_fn():
      return tf.compat.v1.data.Dataset.from_tensors({
          'language':
              tf.sparse.SparseTensor(
                  values=['a', 'c', 'b', 'c'],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      })

    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
        'language', vocabulary_list=['a', 'b', 'c']),)

    # Check prediction for each sparse_combiner.
    # With sparse_combiner = 'sum', we have
    # logits_1 = w_a + w_c + bias
    #          = 2.0 + 5.0 + 5.0 = 12.0
    # logits_2 = w_b + w_c + bias
    #          = 3.0 + 5.0 + 5.0 = 13.0
    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)
    predictions = linear_regressor.predict(input_fn=_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    self.assertAllClose([[12.0], [13.0]], predicted_scores)

    # With sparse_combiner = 'mean', we have
    # logits_1 = 1/2 * (w_a + w_c) + bias
    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
    # logits_2 = 1/2 * (w_b + w_c) + bias
    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        sparse_combiner='mean')
    predictions = linear_regressor.predict(input_fn=_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    self.assertAllClose([[8.5], [9.0]], predicted_scores)

    # With sparse_combiner = 'sqrtn', we have
    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        sparse_combiner='sqrtn')
    predictions = linear_regressor.predict(input_fn=_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)


class BaseLinearRegressorIntegrationTest(object):
  """Shared end-to-end train/evaluate/predict/export tests."""

  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, label_dimension, prediction_length):
    feature_columns = [
        self._fc_lib.numeric_column('x', shape=(input_dimension,))
    ]
    est = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['predictions'] for x in est.predict(predict_input_fn)])
    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)

  def test_pandas_input_fn(self):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return

    # Pandas DataFrame naturally supports 1 dim data only.
    label_dimension = 1
    input_dimension = label_dimension
    batch_size = 10
    data = np.array([1., 2., 3., 4.], dtype=np.float32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(data)
    prediction_length = 4

    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)

  def test_input_fn_from_parse_example(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    serialized_examples = []
    for datum in data:
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=datum)),
                  'y':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(
                              value=datum[:label_dimension])),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([label_dimension], tf.dtypes.float32),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      features.pop('y')
      return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        label_dimension=label_dimension,
        prediction_length=prediction_length)


class BaseLinearRegressorTrainingTest(object):
  """Shared training tests (mock-optimizer loss checks, checkpoint asserts)."""

  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
    self._linear_regressor_fn = linear_regressor_fn
    self._fc_lib = fc_lib

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)

  def _mock_optimizer(self, expected_loss=None):
    expected_var_names = [
        '%s/part_0:0' % AGE_WEIGHT_NAME, '%s/part_0:0' % BIAS_NAME
    ]

    def _minimize(loss, global_step=None, var_list=None):
      trainable_vars = var_list or tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
      self.assertItemsEqual(expected_var_names,
                            [var.name for var in trainable_vars])

      # Verify loss. We can't check the value directly, so we add an assert op.
      self.assertEquals(0, loss.shape.ndims)
      if expected_loss is None:
        if global_step is not None:
          return tf.compat.v1.assign_add(global_step, 1).op
        return tf.no_op()
      assert_loss = assert_close(
          tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32),
          loss,
          name='assert_loss')
      with tf.control_dependencies((assert_loss,)):
        if global_step is not None:
          return tf.compat.v1.assign_add(global_step, 1).op
        return tf.no_op()

    mock_optimizer = tf.compat.v1.test.mock.NonCallableMock(
        spec=tf.compat.v1.train.Optimizer,
        wraps=tf.compat.v1.train.Optimizer(
            use_locking=False, name='my_optimizer'))
    mock_optimizer.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize)

    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
    # So, return mock_optimizer itself for deepcopy.
    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
    return mock_optimizer

  def _assert_checkpoint(self,
                         expected_global_step,
                         expected_age_weight=None,
                         expected_bias=None):
    shapes = {
        name: shape
        for (name, shape) in tf.train.list_variables(self._model_dir)
    }

    self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertEqual(
        expected_global_step,
        tf.train.load_variable(self._model_dir,
                               tf.compat.v1.GraphKeys.GLOBAL_STEP))

    self.assertEqual([1, 1], shapes[AGE_WEIGHT_NAME])
    if expected_age_weight is not None:
      self.assertEqual(expected_age_weight,
                       tf.train.load_variable(self._model_dir, AGE_WEIGHT_NAME))

    self.assertEqual([1], shapes[BIAS_NAME])
    if expected_bias is not None:
      self.assertEqual(expected_bias,
                       tf.train.load_variable(self._model_dir, BIAS_NAME))

  def testFromScratchWithDefaultOptimizer(self):
    # Create LinearRegressor.
    label = 5.
    age = 17
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, and validate final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((age,),)
        }, ((label,),)), steps=num_steps)

    self._assert_checkpoint(num_steps)

  def testTrainWithOneDimLabel(self):
    label_dimension = 1
    batch_size = 20
    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
    est = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)
    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    self.assertEqual((batch_size,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'age': data_rank_1},
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(200)

  def testTrainWithOneDimWeight(self):
    label_dimension = 1
    batch_size = 20
    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
    est = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        weight_column='w',
        model_dir=self._model_dir)

    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    self.assertEqual((batch_size,), data_rank_1.shape)

    train_input_fn = numpy_io.numpy_input_fn(
        x={
            'age': data_rank_1,
            'w': data_rank_1
        },
        y=data_rank_1,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)
    self._assert_checkpoint(200)

  def testFromScratch(self):
    # Create LinearRegressor.
    label = 5.
    age = 17
    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
    mock_optimizer = self._mock_optimizer(expected_loss=25.)
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir,
        optimizer=mock_optimizer)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((age,),)
        }, ((label,),)), steps=num_steps)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        expected_global_step=num_steps,
        expected_age_weight=0.,
        expected_bias=0.)

  def testFromCheckpoint(self):
    # Create initial checkpoint.
    age_weight = 10.0
    bias = 5.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = age * age_weight + bias = 17 * 10. + 5. = 175
    # loss = (logits - label)^2 = (175 - 5)^2 = 28900
    mock_optimizer = self._mock_optimizer(expected_loss=28900.)
    linear_regressor = self._linear_regressor_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        model_dir=self._model_dir,
        optimizer=mock_optimizer)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, and validate optimizer and final checkpoint.
    num_steps = 10
    linear_regressor.train(
        input_fn=lambda: ({
            'age': ((17,),)
        }, ((5.,),)), steps=num_steps)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        expected_global_step=initial_global_step + num_steps,
        expected_age_weight=age_weight,
        expected_bias=bias)

  def testFromCheckpointMultiBatch(self):
    # Create initial checkpoint.
    age_weight = 10.0
    bias = 5.0
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    # logits = age * age_weight + bias
    # logits[0] = 17 * 10. + 5. = 175
    # logits[1] = 15 * 10. + 5. = 155
    # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
    mock_optimizer = self._mock_optimizer(expected_loss=52004.)
linear_regressor = self._linear_regressor_fn( feature_columns=(self._fc_lib.numeric_column('age'),), model_dir=self._model_dir, optimizer=mock_optimizer) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 linear_regressor.train( input_fn=lambda: ({ 'age': ((17,), (15,)) }, ((5.,), (3.,))), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( expected_global_step=initial_global_step + num_steps, expected_age_weight=age_weight, expected_bias=bias) class BaseLinearClassifierTrainingTest(object): def __init__(self, linear_classifier_fn, fc_lib=feature_column): self._linear_classifier_fn = linear_classifier_fn self._fc_lib = fc_lib def setUp(self): self._model_dir = tempfile.mkdtemp() def tearDown(self): if self._model_dir: shutil.rmtree(self._model_dir) def _mock_optimizer(self, expected_loss=None): expected_var_names = [ '%s/part_0:0' % AGE_WEIGHT_NAME, '%s/part_0:0' % BIAS_NAME ] def _minimize(loss, global_step): trainable_vars = tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES) self.assertItemsEqual(expected_var_names, [var.name for var in trainable_vars]) # Verify loss. We can't check the value directly, so we add an assert op. self.assertEquals(0, loss.shape.ndims) if expected_loss is None: return tf.compat.v1.assign_add(global_step, 1).op assert_loss = assert_close( tf.cast(expected_loss, name='expected', dtype=tf.dtypes.float32), loss, name='assert_loss') with tf.control_dependencies((assert_loss,)): return tf.compat.v1.assign_add(global_step, 1).op mock_optimizer = tf.compat.v1.test.mock.NonCallableMock( spec=tf.compat.v1.train.Optimizer, wraps=tf.compat.v1.train.Optimizer( use_locking=False, name='my_optimizer')) mock_optimizer.minimize = tf.compat.v1.test.mock.MagicMock(wraps=_minimize) # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks. 
# So, return mock_optimizer itself for deepcopy. mock_optimizer.__deepcopy__ = lambda _: mock_optimizer return mock_optimizer def _assert_checkpoint(self, n_classes, expected_global_step, expected_age_weight=None, expected_bias=None): logits_dimension = n_classes if n_classes > 2 else 1 shapes = { name: shape for (name, shape) in tf.train.list_variables(self._model_dir) } self.assertEqual([], shapes[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertEqual( expected_global_step, tf.train.load_variable(self._model_dir, tf.compat.v1.GraphKeys.GLOBAL_STEP)) self.assertEqual([1, logits_dimension], shapes[AGE_WEIGHT_NAME]) if expected_age_weight is not None: self.assertAllEqual( expected_age_weight, tf.train.load_variable(self._model_dir, AGE_WEIGHT_NAME)) self.assertEqual([logits_dimension], shapes[BIAS_NAME]) if expected_bias is not None: self.assertAllEqual(expected_bias, tf.train.load_variable(self._model_dir, BIAS_NAME)) def _testFromScratchWithDefaultOptimizer(self, n_classes): label = 0 age = 17 est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) # Train for a few steps, and validate final checkpoint. 
num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self._assert_checkpoint(n_classes, num_steps) def testBinaryClassesFromScratchWithDefaultOptimizer(self): self._testFromScratchWithDefaultOptimizer(n_classes=2) def testMultiClassesFromScratchWithDefaultOptimizer(self): self._testFromScratchWithDefaultOptimizer(n_classes=4) def _testTrainWithTwoDimsLabel(self, n_classes): batch_size = 20 est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) data_rank_2 = np.array([[0], [1]]) self.assertEqual((2,), data_rank_1.shape) self.assertEqual((2, 1), data_rank_2.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_2, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithTwoDimsLabel(self): self._testTrainWithTwoDimsLabel(n_classes=2) def testMultiClassesTrainWithTwoDimsLabel(self): self._testTrainWithTwoDimsLabel(n_classes=4) def _testTrainWithOneDimLabel(self, n_classes): batch_size = 20 est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) self.assertEqual((2,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={'age': data_rank_1}, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithOneDimLabel(self): self._testTrainWithOneDimLabel(n_classes=2) def testMultiClassesTrainWithOneDimLabel(self): self._testTrainWithOneDimLabel(n_classes=4) def _testTrainWithTwoDimsWeight(self, n_classes): batch_size = 20 est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), weight_column='w', n_classes=n_classes, 
model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) data_rank_2 = np.array([[0], [1]]) self.assertEqual((2,), data_rank_1.shape) self.assertEqual((2, 1), data_rank_2.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_2 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithTwoDimsWeight(self): self._testTrainWithTwoDimsWeight(n_classes=2) def testMultiClassesTrainWithTwoDimsWeight(self): self._testTrainWithTwoDimsWeight(n_classes=4) def _testTrainWithOneDimWeight(self, n_classes): batch_size = 20 est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), weight_column='w', n_classes=n_classes, model_dir=self._model_dir) data_rank_1 = np.array([0, 1]) self.assertEqual((2,), data_rank_1.shape) train_input_fn = numpy_io.numpy_input_fn( x={ 'age': data_rank_1, 'w': data_rank_1 }, y=data_rank_1, batch_size=batch_size, num_epochs=None, shuffle=True) est.train(train_input_fn, steps=200) self._assert_checkpoint(n_classes, 200) def testBinaryClassesTrainWithOneDimWeight(self): self._testTrainWithOneDimWeight(n_classes=2) def testMultiClassesTrainWithOneDimWeight(self): self._testTrainWithOneDimWeight(n_classes=4) def _testFromScratch(self, n_classes): label = 1 age = 17 # For binary classifier: # loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are # all zero initially) and label = 1 so, # loss = 1 * -log ( sigmoid(logits) ) = 0.69315 # For multi class classifier: # loss = cross_entropy(logits, label) where logits are all 0s (weights are # all zero initially) and label = 1 so, # loss = 1 * -log ( 1.0 / n_classes ) # For this particular test case, as logits are same, the formular # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases. 
mock_optimizer = self._mock_optimizer( expected_loss=(-1 * math.log(1.0 / n_classes))) est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( n_classes, expected_global_step=num_steps, expected_age_weight=[[0.]] if n_classes == 2 else [[0.] * n_classes], expected_bias=[0.] if n_classes == 2 else [.0] * n_classes) def testBinaryClassesFromScratch(self): self._testFromScratch(n_classes=2) def testMultiClassesFromScratch(self): self._testFromScratch(n_classes=4) def _testFromCheckpoint(self, n_classes): # Create initial checkpoint. label = 1 age = 17 # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). age_weight = [[2.0]] if n_classes == 2 else (np.reshape( 2.0 * np.array(list(range(n_classes)), dtype=np.float32), (1, n_classes))) bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # For binary classifier: # logits = age * age_weight + bias = 17 * 2. - 35. = -1. 
# loss = sigmoid_cross_entropy(logits, label) # so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133 # For multi class classifier: # loss = cross_entropy(logits, label) # where logits = 17 * age_weight + bias and label = 1 # so, loss = 1 * -log ( soft_max(logits)[1] ) if n_classes == 2: expected_loss = 1.3133 else: logits = age_weight * age + bias logits_exp = np.exp(logits) softmax = logits_exp / logits_exp.sum() expected_loss = -1 * math.log(softmax[0, label]) mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) self._assert_checkpoint( n_classes, expected_global_step=initial_global_step + num_steps, expected_age_weight=age_weight, expected_bias=bias) def testBinaryClassesFromCheckpoint(self): self._testFromCheckpoint(n_classes=2) def testMultiClassesFromCheckpoint(self): self._testFromCheckpoint(n_classes=4) def _testFromCheckpointFloatLabels(self, n_classes): """Tests float labels for binary classification.""" # Create initial checkpoint. if n_classes > 2: return label = 0.8 age = 17 age_weight = [[2.0]] bias = [-35.0] initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # logits = age * age_weight + bias = 17 * 2. - 35. = -1. 
# loss = sigmoid_cross_entropy(logits, label) # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617 mock_optimizer = self._mock_optimizer(expected_loss=1.1132617) est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. num_steps = 10 est.train( input_fn=lambda: ({ 'age': ((age,),) }, ((label,),)), steps=num_steps) self.assertEqual(1, mock_optimizer.minimize.call_count) def testBinaryClassesFromCheckpointFloatLabels(self): self._testFromCheckpointFloatLabels(n_classes=2) def testMultiClassesFromCheckpointFloatLabels(self): self._testFromCheckpointFloatLabels(n_classes=4) def _testFromCheckpointMultiBatch(self, n_classes): # Create initial checkpoint. label = [1, 0] age = [17.0, 18.5] # For binary case, the expected weight has shape (1,1). For multi class # case, the shape is (1, n_classes). In order to test the weights, set # weights as 2.0 * range(n_classes). age_weight = [[2.0]] if n_classes == 2 else (np.reshape( 2.0 * np.array(list(range(n_classes)), dtype=np.float32), (1, n_classes))) bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes initial_global_step = 100 with tf.Graph().as_default(): tf.Variable(age_weight, name=AGE_WEIGHT_NAME) tf.Variable(bias, name=BIAS_NAME) tf.Variable( initial_global_step, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64) save_variables_to_ckpt(self._model_dir) # For binary classifier: # logits = age * age_weight + bias # logits[0] = 17 * 2. - 35. = -1. # logits[1] = 18.5 * 2. - 35. = 2. 
# loss = sigmoid_cross_entropy(logits, label) # so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133 # loss[1] = (1 - 0) * -log ( 1- sigmoid(2) ) = 2.1269 # expected_loss = loss[0] + loss[1] # For multi class classifier: # loss = cross_entropy(logits, label) # where logits = [17, 18.5] * age_weight + bias and label = [1, 0] # so, loss = 1 * -log ( soft_max(logits)[label] ) # expected_loss = loss[0] + loss[1] if n_classes == 2: expected_loss = 1.3133 + 2.1269 else: logits = age_weight * np.reshape(age, (2, 1)) + bias logits_exp = np.exp(logits) softmax_row_0 = logits_exp[0] / logits_exp[0].sum() softmax_row_1 = logits_exp[1] / logits_exp[1].sum() expected_loss_0 = -1 * math.log(softmax_row_0[label[0]]) expected_loss_1 = -1 * math.log(softmax_row_1[label[1]]) expected_loss = expected_loss_0 + expected_loss_1 mock_optimizer = self._mock_optimizer(expected_loss=expected_loss) est = linear.LinearClassifier( feature_columns=(self._fc_lib.numeric_column('age'),), n_classes=n_classes, optimizer=mock_optimizer, model_dir=self._model_dir) self.assertEqual(0, mock_optimizer.minimize.call_count) # Train for a few steps, and validate optimizer and final checkpoint. 
num_steps = 10
    est.train(input_fn=lambda: ({'age': (age)}, (label)), steps=num_steps)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        n_classes,
        expected_global_step=initial_global_step + num_steps,
        expected_age_weight=age_weight,
        expected_bias=bias)

  def testBinaryClassesFromCheckpointMultiBatch(self):
    self._testFromCheckpointMultiBatch(n_classes=2)

  def testMultiClassesFromCheckpointMultiBatch(self):
    self._testFromCheckpointMultiBatch(n_classes=4)


class BaseLinearClassifierEvaluationTest(object):
  """Tests `evaluate()` metrics of a linear classifier.

  Each test writes a checkpoint with hand-picked weights, runs evaluation on
  a tiny fixed batch, and compares the metric dict against hand-computed
  expected values.
  """

  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
    self._linear_classifier_fn = linear_classifier_fn
    self._fc_lib = fc_lib

  def setUp(self):
    # Fresh model dir per test so checkpoints don't leak between tests.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _test_evaluation_for_simple_data(self, n_classes):
    """Evaluates a single example against hand-computed metrics."""
    label = 1
    age = 1.
    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes).
    age_weight = [[-11.0]] if n_classes == 2 else (np.reshape(
        -11.0 * np.array(list(range(n_classes)), dtype=np.float32),
        (1, n_classes)))
    bias = [-30.0] if n_classes == 2 else [-30.0] * n_classes

    with tf.Graph().as_default():
      tf.Variable(age_weight, name=AGE_WEIGHT_NAME)
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          100, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = self._linear_classifier_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        n_classes=n_classes,
        model_dir=self._model_dir)
    eval_metrics = est.evaluate(
        input_fn=lambda: ({
            'age': ((age,),)
        }, ((label,),)), steps=1)

    if n_classes == 2:
      # Binary classes: loss = sum(cross_entropy(41)) = 41.
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: 41.,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: 41.,
          metric_keys.MetricKeys.ACCURACY: 0.,
          metric_keys.MetricKeys.PRECISION: 0.,
          metric_keys.MetricKeys.RECALL: 0.,
          metric_keys.MetricKeys.PREDICTION_MEAN: 0.,
          metric_keys.MetricKeys.LABEL_MEAN: 1.,
          metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
          metric_keys.MetricKeys.AUC: 0.,
          metric_keys.MetricKeys.AUC_PR: 1.,
      }
    else:
      # Multi classes: loss = 1 * -log ( soft_max(logits)[label] )
      logits = age_weight * age + bias
      logits_exp = np.exp(logits)
      softmax = logits_exp / logits_exp.sum()
      expected_loss = -1 * math.log(softmax[0, label])
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.ACCURACY: 0.,
      }

    self.assertAllClose(
        sorted_key_dict(expected_metrics),
        sorted_key_dict(eval_metrics),
        rtol=1e-3)

  def test_binary_classes_evaluation_for_simple_data(self):
    self._test_evaluation_for_simple_data(n_classes=2)

  def test_multi_classes_evaluation_for_simple_data(self):
    self._test_evaluation_for_simple_data(n_classes=4)

  def _test_evaluation_batch(self, n_classes):
    """Tests evaluation for batch_size==2."""
    label = [1, 0]
    age = [17., 18.]
    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes).
    age_weight = [[2.0]] if n_classes == 2 else (np.reshape(
        2.0 * np.array(list(range(n_classes)), dtype=np.float32),
        (1, n_classes)))
    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable(age_weight, name=AGE_WEIGHT_NAME)
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = self._linear_classifier_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        n_classes=n_classes,
        model_dir=self._model_dir)
    eval_metrics = est.evaluate(
        input_fn=lambda: ({
            'age': (age)
        }, (label)), steps=1)

    if n_classes == 2:
      # Logits are (-1., 1.) labels are (1, 0).
      # Loss is
      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133
      expected_loss = 1.3133 * 2
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
          metric_keys.MetricKeys.ACCURACY: 0.,
          metric_keys.MetricKeys.PRECISION: 0.,
          metric_keys.MetricKeys.RECALL: 0.,
          metric_keys.MetricKeys.PREDICTION_MEAN: 0.5,
          metric_keys.MetricKeys.LABEL_MEAN: 0.5,
          metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
          metric_keys.MetricKeys.AUC: 0.,
          metric_keys.MetricKeys.AUC_PR: 0.25,
      }
    else:
      # Multi classes: loss = 1 * -log ( soft_max(logits)[label] )
      logits = age_weight * np.reshape(age, (2, 1)) + bias
      logits_exp = np.exp(logits)
      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
      expected_loss = expected_loss_0 + expected_loss_1
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.ACCURACY: 0.,
      }

    self.assertAllClose(
        sorted_key_dict(expected_metrics),
        sorted_key_dict(eval_metrics),
        rtol=1e-3)

  def test_binary_classes_evaluation_batch(self):
    self._test_evaluation_batch(n_classes=2)

  def test_multi_classes_evaluation_batch(self):
    self._test_evaluation_batch(n_classes=4)

  def _test_evaluation_weights(self, n_classes):
    """Tests evaluation with weights."""
    label = [1, 0]
    age = [17., 18.]
    weights = [1., 2.]
    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes).
    age_weight = [[2.0]] if n_classes == 2 else (np.reshape(
        2.0 * np.array(list(range(n_classes)), dtype=np.float32),
        (1, n_classes)))
    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
    initial_global_step = 100
    with tf.Graph().as_default():
      tf.Variable(age_weight, name=AGE_WEIGHT_NAME)
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(
          initial_global_step,
          name=tf.compat.v1.GraphKeys.GLOBAL_STEP,
          dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = self._linear_classifier_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        n_classes=n_classes,
        weight_column='w',
        model_dir=self._model_dir)
    eval_metrics = est.evaluate(
        input_fn=lambda: ({
            'age': (age),
            'w': (weights)
        }, (label)), steps=1)

    if n_classes == 2:
      # Logits are (-1., 1.) labels are (1, 0).
      # Loss is
      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133
      # weights = [1., 2.]
      expected_loss = 1.3133 * (1. + 2.)
loss_mean = expected_loss / (1.0 + 2.0)
      label_mean = np.average(label, weights=weights)
      logits = [-1, 1]
      logistics = sigmoid(np.array(logits))
      predictions_mean = np.average(logistics, weights=weights)
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
          metric_keys.MetricKeys.ACCURACY: 0.,
          metric_keys.MetricKeys.PRECISION: 0.,
          metric_keys.MetricKeys.RECALL: 0.,
          metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
          metric_keys.MetricKeys.LABEL_MEAN: label_mean,
          metric_keys.MetricKeys.ACCURACY_BASELINE:
              (max(label_mean, 1 - label_mean)),
          metric_keys.MetricKeys.AUC: 0.,
          metric_keys.MetricKeys.AUC_PR: 0.1668,
      }
    else:
      # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
      logits = age_weight * np.reshape(age, (2, 1)) + bias
      logits_exp = np.exp(logits)
      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
      loss_mean = np.average([expected_loss_0, expected_loss_1],
                             weights=weights)
      expected_loss = loss_mean * np.sum(weights)
      expected_metrics = {
          metric_keys.MetricKeys.LOSS: expected_loss,
          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
          tf.compat.v1.GraphKeys.GLOBAL_STEP: 100,
          metric_keys.MetricKeys.ACCURACY: 0.,
      }

    self.assertAllClose(
        sorted_key_dict(expected_metrics),
        sorted_key_dict(eval_metrics),
        rtol=1e-3)

  def test_binary_classes_evaluation_weights(self):
    self._test_evaluation_weights(n_classes=2)

  def test_multi_classes_evaluation_weights(self):
    self._test_evaluation_weights(n_classes=4)


class BaseLinearClassifierPredictTest(object):
  """Tests `predict()` outputs of a linear classifier.

  Each test writes a checkpoint with hand-picked weights and compares the
  prediction dicts against hand-computed logits/probabilities/classes.
  """

  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
    self._linear_classifier_fn = linear_classifier_fn
    self._fc_lib = fc_lib

  def setUp(self):
    # Fresh model dir per test so checkpoints don't leak between tests.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
    """Tests predict when all variables are one-dimensional."""
    age = 1.

    # For binary case, the expected weight has shape (1,1). For multi class
    # case, the shape is (1, n_classes). In order to test the weights, set
    # weights as 2.0 * range(n_classes).
    age_weight = [[-11.0]] if n_classes == 2 else (np.reshape(
        -11.0 * np.array(list(range(n_classes)), dtype=np.float32),
        (1, n_classes)))
    bias = [10.0] if n_classes == 2 else [10.0] * n_classes

    with tf.Graph().as_default():
      tf.Variable(age_weight, name=AGE_WEIGHT_NAME)
      tf.Variable(bias, name=BIAS_NAME)
      tf.Variable(100, name='global_step', dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    est = self._linear_classifier_fn(
        feature_columns=(self._fc_lib.numeric_column('age'),),
        label_vocabulary=label_vocabulary,
        n_classes=n_classes,
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        x={'age': np.array([[age]])},
        y=None,
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    predictions = list(est.predict(input_fn=predict_input_fn))

    if n_classes == 2:
      # Binary classification emits a single logit; the two-class
      # probabilities come from softmax over [0, logit].
      scalar_logits = np.reshape(np.array(age_weight) * age + bias,
                                 (1,)).item()
      two_classes_logits = [0, scalar_logits]
      two_classes_logits_exp = np.exp(two_classes_logits)
      softmax = two_classes_logits_exp / two_classes_logits_exp.sum()

      expected_predictions = {
          'class_ids': [0],
          'all_class_ids': [0, 1],
          'classes': [label_output_fn(0)],
          'all_classes': [label_output_fn(0), label_output_fn(1)],
          'logistic': [sigmoid(np.array(scalar_logits))],
          'logits': [scalar_logits],
          'probabilities': softmax,
      }
    else:
      onedim_logits = np.reshape(np.array(age_weight) * age + bias, (-1,))
      class_ids = onedim_logits.argmax()
      all_class_ids = list(range(len(onedim_logits)))
      logits_exp = np.exp(onedim_logits)
      softmax = logits_exp / logits_exp.sum()
      expected_predictions = {
          'class_ids': [class_ids],
          'all_class_ids': all_class_ids,
          'classes': [label_output_fn(class_ids)],
          'all_classes': [label_output_fn(i) for i in all_class_ids],
          'logits': onedim_logits,
          'probabilities': softmax,
      }

    self.assertEqual(1, len(predictions))
    # assertAllClose cannot handle byte type.
    self.assertEqual(expected_predictions['classes'],
                     predictions[0]['classes'])
    expected_predictions.pop('classes')
    predictions[0].pop('classes')
    self.assertAllEqual(expected_predictions['all_classes'],
                        predictions[0]['all_classes'])
    expected_predictions.pop('all_classes')
    predictions[0].pop('all_classes')
    self.assertAllClose(
        sorted_key_dict(expected_predictions), sorted_key_dict(predictions[0]))

  def testBinaryClassesWithoutLabelVocabulary(self):
    n_classes = 2
    self._testPredictions(
        n_classes,
        label_vocabulary=None,
        label_output_fn=lambda x: ('%s' % x).encode())

  def testBinaryClassesWithLabelVocabulary(self):
    n_classes = 2
    self._testPredictions(
        n_classes,
        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())

  def testMultiClassesWithoutLabelVocabulary(self):
    n_classes = 4
    self._testPredictions(
        n_classes,
        label_vocabulary=None,
        label_output_fn=lambda x: ('%s' % x).encode())

  def testMultiClassesWithLabelVocabulary(self):
    n_classes = 4
    self._testPredictions(
        n_classes,
        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())

  def testSparseCombiner(self):
    w_a = 2.0
    w_b = 3.0
    w_c = 5.0
    bias = 5.0
    with tf.Graph().as_default():
      tf.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
      tf.Variable([bias], name=BIAS_NAME)
      tf.Variable(
          1, name=tf.compat.v1.GraphKeys.GLOBAL_STEP, dtype=tf.dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    def _input_fn():
      return tf.compat.v1.data.Dataset.from_tensors({
          'language':
              tf.sparse.SparseTensor(
                  values=['a', 'c', 'b', 'c'],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      })

    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
        'language', vocabulary_list=['a', 'b', 'c']),)

    # Check prediction for each sparse_combiner.
    # Check prediction for each sparse_combiner.
    # With sparse_combiner = 'sum', we have
    # logits_1 = w_a + w_c + bias
    #          = 2.0 + 5.0 + 5.0 = 12.0
    # logits_2 = w_b + w_c + bias
    #          = 3.0 + 5.0 + 5.0 = 13.0
    linear_classifier = self._linear_classifier_fn(
        feature_columns=feature_columns, model_dir=self._model_dir)
    predictions = linear_classifier.predict(input_fn=_input_fn)
    predicted_scores = list([x['logits'] for x in predictions])
    self.assertAllClose([[12.0], [13.0]], predicted_scores)

    # With sparse_combiner = 'mean', we have
    # logits_1 = 1/2 * (w_a + w_c) + bias
    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
    # logits_2 = 1/2 * (w_b + w_c) + bias
    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
    linear_classifier = self._linear_classifier_fn(
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        sparse_combiner='mean')
    predictions = linear_classifier.predict(input_fn=_input_fn)
    predicted_scores = list([x['logits'] for x in predictions])
    self.assertAllClose([[8.5], [9.0]], predicted_scores)

    # With sparse_combiner = 'sqrtn', we have
    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
    linear_classifier = self._linear_classifier_fn(
        feature_columns=feature_columns,
        model_dir=self._model_dir,
        sparse_combiner='sqrtn')
    predictions = linear_classifier.predict(input_fn=_input_fn)
    predicted_scores = list([x['logits'] for x in predictions])
    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)


class BaseLinearClassifierIntegrationTest(object):
  """End-to-end train/evaluate/predict/export tests for a linear classifier."""

  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
    self._linear_classifier_fn = linear_classifier_fn
    self._fc_lib = fc_lib

  def setUp(self):
    # Fresh model dir per test so checkpoints don't leak between tests.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
                          predict_input_fn, input_dimension,
                          prediction_length):
    """Runs train -> evaluate -> predict -> export on one estimator."""
    feature_columns = [
        self._fc_lib.numeric_column('x', shape=(input_dimension,))
    ]
    est = self._linear_classifier_fn(
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['classes'] for x in est.predict(predict_input_fn)])
    self.assertAllEqual((prediction_length, 1), predictions.shape)

    # EXPORT
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def _test_numpy_input_fn(self, n_classes):
    """Tests complete flow with numpy_input_fn."""
    input_dimension = 4
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
    data = data.reshape(batch_size, input_dimension)
    target = np.array([1] * batch_size)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=target,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=target,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    self._test_complete_flow(
        n_classes=n_classes,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        prediction_length=prediction_length)

  def test_binary_classes_numpy_input_fn(self):
    self._test_numpy_input_fn(n_classes=2)

  def test_multi_classes_numpy_input_fn(self):
    self._test_numpy_input_fn(n_classes=4)

  def _test_pandas_input_fn(self, n_classes):
    """Tests complete flow with pandas_input_fn."""
    if not HAS_PANDAS:
      return

    # Pandas DataFrame naturally supports 1 dim data only.
    input_dimension = 1
    batch_size = 10
    data = np.array([1., 2., 3., 4.], dtype=np.float32)
    target = np.array([1, 0, 1, 0], dtype=np.int32)
    x = pd.DataFrame({'x': data})
    y = pd.Series(target)
    prediction_length = 4

    train_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    eval_input_fn = pandas_io.pandas_input_fn(
        x=x, y=y, batch_size=batch_size, shuffle=False)
    predict_input_fn = pandas_io.pandas_input_fn(
        x=x, batch_size=batch_size, shuffle=False)

    self._test_complete_flow(
        n_classes=n_classes,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        prediction_length=prediction_length)

  def test_binary_classes_pandas_input_fn(self):
    self._test_pandas_input_fn(n_classes=2)

  def test_multi_classes_pandas_input_fn(self):
    self._test_pandas_input_fn(n_classes=4)

  def _test_input_fn_from_parse_example(self, n_classes):
    """Tests complete flow with input_fn constructed from parse_example."""
    input_dimension = 2
    batch_size = 10
    prediction_length = batch_size
    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
    data = data.reshape(batch_size, input_dimension)
    target = np.array([1] * batch_size, dtype=np.int64)

    serialized_examples = []
    for x, y in zip(data, target):
      example = example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  'x':
                      feature_pb2.Feature(
                          float_list=feature_pb2.FloatList(value=x)),
                  'y':
                      feature_pb2.Feature(
                          int64_list=feature_pb2.Int64List(value=[y])),
              }))
      serialized_examples.append(example.SerializeToString())

    feature_spec = {
        'x': tf.io.FixedLenFeature([input_dimension], tf.dtypes.float32),
        'y': tf.io.FixedLenFeature([1], tf.dtypes.int64),
    }

    def _train_input_fn():
      feature_map = tf.compat.v1.io.parse_example(serialized_examples,
                                                  feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _eval_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      labels = features.pop('y')
      return features, labels

    def _predict_input_fn():
      feature_map = tf.compat.v1.io.parse_example(
          tf.compat.v1.train.limit_epochs(serialized_examples, num_epochs=1),
          feature_spec)
      features = queue_parsed_features(feature_map)
      # Drop the label so predict() receives features only.
      features.pop('y')
      return features, None

    self._test_complete_flow(
        n_classes=n_classes,
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        prediction_length=prediction_length)

  def test_binary_classes_input_fn_from_parse_example(self):
    self._test_input_fn_from_parse_example(n_classes=2)

  def test_multi_classes_input_fn_from_parse_example(self):
    self._test_input_fn_from_parse_example(n_classes=4)


class BaseLinearLogitFnTest(object):
  """Tests the linear logit-building function directly."""

  def __init__(self, fc_lib=feature_column):
    self._fc_lib = fc_lib

  def test_basic_logit_correctness(self):
    """linear_logit_fn simply wraps feature_column_lib.linear_model."""
    age = self._fc_lib.numeric_column('age')
    with tf.Graph().as_default():
      logit_fn = linear.linear_logit_fn_builder(units=2, feature_columns=[age])
      logits = logit_fn(features={'age': [[23.], [31.]]})
      bias_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
          'linear_model/bias_weights')[0]
      age_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, 'linear_model/age')[0]
      with tf.compat.v1.Session() as sess:
        sess.run([tf.compat.v1.initializers.global_variables()])
        self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
        sess.run(bias_var.assign([10., 5.]))
        self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
        sess.run(age_var.assign([[2.0, 3.0]]))
        # [2 * 23 + 10, 3 * 23 + 5] = [56, 74].
        # [2 * 31 + 10, 3 * 31 + 5] = [72, 98]
        self.assertAllClose([[56., 74.], [72., 98.]], logits.eval())

  def test_compute_fraction_of_zero(self):
    """Tests the calculation of sparsity."""
    # This variant covers the v1 feature-column path only.
    if self._fc_lib != feature_column:
      return
    age = tf.feature_column.numeric_column('age')
    occupation = feature_column.categorical_column_with_hash_bucket(
        'occupation', hash_bucket_size=5)
    with tf.Graph().as_default():
      cols_to_vars = {}
      tf.compat.v1.feature_column.linear_model(
          features={
              'age': [[23.], [31.]],
              'occupation': [['doctor'], ['engineer']]
          },
          feature_columns=[age, occupation],
          units=3,
          cols_to_vars=cols_to_vars)
      # The bias is excluded from the sparsity calculation.
      cols_to_vars.pop('bias')
      fraction_zero = linear._compute_fraction_of_zero(
          list(cols_to_vars.values()))
      age_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, 'linear_model/age')[0]
      with tf.compat.v1.Session() as sess:
        sess.run([tf.compat.v1.initializers.global_variables()])
        # Upon initialization, all variables will be zero.
        self.assertAllClose(1, fraction_zero.eval())

        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
        # x 3-dim output) are zero.
        self.assertAllClose(16. / 18., fraction_zero.eval())

  def test_compute_fraction_of_zero_v2(self):
    """Tests the calculation of sparsity."""
    # This variant covers the v2 LinearModel path only.
    if self._fc_lib != feature_column_v2:
      return

    age = tf.feature_column.numeric_column('age')
    occupation = tf.feature_column.categorical_column_with_hash_bucket(
        'occupation', hash_bucket_size=5)
    with tf.Graph().as_default():
      model = feature_column_v2.LinearModel(
          feature_columns=[age, occupation], units=3, name='linear_model')
      features = {
          'age': [[23.], [31.]],
          'occupation': [['doctor'], ['engineer']]
      }
      model(features)
      variables = model.variables
      # The bias is excluded from the sparsity calculation.
      variables.remove(model.bias)
      fraction_zero = linear._compute_fraction_of_zero(variables)
      age_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, 'linear_model/age')[0]
      with tf.compat.v1.Session() as sess:
        sess.run([tf.compat.v1.initializers.global_variables()])
        # Upon initialization, all variables will be zero.
        self.assertAllClose(1, fraction_zero.eval())

        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
        # x 3-dim output) are zero.
        self.assertAllClose(16. / 18., fraction_zero.eval())


class BaseLinearWarmStartingTest(object):
  """Tests warm-starting behavior of LinearClassifier/LinearRegressor."""

  def __init__(self,
               _linear_classifier_fn,
               _linear_regressor_fn,
               fc_lib=feature_column):
    # Factories that build the estimator variants (v1/v2) under test.
    self._linear_classifier_fn = _linear_classifier_fn
    self._linear_regressor_fn = _linear_regressor_fn
    # Feature-column library (v1 or v2) used to construct input columns.
    self._fc_lib = fc_lib

  def setUp(self):
    # Create a directory to save our old checkpoint and vocabularies to.
    self._ckpt_and_vocab_dir = tempfile.mkdtemp()

    # Make a dummy input_fn.
    def _input_fn():
      features = {
          'age': [[23.], [31.]],
          'age_in_years': [[23.], [31.]],
          'occupation': [['doctor'], ['consultant']]
      }
      return features, [0, 1]

    self._input_fn = _input_fn

  def tearDown(self):
    # Clean up checkpoint / vocab dir.
    tf.compat.v1.summary.FileWriterCache.clear()
    shutil.rmtree(self._ckpt_and_vocab_dir)

  def test_classifier_basic_warm_starting(self):
    """Tests correctness of LinearClassifier default warm-start."""
    age = self._fc_lib.numeric_column('age')

    # Create a LinearClassifier and train to save a checkpoint.
    linear_classifier = self._linear_classifier_fn(
        feature_columns=[age],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearClassifier, warm-started from the first.  Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_linear_classifier = self._linear_classifier_fn(
        feature_columns=[age],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        warm_start_from=linear_classifier.model_dir)
    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Every variable should match the first model exactly.
    for variable_name in warm_started_linear_classifier.get_variable_names():
      self.assertAllClose(
          linear_classifier.get_variable_value(variable_name),
          warm_started_linear_classifier.get_variable_value(variable_name))

  def test_regressor_basic_warm_starting(self):
    """Tests correctness of LinearRegressor default warm-start."""
    age = self._fc_lib.numeric_column('age')

    # Create a LinearRegressor and train to save a checkpoint.
    linear_regressor = self._linear_regressor_fn(
        feature_columns=[age],
        model_dir=self._ckpt_and_vocab_dir,
        optimizer='SGD')
    linear_regressor.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearRegressor, warm-started from the first.  Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_linear_regressor = self._linear_regressor_fn(
        feature_columns=[age],
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        warm_start_from=linear_regressor.model_dir)
    warm_started_linear_regressor.train(input_fn=self._input_fn, max_steps=1)

    # Every variable should match the first model exactly.
    for variable_name in warm_started_linear_regressor.get_variable_names():
      self.assertAllClose(
          linear_regressor.get_variable_value(variable_name),
          warm_started_linear_regressor.get_variable_value(variable_name))

  def test_warm_starting_selective_variables(self):
    """Tests selecting variables to warm-start."""
    age = self._fc_lib.numeric_column('age')

    # Create a LinearClassifier and train to save a checkpoint.
    linear_classifier = self._linear_classifier_fn(
        feature_columns=[age],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearClassifier, warm-started from the first.  Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_linear_classifier = self._linear_classifier_fn(
        feature_columns=[age],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        # The provided regular expression will only warm-start the age variable
        # and not the bias.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=linear_classifier.model_dir,
            vars_to_warm_start='.*(age).*'))
    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    self.assertAllClose(
        linear_classifier.get_variable_value(AGE_WEIGHT_NAME),
        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
    # Bias should still be zero from initialization.
    self.assertAllClose(
        [0.0] * 4,
        warm_started_linear_classifier.get_variable_value(BIAS_NAME))

  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
    """Tests warm-starting with vocab remapping and partitioning."""
    vocab_list = ['doctor', 'lawyer', 'consultant']
    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
    with open(vocab_file, 'w') as f:
      f.write('\n'.join(vocab_list))
    occupation = self._fc_lib.categorical_column_with_vocabulary_file(
        'occupation',
        vocabulary_file=vocab_file,
        vocabulary_size=len(vocab_list))

    # Create a LinearClassifier and train to save a checkpoint.
    partitioner = tf.compat.v1.fixed_size_partitioner(num_shards=2)
    linear_classifier = self._linear_classifier_fn(
        feature_columns=[occupation],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD',
        partitioner=partitioner)
    linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearClassifier, warm-started from the first.  Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).  Use a new FeatureColumn with a
    # different vocabulary for occupation.
    new_vocab_list = ['doctor', 'consultant', 'engineer']
    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
                                  'new_occupation_vocab')
    with open(new_vocab_file, 'w') as f:
      f.write('\n'.join(new_vocab_list))
    new_occupation = self._fc_lib.categorical_column_with_vocabulary_file(
        'occupation',
        vocabulary_file=new_vocab_file,
        vocabulary_size=len(new_vocab_list))
    # We can create our VocabInfo object from the new and old occupation
    # FeatureColumn's.
    occupation_vocab_info = estimator.VocabInfo(
        new_vocab=new_occupation.vocabulary_file,
        new_vocab_size=new_occupation.vocabulary_size,
        num_oov_buckets=new_occupation.num_oov_buckets,
        old_vocab=occupation.vocabulary_file,
        old_vocab_size=occupation.vocabulary_size,
        # Can't use constant_initializer with load_and_remap.  In practice,
        # use a truncated normal initializer.
        backup_initializer=tf.compat.v1.initializers.random_uniform(
            minval=0.39, maxval=0.39))
    warm_started_linear_classifier = self._linear_classifier_fn(
        feature_columns=[occupation],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=linear_classifier.model_dir,
            var_name_to_vocab_info={
                OCCUPATION_WEIGHT_NAME: occupation_vocab_info
            },
            # Explicitly providing None here will only warm-start variables
            # referenced in var_name_to_vocab_info (the bias will not be
            # warm-started).
            vars_to_warm_start=None),
        partitioner=partitioner)
    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # 'doctor' was ID-0 and still ID-0.
    self.assertAllClose(
        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[0, :],
        warm_started_linear_classifier.get_variable_value(
            OCCUPATION_WEIGHT_NAME)[0, :])
    # 'consultant' was ID-2 and now ID-1.
    self.assertAllClose(
        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[2, :],
        warm_started_linear_classifier.get_variable_value(
            OCCUPATION_WEIGHT_NAME)[1, :])
    # 'engineer' is a new entry and should be initialized with the
    # backup_initializer in VocabInfo.
    self.assertAllClose([0.39] * 4,
                        warm_started_linear_classifier.get_variable_value(
                            OCCUPATION_WEIGHT_NAME)[2, :])
    # Bias should still be zero (from initialization logic).
    self.assertAllClose(
        [0.0] * 4,
        warm_started_linear_classifier.get_variable_value(BIAS_NAME))

  def test_warm_starting_with_naming_change(self):
    """Tests warm-starting with a Tensor name remapping."""
    age_in_years = self._fc_lib.numeric_column('age_in_years')

    # Create a LinearClassifier and train to save a checkpoint.
    linear_classifier = self._linear_classifier_fn(
        feature_columns=[age_in_years],
        model_dir=self._ckpt_and_vocab_dir,
        n_classes=4,
        optimizer='SGD')
    linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    # Create a second LinearClassifier, warm-started from the first.  Use a
    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
    # accumulator values that change).
    warm_started_linear_classifier = self._linear_classifier_fn(
        feature_columns=[self._fc_lib.numeric_column('age')],
        n_classes=4,
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=0.0),
        # The 'age' variable corresponds to the 'age_in_years' variable in the
        # previous model.
        warm_start_from=estimator.WarmStartSettings(
            ckpt_to_initialize_from=linear_classifier.model_dir,
            var_name_to_prev_var_name={
                AGE_WEIGHT_NAME:
                    AGE_WEIGHT_NAME.replace('age', 'age_in_years')
            }))
    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)

    self.assertAllClose(
        linear_classifier.get_variable_value(
            AGE_WEIGHT_NAME.replace('age', 'age_in_years')),
        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
    # The bias is also warm-started (with no name remapping).
    self.assertAllClose(
        linear_classifier.get_variable_value(BIAS_NAME),
        warm_started_linear_classifier.get_variable_value(BIAS_NAME))


================================================
FILE: tensorflow_estimator/python/estimator/distribute_strategy_estimator_integration_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests that show that DistributionStrategy works with canned Estimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import tempfile

from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator import run_config
from tensorflow_estimator.python.estimator import training
from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.export import export_lib as export
from tensorflow_estimator.python.estimator.inputs import numpy_io


class DNNLinearCombinedClassifierIntegrationTest(tf.test.TestCase,
                                                 parameterized.TestCase):
  """End-to-end tests of canned Estimators under DistributionStrategy."""

  def setUp(self):
    # Fresh model dir per test so checkpoints never leak across tests.
    self._model_dir = tempfile.mkdtemp()

  def dataset_input_fn(self, x, y, batch_size, shuffle):
    """Returns an input_fn producing batched (x, y) pairs, repeated 10x."""

    def input_fn():
      dataset = tf.compat.v1.data.Dataset.from_tensor_slices((x, y))
      if shuffle:
        dataset = dataset.shuffle(batch_size)
      dataset = dataset.repeat(10).batch(batch_size)
      return dataset

    return input_fn

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=['graph'],
          distribution=[
              tf.compat.v2.__internal__.distribute.combinations.one_device_strategy,
              tf.compat.v2.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
              tf.compat.v2.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus
          ],
          use_train_and_evaluate=[True, False]))
  def test_estimator_with_strategy_hooks(self, distribution,
                                         use_train_and_evaluate):
    """Verifies training/evaluation hooks run under an eval strategy."""
    config = run_config.RunConfig(eval_distribute=distribution)

    def _input_map_fn(tensor):
      return {'feature': tensor}, tensor

    def input_fn():
      return tf.data.Dataset.from_tensors(
          [1.]).repeat(10).batch(5).map(_input_map_fn)

    def model_fn(features, labels, mode):
      del features, labels
      global_step = tf.compat.v1.train.get_global_step()
      if mode == model_fn_lib.ModeKeys.TRAIN:
        train_hook1 = tf.compat.v1.train.StepCounterHook(
            every_n_steps=1, output_dir=self.get_temp_dir())
        # MagicMock wrapper lets hook invocation be observed.
        train_hook2 = tf.compat.v1.test.mock.MagicMock(
            wraps=tf.compat.v1.train.SessionRunHook(),
            spec=tf.compat.v1.train.SessionRunHook)
        return model_fn_lib.EstimatorSpec(
            mode,
            loss=tf.constant(1.),
            train_op=global_step.assign_add(1),
            training_hooks=[train_hook1, train_hook2])
      if mode == model_fn_lib.ModeKeys.EVAL:
        eval_hook1 = tf.compat.v1.train.StepCounterHook(
            every_n_steps=1, output_dir=self.get_temp_dir())
        eval_hook2 = tf.compat.v1.test.mock.MagicMock(
            wraps=tf.compat.v1.train.SessionRunHook(),
            spec=tf.compat.v1.train.SessionRunHook)
        return model_fn_lib.EstimatorSpec(
            mode=mode,
            loss=tf.constant(1.),
            evaluation_hooks=[eval_hook1, eval_hook2])

    num_steps = 10
    estimator = estimator_lib.EstimatorV2(
        model_fn=model_fn, model_dir=self.get_temp_dir(), config=config)
    if use_train_and_evaluate:
      training.train_and_evaluate(
          estimator, training.TrainSpec(input_fn, max_steps=num_steps),
          training.EvalSpec(input_fn))
    else:
      estimator.train(input_fn, steps=num_steps)
      estimator.evaluate(input_fn, steps=num_steps)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=['graph'],
          distribution=[
              tf.compat.v2.__internal__.distribute.combinations.one_device_strategy,
              tf.compat.v2.__internal__.distribute.combinations.mirrored_strategy_with_gpu_and_cpu,
              tf.compat.v2.__internal__.distribute.combinations.mirrored_strategy_with_two_gpus
          ],
          use_train_and_evaluate=[True, False]))
  def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    """Runs train -> evaluate -> predict -> export under a strategy."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # Per-replica batch size keeps the global batch constant across replicas.
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // distribution.num_replicas_in_sync,
        shuffle=True)
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // distribution.num_replicas_in_sync,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer='Adagrad',
        linear_optimizer='Adagrad',
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    if use_train_and_evaluate:
      scores, _ = training.train_and_evaluate(
          estimator,
          training.TrainSpec(train_input_fn, max_steps=num_steps),
          training.EvalSpec(eval_input_fn))
    else:
      estimator.train(train_input_fn, steps=num_steps)
      scores = estimator.evaluate(eval_input_fn)

    self.assertEqual(num_steps, scores[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', scores)

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(
        feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_saved_model(tempfile.mkdtemp(),
                                              serving_input_receiver_fn)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

  def tearDown(self):
    if self._model_dir:
      tf.compat.v1.summary.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/distribute_strategy_estimator_training_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests that show Distribute Coordinator works with Estimator.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import glob import json import os import sys import tempfile from absl.testing import parameterized import numpy as np import tensorflow as tf from tensorflow.python.distribute import distribute_coordinator as dc from tensorflow.python.distribute import estimator_training as dc_training from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import multi_worker_util from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver from tensorflow.python.distribute.distribute_config import DistributeConfig from tensorflow.python.eager import context from tensorflow_estimator.python.estimator import exporter as exporter_lib from tensorflow_estimator.python.estimator import run_config as run_config_lib from tensorflow_estimator.python.estimator import training as estimator_training from tensorflow_estimator.python.estimator.canned import dnn_linear_combined from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.export import export as export_lib BATCH_SIZE = 10 LABEL_DIMENSION = 2 DATA = np.linspace( 0., 2., BATCH_SIZE * LABEL_DIMENSION, dtype=np.float32).reshape(BATCH_SIZE, LABEL_DIMENSION) EVAL_NAME = "foo" EXPORTER_NAME = "saved_model_exporter" MAX_STEPS = 10 CHIEF = dc._TaskType.CHIEF EVALUATOR = dc._TaskType.EVALUATOR WORKER = dc._TaskType.WORKER PS = dc._TaskType.PS original_run_std_server = dc._run_std_server class DistributeCoordinatorIntegrationTest( multi_worker_test_base.IndependentWorkerTestBase, parameterized.TestCase): @classmethod def setUpClass(cls): """Create a local cluster with 2 workers.""" super(DistributeCoordinatorIntegrationTest, cls).setUpClass() cls._cluster_spec 
= multi_worker_test_base.create_in_process_cluster( num_workers=3, num_ps=2, has_eval=True) def setUp(self): self._model_dir = tempfile.mkdtemp() super(DistributeCoordinatorIntegrationTest, self).setUp() def dataset_input_fn(self, x, y, batch_size, shuffle): def input_fn(): dataset = tf.compat.v1.data.Dataset.from_tensor_slices((x, y)) if shuffle: dataset = dataset.shuffle(batch_size) dataset = dataset.repeat(100).batch(batch_size) return dataset return input_fn def _get_exporter(self, name, fc): feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(fc) serving_input_receiver_fn = ( export_lib.build_parsing_serving_input_receiver_fn(feature_spec)) return exporter_lib.LatestExporter( name, serving_input_receiver_fn=serving_input_receiver_fn) def _extract_loss_and_global_step(self, event_folder): """Returns the loss and global step in last event.""" event_paths = glob.glob(os.path.join(event_folder, "events*")) self.assertNotEmpty( event_paths, msg="Event file not found in dir %s" % event_folder) loss = None global_step_count = None for e in tf.compat.v1.train.summary_iterator(event_paths[-1]): current_loss = None for v in e.summary.value: if v.tag == "loss": current_loss = v.simple_value # If loss is not found, global step is meaningless. 
if current_loss is None: continue current_global_step = e.step if global_step_count is None or current_global_step > global_step_count: global_step_count = current_global_step loss = current_loss return (loss, global_step_count) def _get_estimator(self, train_distribute, eval_distribute, remote_cluster=None): input_dimension = LABEL_DIMENSION linear_feature_columns = [ tf.compat.v1.feature_column.numeric_column("x", shape=(input_dimension,)) ] dnn_feature_columns = [ tf.compat.v1.feature_column.numeric_column("x", shape=(input_dimension,)) ] return dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=LABEL_DIMENSION, model_dir=self._model_dir, dnn_optimizer="Adagrad", linear_optimizer="Adagrad", config=run_config_lib.RunConfig( experimental_distribute=DistributeConfig( train_distribute=train_distribute, eval_distribute=eval_distribute, remote_cluster=remote_cluster))) def _complete_flow(self, train_distribute, eval_distribute, remote_cluster=None, use_train_and_evaluate=True): estimator = self._get_estimator(train_distribute, eval_distribute, remote_cluster) input_dimension = LABEL_DIMENSION train_input_fn = self.dataset_input_fn( x={"x": DATA}, y=DATA, batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync, shuffle=True) if eval_distribute: eval_batch_size = BATCH_SIZE // eval_distribute.num_replicas_in_sync else: eval_batch_size = BATCH_SIZE eval_input_fn = self.dataset_input_fn( x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False) linear_feature_columns = [ tf.compat.v1.feature_column.numeric_column("x", shape=(input_dimension,)) ] dnn_feature_columns = [ tf.compat.v1.feature_column.numeric_column("x", shape=(input_dimension,)) ] feature_columns = linear_feature_columns + dnn_feature_columns eval_spec = estimator_training.EvalSpec( name=EVAL_NAME, input_fn=eval_input_fn, steps=None, 
exporters=self._get_exporter(EXPORTER_NAME, feature_columns), start_delay_secs=0, throttle_secs=1) if use_train_and_evaluate: estimator_training.train_and_evaluate( estimator, estimator_training.TrainSpec(train_input_fn, max_steps=MAX_STEPS), eval_spec) else: estimator.train(train_input_fn, max_steps=MAX_STEPS) latest_ckpt_path = estimator.latest_checkpoint() metrics = estimator.evaluate( eval_input_fn, checkpoint_path=latest_ckpt_path, name=EVAL_NAME) # Export the eval result to files. eval_result = estimator_training._EvalResult( status=estimator_training._EvalStatus.EVALUATED, metrics=metrics, checkpoint_path=latest_ckpt_path) evaluator = estimator_training._TrainingExecutor._Evaluator( estimator, eval_spec, None) evaluator._export_eval_result(eval_result, True) return estimator def _inspect_train_and_eval_events(self, estimator): # Make sure nothing is stuck in limbo. tf.compat.v1.summary.FileWriterCache.clear() # Examine the training events. Use a range to check global step to avoid # flakyness due to global step race condition. training_loss, _ = self._extract_loss_and_global_step(self._model_dir) self.assertIsNotNone(training_loss) # Examine the eval events. The global step should be accurate. eval_dir = os.path.join(self._model_dir, "eval_" + EVAL_NAME) eval_loss, eval_global_step = self._extract_loss_and_global_step( event_folder=eval_dir) self.assertIsNotNone(eval_loss) self.assertGreaterEqual(eval_global_step, MAX_STEPS) # Examine the export folder. export_dir = os.path.join( os.path.join(self._model_dir, "export"), EXPORTER_NAME) self.assertTrue(tf.compat.v1.gfile.Exists(export_dir)) # Examine the ckpt for predict. 
    def predict_input_fn():
      return tf.compat.v1.data.Dataset.from_tensor_slices({
          "x": DATA
      }).batch(BATCH_SIZE)

    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((BATCH_SIZE, LABEL_DIMENSION), predicted_proba.shape)

  def _make_cross_device_ops(self, num_gpus_per_worker):
    # num_gpus_per_worker is currently unused; reduction always goes through
    # a single device.
    return tf.distribute.ReductionToOneDevice()

  def _get_strategy_object(self, strategy_cls, cluster_spec=None,
                           eval_strategy=False):
    """Instantiates `strategy_cls` with appropriate test configuration."""
    if strategy_cls == tf.compat.v1.distribute.MirroredStrategy:
      if eval_strategy:
        return strategy_cls()
      else:
        return strategy_cls(
            cross_device_ops=self._make_cross_device_ops(
                num_gpus_per_worker=context.num_gpus()))
    # NOTE(review): this branch is unreachable — the preceding `if` already
    # matches MirroredStrategy for both eval and non-eval cases. Looks like
    # dead code left from an older strategy API; confirm before removing.
    elif (strategy_cls == tf.compat.v1.distribute.MirroredStrategy and
          not eval_strategy):
      return strategy_cls(
          num_gpus_per_worker=context.num_gpus(),
          cross_device_ops=self._make_cross_device_ops(
              num_gpus_per_worker=context.num_gpus()))
    elif strategy_cls == tf.compat.v1.distribute.experimental.ParameterServerStrategy:
      assert cluster_spec is not None
      cluster_resolver = SimpleClusterResolver(
          cluster_spec=multi_worker_util.normalize_cluster_spec(cluster_spec),
          task_type="ps",
          task_id=0,
          num_accelerators={"GPU": context.num_gpus()})
      return strategy_cls(cluster_resolver)
    elif strategy_cls == tf.compat.v1.distribute.experimental.CentralStorageStrategy:
      return strategy_cls._from_num_gpus(context.num_gpus())
    else:
      return strategy_cls()

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy,
              tf.compat.v1.distribute.MirroredStrategy,
              tf.compat.v1.distribute.experimental.ParameterServerStrategy
          ],
          eval_distribute_cls=[
              None,
              tf.compat.v1.distribute.MirroredStrategy,
              tf.compat.v2.distribute.experimental.CentralStorageStrategy,
              tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_standalone_client(self, train_distribute_cls,
                                           eval_distribute_cls):
    cluster_spec = copy.deepcopy(self._cluster_spec)
    # Only ParameterServerStrategy needs "ps" jobs in the cluster.
    if (train_distribute_cls !=
        tf.compat.v1.distribute.experimental.ParameterServerStrategy):
      cluster_spec.pop("ps", None)
    train_distribute = self._get_strategy_object(
        train_distribute_cls, cluster_spec=cluster_spec)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(
          eval_distribute_cls, eval_strategy=True)
    else:
      eval_distribute = None

    estimator = self._complete_flow(train_distribute, eval_distribute,
                                    cluster_spec)
    self._inspect_train_and_eval_events(estimator)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=["graph"],
          eval_distribute_class=[
              None,
              tf.compat.v1.distribute.MirroredStrategy,
              tf.compat.v2.distribute.experimental.CentralStorageStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_standalone_client_collective_nccl(
      self, eval_distribute_class):
    # Train with collective all-reduce over NCCL.
    train_distribute = (
        tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy(
            communication=tf.compat.v1.distribute.experimental
            .CollectiveCommunication.NCCL))

    if eval_distribute_class:
      eval_distribute = self._get_strategy_object(
          eval_distribute_class, eval_strategy=True)
    else:
      eval_distribute = None

    cluster_spec = copy.deepcopy(self._cluster_spec)
    cluster_spec.pop("ps", None)
    estimator = self._complete_flow(train_distribute, eval_distribute,
                                    cluster_spec)
    self._inspect_train_and_eval_events(estimator)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              tf.compat.v1.distribute.MirroredStrategy,
          ],
          eval_distribute_cls=[
              None,
              tf.compat.v1.distribute.MirroredStrategy,
          ],
          required_gpus=[0, 1]))
  def test_estimator_standalone_client(self, train_distribute_cls,
                                       eval_distribute_cls):
    train_distribute = self._get_strategy_object(train_distribute_cls)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(eval_distribute_cls)
    else:
      eval_distribute = None

    # We use the whole cluster for evaluation.
    cluster = copy.deepcopy(self._cluster_spec)
    cluster.pop("evaluator", None)

    estimator = self._complete_flow(
        train_distribute,
        eval_distribute,
        remote_cluster=cluster,
        use_train_and_evaluate=False)
    self._inspect_train_and_eval_events(estimator)

  def _mock_run_std_server(self, *args, **kwargs):
    """Wraps the real std-server launcher with a barrier."""
    ret = original_run_std_server(*args, **kwargs)
    # Wait for all std servers to be brought up in order to reduce the chance
    # of remote sessions taking local ports that have been assigned to std
    # servers.
    self._barrier.wait()
    return ret

  def _independent_worker_fn(
      self,
      train_distribute,
      eval_distribute,
  ):
    """Per-task body for independent-worker tests."""
    # Deep-copy so each simulated task gets its own strategy instance.
    train_distribute = copy.deepcopy(train_distribute)
    eval_distribute = copy.deepcopy(eval_distribute)
    with tf.compat.v1.test.mock.patch.object(dc, "_run_std_server",
                                             self._mock_run_std_server):
      self._complete_flow(train_distribute, eval_distribute)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy,
              tf.compat.v1.distribute.experimental.ParameterServerStrategy,
          ],
          eval_distribute_cls=[
              None,
              tf.compat.v1.distribute.MirroredStrategy,
              tf.compat.v2.distribute.experimental.CentralStorageStrategy,
              tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_independent_worker_between_graph(
      self, train_distribute_cls, eval_distribute_cls):
    if (context.num_gpus() < 2 and eval_distribute_cls ==
        tf.compat.v1.distribute.experimental.MultiWorkerMirroredStrategy):
      self.skipTest("`CollectiveAllReduceStrategy` needs at least two towers.")

    if (train_distribute_cls ==
        tf.compat.v1.distribute.experimental.ParameterServerStrategy):
      cluster_spec = tf.compat.v2.__internal__.distribute.multi_process_runner.create_cluster_spec(
          num_workers=3, num_ps=2, has_eval=True)  # 3 workers, 2 ps.
      # Barrier size = 3 workers + 2 ps.
      self._barrier = dc._Barrier(5)
    else:
      cluster_spec = tf.compat.v2.__internal__.distribute.multi_process_runner.create_cluster_spec(
          num_workers=3, num_ps=0, has_eval=True)  # 3 workers.
      self._barrier = dc._Barrier(3)

    train_distribute = self._get_strategy_object(
        train_distribute_cls, cluster_spec=cluster_spec)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(
          eval_distribute_cls, eval_strategy=True)
    else:
      eval_distribute = None

    threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                 cluster_spec, train_distribute,
                                                 eval_distribute)
    threads_to_join = []
    for task_type, ts in threads.items():
      # PS threads run a blocking server and never join; skip them.
      if task_type == PS:
        continue
      for t in ts:
        threads_to_join.append(t)
    self.join_independent_workers(threads_to_join)

    estimator = self._get_estimator(train_distribute, eval_distribute)
    self._inspect_train_and_eval_events(estimator)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          mode=["graph"],
          train_distribute_cls=[
              tf.compat.v1.distribute.MirroredStrategy,
          ],
          eval_distribute_cls=[
              None,
              tf.compat.v1.distribute.MirroredStrategy,
          ],
          required_gpus=[0, 1]))
  def test_complete_flow_independent_worker_in_graph(self, train_distribute_cls,
                                                     eval_distribute_cls):
    train_distribute = self._get_strategy_object(train_distribute_cls)

    if eval_distribute_cls:
      eval_distribute = self._get_strategy_object(
          eval_distribute_cls, eval_strategy=True)
    else:
      eval_distribute = None

    cluster_spec = tf.compat.v2.__internal__.distribute.multi_process_runner.create_cluster_spec(
        num_workers=3, num_ps=0, has_eval=True)  # 3 workers.
    self._barrier = dc._Barrier(3)
    threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                 cluster_spec, train_distribute,
                                                 eval_distribute)
    # In-graph mode: only the first worker and the evaluator drive the run.
    self.join_independent_workers([threads[WORKER][0], threads[EVALUATOR][0]])

    estimator = self._get_estimator(train_distribute, eval_distribute)
    self._inspect_train_and_eval_events(estimator)


# Canned TF_CONFIG payloads used by RunConfigTest below.
TF_CONFIG_WITH_CHIEF = {
    "cluster": {
        "chief": ["fake_chief"],
    },
    "task": {
        "type": "chief",
        "index": 0
    }
}

TF_CONFIG_WITH_MASTER = {
    "cluster": {
        "master": ["fake_master"],
    },
    "task": {
        "type": "master",
        "index": 0
    }
}

TF_CONFIG_WITHOUT_TASK = {"cluster": {"chief": ["fake_worker"]}}


class RunConfigTest(tf.compat.v1.test.TestCase):
  """Tests RunConfig interaction with the distribute coordinator."""

  def test_previously_unexpected_cluster_spec(self):
    # Should not raise even though the TF_CONFIG has no "task" section.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITHOUT_TASK)}):
      run_config_lib.RunConfig(
          experimental_distribute=DistributeConfig(
              train_distribute=tf.compat.v1.distribute.MirroredStrategy(
                  ["/device:GPU:0", "/device:GPU:1"])))

  def test_should_run_distribute_coordinator(self):
    """Tests that should_run_distribute_coordinator return a correct value."""
    # We don't use distribute coordinator for local training.
    self.assertFalse(
        dc_training.should_run_distribute_coordinator(
            run_config_lib.RunConfig()))

    # When `train_distribute` is not specified, don't use distribute
    # coordinator.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
      self.assertFalse(
          dc_training.should_run_distribute_coordinator(
              run_config_lib.RunConfig()))

    # When `train_distribute` is specified and TF_CONFIG is detected, use
    # distribute coordinator.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
      config_with_train_distribute = run_config_lib.RunConfig(
          experimental_distribute=DistributeConfig(
              train_distribute=tf.compat.v1.distribute.MirroredStrategy(
                  ["/device:GPU:0", "/device:GPU:1"])))
      config_with_eval_distribute = run_config_lib.RunConfig(
          experimental_distribute=DistributeConfig(
              eval_distribute=tf.compat.v1.distribute.MirroredStrategy(
                  ["/device:GPU:0", "/device:GPU:1"])))
    self.assertTrue(
        dc_training.should_run_distribute_coordinator(
            config_with_train_distribute))
    # Only `eval_distribute` is set: coordinator is not needed.
    self.assertFalse(
        dc_training.should_run_distribute_coordinator(
            config_with_eval_distribute))

    # With a master in the cluster, don't run distribute coordinator.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_MASTER)}):
      config = run_config_lib.RunConfig(
          experimental_distribute=DistributeConfig(
              train_distribute=tf.compat.v1.distribute.MirroredStrategy(
                  ["/device:GPU:0", "/device:GPU:1"])))
      self.assertFalse(dc_training.should_run_distribute_coordinator(config))

  def test_init_run_config_duplicate_distribute(self):
    # Setting a strategy both directly and via experimental_distribute is an
    # error.
    with self.assertRaises(ValueError):
      run_config_lib.RunConfig(
          train_distribute=tf.compat.v1.distribute.MirroredStrategy(),
          experimental_distribute=DistributeConfig(
              train_distribute=tf.compat.v1.distribute.MirroredStrategy()))

    with self.assertRaises(ValueError):
      run_config_lib.RunConfig(
          eval_distribute=tf.compat.v1.distribute.MirroredStrategy(),
          experimental_distribute=DistributeConfig(
              eval_distribute=tf.compat.v1.distribute.MirroredStrategy()))

  def test_init_run_config_none_distribute_coordinator_mode(self):
    # We don't use distribute coordinator for local training.
    config = run_config_lib.RunConfig(
        train_distribute=tf.compat.v1.distribute.MirroredStrategy())
    dc_training.init_run_config(config, {})
    self.assertIsNone(config._distribute_coordinator_mode)

    # With a master in the cluster, don't run distribute coordinator.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_MASTER)}):
      config = run_config_lib.RunConfig(
          train_distribute=tf.compat.v1.distribute.MirroredStrategy())
      self.assertIsNone(config._distribute_coordinator_mode)

    # When `train_distribute` is not specified, don't use distribute
    # coordinator.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
      config = run_config_lib.RunConfig()
      self.assertFalse(hasattr(config, "_distribute_coordinator_mode"))

  def test_init_run_config_independent_worker(self):
    # When `train_distribute` is specified and TF_CONFIG is detected, use
    # distribute coordinator with INDEPENDENT_WORKER mode.
    with tf.compat.v1.test.mock.patch.dict(
        "os.environ", {"TF_CONFIG": json.dumps(TF_CONFIG_WITH_CHIEF)}):
      config = run_config_lib.RunConfig(
          train_distribute=tf.compat.v1.distribute.MirroredStrategy())
    self.assertEqual(config._distribute_coordinator_mode,
                     dc.CoordinatorMode.INDEPENDENT_WORKER)

  def test_init_run_config_standalone_client(self):
    # When `train_distribute` is specified, TF_CONFIG is detected and
    # `experimental.remote_cluster` is set use distribute coordinator with
    # STANDALONE_CLIENT mode.
    config = run_config_lib.RunConfig(
        train_distribute=tf.compat.v1.distribute.MirroredStrategy(),
        experimental_distribute=DistributeConfig(
            remote_cluster={"chief": ["fake_worker"]}))
    self.assertEqual(config._distribute_coordinator_mode,
                     dc.CoordinatorMode.STANDALONE_CLIENT)


if __name__ == "__main__":
  # Reduce `recovery_wait_secs` from 30 seconds so the test completes quickly.
orig_init = tf.compat.v1.train.SessionManager.__init__ def new_init(*args, **kwargs): kwargs.pop("recovery_wait_secs", None) kwargs["recovery_wait_secs"] = 0.5 orig_init(*args, **kwargs) tf.compat.v1.train.SessionManager.__init__ = new_init with tf.compat.v1.test.mock.patch.object(sys, "exit", os._exit): tf.compat.v1.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/early_stopping.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Utilities for early stopping.""" import collections import operator import os import tensorflow as tf from tensorflow_estimator.python.estimator import estimator as estimator_lib from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.util import tf_keras_v1 _EVENT_FILE_GLOB_PATTERN = 'events.out.tfevents.*' @estimator_export('estimator.experimental.make_early_stopping_hook') def make_early_stopping_hook(estimator, should_stop_fn, run_every_secs=60, run_every_steps=None): """Creates early-stopping hook. Returns a `SessionRunHook` that stops training when `should_stop_fn` returns `True`. Usage example: ```python estimator = ... 
hook = early_stopping.make_early_stopping_hook( estimator, should_stop_fn=make_stop_fn(...)) train_spec = tf.estimator.TrainSpec(..., hooks=[hook]) tf.estimator.train_and_evaluate(estimator, train_spec, ...) ``` Caveat: Current implementation supports early-stopping both training and evaluation in local mode. In distributed mode, training can be stopped but evaluation (where it's a separate job) will indefinitely wait for new model checkpoints to evaluate, so you will need other means to detect and stop it. Early-stopping evaluation in distributed mode requires changes in `train_and_evaluate` API and will be addressed in a future revision. Args: estimator: A `tf.estimator.Estimator` instance. should_stop_fn: `callable`, function that takes no arguments and returns a `bool`. If the function returns `True`, stopping will be initiated by the chief. run_every_secs: If specified, calls `should_stop_fn` at an interval of `run_every_secs` seconds. Defaults to 60 seconds. Either this or `run_every_steps` must be set. run_every_steps: If specified, calls `should_stop_fn` every `run_every_steps` steps. Either this or `run_every_secs` must be set. Returns: A `SessionRunHook` that periodically executes `should_stop_fn` and initiates early stopping if the function returns `True`. Raises: TypeError: If `estimator` is not of type `tf.estimator.Estimator`. ValueError: If both `run_every_secs` and `run_every_steps` are set. """ if not isinstance(estimator, estimator_lib.Estimator): raise TypeError('`estimator` must have type `tf.estimator.Estimator`. 
' 'Got: {}'.format(type(estimator))) if run_every_secs is not None and run_every_steps is not None: raise ValueError('Only one of `run_every_secs` and `run_every_steps` must ' 'be set.') train_distribute = estimator.config.train_distribute mwms = ['CollectiveAllReduceStrategy', 'MultiWorkerMirroredStrategy'] if train_distribute and (train_distribute.__class__.__name__.startswith( strategy) for strategy in mwms): if run_every_secs: raise ValueError('run_every_secs should not be set when using ' 'MultiWorkerMirroredStrategy.') return _MultiWorkerEarlyStoppingHook(should_stop_fn, run_every_steps) if estimator.config.is_chief: return _StopOnPredicateHook(should_stop_fn, run_every_secs, run_every_steps) else: return _CheckForStoppingHook() @estimator_export('estimator.experimental.stop_if_higher_hook') def stop_if_higher_hook(estimator, metric_name, threshold, eval_dir=None, min_steps=0, run_every_secs=60, run_every_steps=None): """Creates hook to stop if the given metric is higher than the threshold. Usage example: ```python estimator = ... # Hook to stop training if accuracy becomes higher than 0.9. hook = early_stopping.stop_if_higher_hook(estimator, "accuracy", 0.9) train_spec = tf.estimator.TrainSpec(..., hooks=[hook]) tf.estimator.train_and_evaluate(estimator, train_spec, ...) ``` Caveat: Current implementation supports early-stopping both training and evaluation in local mode. In distributed mode, training can be stopped but evaluation (where it's a separate job) will indefinitely wait for new model checkpoints to evaluate, so you will need other means to detect and stop it. Early-stopping evaluation in distributed mode requires changes in `train_and_evaluate` API and will be addressed in a future revision. Args: estimator: A `tf.estimator.Estimator` instance. metric_name: `str`, metric to track. "loss", "accuracy", etc. threshold: Numeric threshold for the given metric. eval_dir: If set, directory containing summary files with eval metrics. 
      By default, `estimator.eval_dir()` will be used.
    min_steps: `int`, stop is never requested if global step is less than this
      value. Defaults to 0.
    run_every_secs: If specified, calls `should_stop_fn` at an interval of
      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
      `run_every_steps` must be set.
    run_every_steps: If specified, calls `should_stop_fn` every
      `run_every_steps` steps. Either this or `run_every_secs` must be set.

  Returns:
    An early-stopping hook of type `SessionRunHook` that periodically checks
    if the given metric is higher than specified threshold and initiates
    early stopping if true.
  """
  # Thin wrapper: all logic lives in the shared threshold-crossing factory.
  return _stop_if_threshold_crossed_hook(
      estimator=estimator,
      metric_name=metric_name,
      threshold=threshold,
      higher_is_better=True,
      eval_dir=eval_dir,
      min_steps=min_steps,
      run_every_secs=run_every_secs,
      run_every_steps=run_every_steps)


@estimator_export('estimator.experimental.stop_if_lower_hook')
def stop_if_lower_hook(estimator,
                       metric_name,
                       threshold,
                       eval_dir=None,
                       min_steps=0,
                       run_every_secs=60,
                       run_every_steps=None):
  """Creates hook to stop if the given metric is lower than the threshold.

  Usage example:

  ```python
  estimator = ...
  # Hook to stop training if loss becomes lower than 100.
  hook = early_stopping.stop_if_lower_hook(estimator, "loss", 100)
  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
  ```

  Caveat: Current implementation supports early-stopping both training and
  evaluation in local mode. In distributed mode, training can be stopped but
  evaluation (where it's a separate job) will indefinitely wait for new model
  checkpoints to evaluate, so you will need other means to detect and stop it.
  Early-stopping evaluation in distributed mode requires changes in
  `train_and_evaluate` API and will be addressed in a future revision.

  Args:
    estimator: A `tf.estimator.Estimator` instance.
    metric_name: `str`, metric to track. "loss", "accuracy", etc.
    threshold: Numeric threshold for the given metric.
    eval_dir: If set, directory containing summary files with eval metrics. By
      default, `estimator.eval_dir()` will be used.
    min_steps: `int`, stop is never requested if global step is less than this
      value. Defaults to 0.
    run_every_secs: If specified, calls `should_stop_fn` at an interval of
      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
      `run_every_steps` must be set.
    run_every_steps: If specified, calls `should_stop_fn` every
      `run_every_steps` steps. Either this or `run_every_secs` must be set.

  Returns:
    An early-stopping hook of type `SessionRunHook` that periodically checks
    if the given metric is lower than specified threshold and initiates
    early stopping if true.
  """
  # Same factory as stop_if_higher_hook, with the comparison inverted.
  return _stop_if_threshold_crossed_hook(
      estimator=estimator,
      metric_name=metric_name,
      threshold=threshold,
      higher_is_better=False,
      eval_dir=eval_dir,
      min_steps=min_steps,
      run_every_secs=run_every_secs,
      run_every_steps=run_every_steps)


@estimator_export('estimator.experimental.stop_if_no_increase_hook')
def stop_if_no_increase_hook(estimator,
                             metric_name,
                             max_steps_without_increase,
                             eval_dir=None,
                             min_steps=0,
                             run_every_secs=60,
                             run_every_steps=None):
  """Creates hook to stop if metric does not increase within given max steps.

  Usage example:

  ```python
  estimator = ...
  # Hook to stop training if accuracy does not increase in over 100000 steps.
  hook = early_stopping.stop_if_no_increase_hook(estimator, "accuracy", 100000)
  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
  ```

  Caveat: Current implementation supports early-stopping both training and
  evaluation in local mode. In distributed mode, training can be stopped but
  evaluation (where it's a separate job) will indefinitely wait for new model
  checkpoints to evaluate, so you will need other means to detect and stop it.
  Early-stopping evaluation in distributed mode requires changes in
  `train_and_evaluate` API and will be addressed in a future revision.

  Args:
    estimator: A `tf.estimator.Estimator` instance.
    metric_name: `str`, metric to track. "loss", "accuracy", etc.
    max_steps_without_increase: `int`, maximum number of training steps with no
      increase in the given metric.
    eval_dir: If set, directory containing summary files with eval metrics. By
      default, `estimator.eval_dir()` will be used.
    min_steps: `int`, stop is never requested if global step is less than this
      value. Defaults to 0.
    run_every_secs: If specified, calls `should_stop_fn` at an interval of
      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
      `run_every_steps` must be set.
    run_every_steps: If specified, calls `should_stop_fn` every
      `run_every_steps` steps. Either this or `run_every_secs` must be set.

  Returns:
    An early-stopping hook of type `SessionRunHook` that periodically checks
    if the given metric shows no increase over given maximum number of
    training steps, and initiates early stopping if true.
  """
  # Thin wrapper over the shared no-improvement factory.
  return _stop_if_no_metric_improvement_hook(
      estimator=estimator,
      metric_name=metric_name,
      max_steps_without_improvement=max_steps_without_increase,
      higher_is_better=True,
      eval_dir=eval_dir,
      min_steps=min_steps,
      run_every_secs=run_every_secs,
      run_every_steps=run_every_steps)


@estimator_export('estimator.experimental.stop_if_no_decrease_hook')
def stop_if_no_decrease_hook(estimator,
                             metric_name,
                             max_steps_without_decrease,
                             eval_dir=None,
                             min_steps=0,
                             run_every_secs=60,
                             run_every_steps=None):
  """Creates hook to stop if metric does not decrease within given max steps.

  Usage example:

  ```python
  estimator = ...
  # Hook to stop training if loss does not decrease in over 100000 steps.
  hook = early_stopping.stop_if_no_decrease_hook(estimator, "loss", 100000)
  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
  ```

  Caveat: Current implementation supports early-stopping both training and
  evaluation in local mode. In distributed mode, training can be stopped but
  evaluation (where it's a separate job) will indefinitely wait for new model
  checkpoints to evaluate, so you will need other means to detect and stop it.
  Early-stopping evaluation in distributed mode requires changes in
  `train_and_evaluate` API and will be addressed in a future revision.

  Args:
    estimator: A `tf.estimator.Estimator` instance.
    metric_name: `str`, metric to track. "loss", "accuracy", etc.
    max_steps_without_decrease: `int`, maximum number of training steps with no
      decrease in the given metric.
    eval_dir: If set, directory containing summary files with eval metrics. By
      default, `estimator.eval_dir()` will be used.
    min_steps: `int`, stop is never requested if global step is less than this
      value. Defaults to 0.
    run_every_secs: If specified, calls `should_stop_fn` at an interval of
      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
      `run_every_steps` must be set.
    run_every_steps: If specified, calls `should_stop_fn` every
      `run_every_steps` steps. Either this or `run_every_secs` must be set.

  Returns:
    An early-stopping hook of type `SessionRunHook` that periodically checks
    if the given metric shows no decrease over given maximum number of
    training steps, and initiates early stopping if true.
  """
  return _stop_if_no_metric_improvement_hook(
      estimator=estimator,
      metric_name=metric_name,
      max_steps_without_improvement=max_steps_without_decrease,
      higher_is_better=False,
      eval_dir=eval_dir,
      min_steps=min_steps,
      run_every_secs=run_every_secs,
      run_every_steps=run_every_steps)


def read_eval_metrics(eval_dir):
  """Helper to read eval metrics from eval summary files.

  Args:
    eval_dir: Directory containing summary files with eval metrics.

  Returns:
    A `dict` with global steps mapping to `dict` of metric names and values.
""" eval_metrics_dict = collections.defaultdict(dict) for event in _summaries(eval_dir): if not event.HasField('summary'): continue metrics = {} for value in event.summary.value: if value.HasField('simple_value'): metrics[value.tag] = value.simple_value if metrics: eval_metrics_dict[event.step].update(metrics) return collections.OrderedDict( sorted(eval_metrics_dict.items(), key=lambda t: t[0])) def _stop_if_threshold_crossed_hook(estimator, metric_name, threshold, higher_is_better, eval_dir, min_steps, run_every_secs, run_every_steps): """Creates early-stopping hook to stop training if threshold is crossed.""" if eval_dir is None: eval_dir = estimator.eval_dir() is_lhs_better = operator.gt if higher_is_better else operator.lt greater_or_lesser = 'greater than' if higher_is_better else 'less than' def stop_if_threshold_crossed_fn(): """Returns `True` if the given metric crosses specified threshold.""" eval_results = read_eval_metrics(eval_dir) for step, metrics in eval_results.items(): if step < min_steps: continue val = metrics[metric_name] if is_lhs_better(val, threshold): tf.compat.v1.logging.info( 'At step %s, metric "%s" has value %s which is %s the configured ' 'threshold (%s) for early stopping.', step, metric_name, val, greater_or_lesser, threshold) return True return False return make_early_stopping_hook( estimator=estimator, should_stop_fn=stop_if_threshold_crossed_fn, run_every_secs=run_every_secs, run_every_steps=run_every_steps) def _stop_if_no_metric_improvement_hook(estimator, metric_name, max_steps_without_improvement, higher_is_better, eval_dir, min_steps, run_every_secs, run_every_steps): """Returns hook to stop training if given metric shows no improvement.""" if eval_dir is None: eval_dir = estimator.eval_dir() is_lhs_better = operator.gt if higher_is_better else operator.lt increase_or_decrease = 'increase' if higher_is_better else 'decrease' def stop_if_no_metric_improvement_fn(): """Returns `True` if metric does not improve within max 
steps.""" eval_results = read_eval_metrics(eval_dir) best_val = None best_val_step = None for step, metrics in eval_results.items(): if step < min_steps: continue val = metrics[metric_name] if best_val is None or is_lhs_better(val, best_val): best_val = val best_val_step = step if step - best_val_step >= max_steps_without_improvement: tf.compat.v1.logging.info( 'No %s in metric "%s" for %s steps, which is greater than or equal ' 'to max steps (%s) configured for early stopping.', increase_or_decrease, metric_name, step - best_val_step, max_steps_without_improvement) return True return False return make_early_stopping_hook( estimator=estimator, should_stop_fn=stop_if_no_metric_improvement_fn, run_every_secs=run_every_secs, run_every_steps=run_every_steps) def _summaries(eval_dir): """Yields `tensorflow.Event` protos from event files in the eval dir. Args: eval_dir: Directory containing summary files with eval metrics. Yields: `tensorflow.Event` object read from the event files. """ if tf.compat.v1.gfile.Exists(eval_dir): for event_file in tf.compat.v1.gfile.Glob( os.path.join(eval_dir, _EVENT_FILE_GLOB_PATTERN)): try: for event in tf.compat.v1.train.summary_iterator(event_file): yield event except tf.errors.DataLossError as e: # Upon DataLossError, we ignore the rest of the file and go to the next # one. 
        tf.compat.v1.logging.warning(
            'Skipping rest of the file due to encountering data corruption '
            'error; file path: %s; original error raised by '
            '`tf.train.summary_iterator`: %s', event_file, e)


def _get_or_create_stop_var():
  """Returns the shared boolean STOP variable (created on first use)."""
  # AUTO_REUSE lets the chief's _StopOnPredicateHook and the workers'
  # _CheckForStoppingHook resolve the same graph variable.
  with tf.compat.v1.variable_scope(
      name_or_scope='signal_early_stopping',
      values=[],
      reuse=tf.compat.v1.AUTO_REUSE):
    return tf.compat.v1.get_variable(
        name='STOP',
        shape=[],
        dtype=tf.dtypes.bool,
        initializer=tf.compat.v1.initializers.constant(False),
        collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES],
        trainable=False)


class _StopOnPredicateHook(tf.compat.v1.train.SessionRunHook):
  """Hook that requests stop when `should_stop_fn` returns `True`."""

  def __init__(self, should_stop_fn, run_every_secs=60, run_every_steps=None):
    if not callable(should_stop_fn):
      raise TypeError('`should_stop_fn` must be callable.')

    self._should_stop_fn = should_stop_fn
    self._timer = tf.compat.v1.train.SecondOrStepTimer(
        every_secs=run_every_secs, every_steps=run_every_steps)
    self._global_step_tensor = None
    self._stop_var = None
    self._stop_op = None

  def begin(self):
    self._global_step_tensor = tf.compat.v1.train.get_global_step()
    self._stop_var = _get_or_create_stop_var()
    self._stop_op = tf.compat.v1.assign(self._stop_var, True)

  def before_run(self, run_context):
    del run_context
    return tf.compat.v1.train.SessionRunArgs(self._global_step_tensor)

  def after_run(self, run_context, run_values):
    global_step = run_values.results
    if self._timer.should_trigger_for_step(global_step):
      self._timer.update_last_triggered_step(global_step)
      if self._should_stop_fn():
        tf.compat.v1.logging.info('Requesting early stopping at global step %d',
                                  global_step)
        # Set the shared STOP var so non-chief workers see it, then stop.
        run_context.session.run(self._stop_op)
        run_context.request_stop()


class _CheckForStoppingHook(tf.compat.v1.train.SessionRunHook):
  """Hook that requests stop if stop is requested by `_StopOnPredicateHook`."""

  def __init__(self):
    self._stop_var = None

  def begin(self):
    self._stop_var = _get_or_create_stop_var()

  def before_run(self, run_context):
    del run_context
    return tf.compat.v1.train.SessionRunArgs(self._stop_var)

  def after_run(self, run_context, run_values):
    should_early_stop = run_values.results
    if should_early_stop:
      tf.compat.v1.logging.info('Early stopping requested, suspending run.')
      run_context.request_stop()


class _MultiWorkerEarlyStoppingHook(tf.compat.v1.train.SessionRunHook):
  """Hook that requests stop when `should_stop_fn` returns `True`."""

  def _get_or_create_stop_var_with_aggregation(self):
    # SUM aggregation across replicas: the var becomes > 0 once any worker
    # votes to stop.
    with tf.compat.v1.variable_scope(
        name_or_scope='signal_early_stopping',
        values=[],
        reuse=tf.compat.v1.AUTO_REUSE):
      return tf.compat.v1.get_variable(
          name='STOP',
          shape=[],
          dtype=tf.dtypes.int32,
          initializer=tf_keras_v1.initializers.constant(0),
          collections=[tf.compat.v1.GraphKeys.GLOBAL_VARIABLES],
          synchronization=tf.VariableSynchronization.ON_WRITE,
          aggregation=tf.compat.v1.VariableAggregation.SUM,
          trainable=False)

  def __init__(self, should_stop_fn, run_every_steps=None):
    if not callable(should_stop_fn):
      raise TypeError('`should_stop_fn` must be callable.')

    self._should_stop_fn = should_stop_fn
    # Step-based only; secs-based triggering is rejected upstream for MWMS.
    self._timer = tf.compat.v1.train.SecondOrStepTimer(
        every_secs=None, every_steps=run_every_steps)
    self._global_step_tensor = None
    self._stop_var = None
    self._stop_op = None
    self._non_stop_op = None

  def begin(self):
    self._global_step_tensor = tf.compat.v1.train.get_global_step()
    self._stop_var = self._get_or_create_stop_var_with_aggregation()
    assert tf.distribute.in_cross_replica_context()

    strategy = tf.distribute.get_strategy()
    self._stop_placeholder = None

    def stop_op_fn(var):
      # Feeding 1 (stop) or 0 (continue) into this placeholder selects the
      # vote; assign_add with SUM aggregation combines votes across workers.
      placeholder = tf.compat.v1.placeholder_with_default(
          0, tuple(), name='stop_value')
      if self._stop_placeholder is None:
        self._stop_placeholder = placeholder
      return var.assign_add(placeholder)

    self._stop_op = strategy.run(stop_op_fn, args=(self._stop_var,))

  def before_run(self, run_context):
    del run_context
    return tf.compat.v1.train.SessionRunArgs({
        'global_step': self._global_step_tensor,
        'stop_var': self._stop_var
    })

  def after_run(self, run_context, run_values):
    global_step = run_values.results['global_step']
    should_early_stop = run_values.results['stop_var']

    # A positive aggregated value means some worker already voted to stop.
    if should_early_stop > 0:
      tf.compat.v1.logging.info('Early stopping requested, suspending run.')
      run_context.request_stop()
      return
    if self._timer.should_trigger_for_step(global_step):
      self._timer.update_last_triggered_step(global_step)
      if self._should_stop_fn():
        run_context.session.run(
            self._stop_op, feed_dict={self._stop_placeholder: 1})
        tf.compat.v1.logging.info('Requesting early stopping at global step %d',
                                  global_step)
      else:
        # All workers must run the collective op each trigger, even when not
        # stopping, to keep the collective in sync.
        run_context.session.run(
            self._stop_op, feed_dict={self._stop_placeholder: 0})


================================================
FILE: tensorflow_estimator/python/estimator/early_stopping_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for early_stopping."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tempfile

from absl.testing import parameterized
from absl.testing.absltest import mock
import tensorflow as tf
from tensorflow.python.eager import context
from tensorflow_estimator.python.estimator import early_stopping
from tensorflow_estimator.python.estimator import estimator
from tensorflow_estimator.python.estimator import run_config


class _FakeRunConfig(run_config.RunConfig):
  """RunConfig stub whose chief-ness is fixed at construction time."""

  def __init__(self, is_chief):
    super(_FakeRunConfig, self).__init__()
    self._is_chief = is_chief

  @property
  def is_chief(self):
    return self._is_chief


def _dummy_model_fn(features, labels, params):
  # Intentionally a no-op; these tests never execute the model graph.
  _, _, _ = features, labels, params


class _FakeEstimator(estimator.Estimator):
  """Fake estimator for testing."""

  def __init__(self, config):
    super(_FakeEstimator, self).__init__(
        model_fn=_dummy_model_fn, config=config)


def _write_events(eval_dir, params):
  """Test helper to write events to summary files."""
  with context.graph_mode():
    for steps, loss, accuracy in params:
      estimator._write_dict_to_summary(eval_dir, {
          'loss': loss,
          'accuracy': accuracy,
      }, steps)


class ReadEvalMetricsTest(tf.test.TestCase):

  def test_read_eval_metrics(self):
    eval_dir = tempfile.mkdtemp()
    _write_events(
        eval_dir,
        [
            # steps, loss, accuracy
            (1000, 1, 2),
            (2000, 3, 4),
            (3000, 5, 6),
        ])
    # read_eval_metrics returns {global_step: {metric_name: value}}.
    self.assertEqual(
        {
            1000: {
                'loss': 1,
                'accuracy': 2
            },
            2000: {
                'loss': 3,
                'accuracy': 4
            },
            3000: {
                'loss': 5,
                'accuracy': 6
            },
        }, early_stopping.read_eval_metrics(eval_dir))

  def test_data_loss_error_ignored(self):
    # read_eval_metrics should keep whatever was parsed before a
    # DataLossError and silently skip the rest of the corrupt file.
    eval_dir = tempfile.mkdtemp()
    _write_events(
        eval_dir,
        [
            # steps, loss, accuracy
            (1000, 1, 2),
            (2000, 3, 4),
            (3000, 5, 6),
        ])
    orig_tf_train_summary_iterator = tf.compat.v1.train.summary_iterator

    def _summary_iterator(*args, **kwargs):
      # Wraps the real iterator so each file "corrupts" right after its
      # first summary event.
      for event in orig_tf_train_summary_iterator(*args, **kwargs):
        yield event
        # Raise an error for one of the files after yielding a summary event.
        if event.HasField('summary'):
          raise tf.errors.DataLossError(None, None, 'testing data loss')

    with mock.patch.object(tf.compat.v1.train,
                           'summary_iterator') as mock_summary_iterator:
      mock_summary_iterator.side_effect = _summary_iterator
      eval_results = early_stopping.read_eval_metrics(eval_dir)
    # Only the first event of the first file survives the injected error.
    self.assertEqual({
        1000: {
            'loss': 1,
            'accuracy': 2
        }
    }, eval_results)

  def test_read_eval_metrics_when_no_events(self):
    eval_dir = tempfile.mkdtemp()
    self.assertTrue(os.path.exists(eval_dir))

    # No error should be raised when eval directory exists with no event files.
    self.assertEqual({}, early_stopping.read_eval_metrics(eval_dir))

    os.rmdir(eval_dir)
    self.assertFalse(os.path.exists(eval_dir))

    # No error should be raised when eval directory does not exist.
    self.assertEqual({}, early_stopping.read_eval_metrics(eval_dir))


class EarlyStoppingHooksTest(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    super(EarlyStoppingHooksTest, self).setUp()
    config = _FakeRunConfig(is_chief=True)
    self._estimator = _FakeEstimator(config=config)
    eval_dir = self._estimator.eval_dir()
    os.makedirs(eval_dir)
    # Canned eval history the hooks under test will read back.
    _write_events(
        eval_dir,
        [
            # steps, loss, accuracy
            (1000, 0.8, 0.5),
            (2000, 0.7, 0.6),
            (3000, 0.4, 0.7),
            (3500, 0.41, 0.68),
        ])

  def run_session(self, hooks, should_stop):
    # Runs one no-op step under the given hooks and asserts whether the
    # monitored session ends up in a stop-requested state.
    hooks = hooks if isinstance(hooks, list) else [hooks]
    with tf.Graph().as_default():
      tf.compat.v1.train.create_global_step()
      no_op = tf.no_op()
      with tf.compat.v1.train.SingularMonitoredSession(hooks=hooks) as mon_sess:
        mon_sess.run(no_op)
        self.assertEqual(mon_sess.should_stop(), should_stop)

  @parameterized.parameters((0.8, 0, False), (0.6, 4000, False),
                            (0.6, 0, True))
  def test_stop_if_higher_hook(self, threshold, min_steps, should_stop):
    self.run_session(
        early_stopping.stop_if_higher_hook(
            self._estimator,
            metric_name='accuracy',
            threshold=threshold,
            min_steps=min_steps), should_stop)

  @parameterized.parameters((0.3, 0, False), (0.5, 4000,
# NOTE(review): the first line below completes the `@parameterized.parameters`
# decorator opened at the end of the previous chunk line.
                            False), (0.5, 0, True))
  def test_stop_if_lower_hook(self, threshold, min_steps, should_stop):
    self.run_session(
        early_stopping.stop_if_lower_hook(
            self._estimator,
            metric_name='loss',
            threshold=threshold,
            min_steps=min_steps), should_stop)

  @parameterized.parameters((1500, 0, False), (500, 4000, False),
                            (500, 0, True))
  def test_stop_if_no_increase_hook(self, max_steps, min_steps, should_stop):
    self.run_session(
        early_stopping.stop_if_no_increase_hook(
            self._estimator,
            metric_name='accuracy',
            max_steps_without_increase=max_steps,
            min_steps=min_steps), should_stop)

  @parameterized.parameters((1500, 0, False), (500, 4000, False),
                            (500, 0, True))
  def test_stop_if_no_decrease_hook(self, max_steps, min_steps, should_stop):
    self.run_session(
        early_stopping.stop_if_no_decrease_hook(
            self._estimator,
            metric_name='loss',
            max_steps_without_decrease=max_steps,
            min_steps=min_steps), should_stop)

  @parameterized.parameters((1500, 0.3, False), (1500, 0.5, True),
                            (500, 0.3, True))
  def test_multiple_hooks(self, max_steps, loss_threshold, should_stop):
    # Either hook requesting a stop is sufficient to end the session.
    self.run_session([
        early_stopping.stop_if_no_decrease_hook(
            self._estimator,
            metric_name='loss',
            max_steps_without_decrease=max_steps),
        early_stopping.stop_if_lower_hook(
            self._estimator, metric_name='loss', threshold=loss_threshold)
    ], should_stop)

  @parameterized.parameters(False, True)
  def test_make_early_stopping_hook(self, should_stop):
    self.run_session([
        early_stopping.make_early_stopping_hook(
            self._estimator, should_stop_fn=lambda: should_stop)
    ], should_stop)

  def test_make_early_stopping_hook_typeerror(self):
    # First argument must be an Estimator instance.
    with self.assertRaises(TypeError):
      early_stopping.make_early_stopping_hook(
          estimator=object(), should_stop_fn=lambda: True)

  def test_make_early_stopping_hook_valueerror(self):
    # run_every_secs and run_every_steps are mutually exclusive.
    with self.assertRaises(ValueError):
      early_stopping.make_early_stopping_hook(
          self._estimator,
          should_stop_fn=lambda: True,
          run_every_secs=60,
          run_every_steps=100)


class StopOnPredicateHookTest(tf.test.TestCase):

  def test_stop(self):
    # Predicate False: neither the session nor the shared STOP var change.
    hook = early_stopping._StopOnPredicateHook(
        should_stop_fn=lambda: False, run_every_secs=0)
    with tf.Graph().as_default():
      tf.compat.v1.train.create_global_step()
      no_op = tf.no_op()
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        self.assertFalse(mon_sess.raw_session().run(hook._stop_var))

    # Predicate True: stop requested and the shared STOP var flips to True.
    hook = early_stopping._StopOnPredicateHook(
        should_stop_fn=lambda: True, run_every_secs=0)
    with tf.Graph().as_default():
      tf.compat.v1.train.create_global_step()
      no_op = tf.no_op()
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())
        self.assertTrue(mon_sess.raw_session().run(hook._stop_var))


class CheckForStoppingHookTest(tf.test.TestCase):

  def test_stop(self):
    hook = early_stopping._CheckForStoppingHook()
    with tf.Graph().as_default():
      no_op = tf.no_op()
      assign_op = tf.compat.v1.assign(early_stopping._get_or_create_stop_var(),
                                      True)
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        mon_sess.run(assign_op)
        # Because there are no guarantees that the stop variable will be read
        # after the assign op is completed, run another no_op to ensure that the
        # updated value is read.
        if not mon_sess.should_stop():
          mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/estimator.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Base Estimator class.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import copy import os import tempfile import numpy as np import six import tensorflow as tf from google.protobuf import message # pylint: disable=g-import-not-at-top from tensorflow.core.framework import summary_pb2 from tensorflow.python.checkpoint import checkpoint as trackable_util from tensorflow.python.checkpoint import checkpoint_management from tensorflow.python.checkpoint import graph_view from tensorflow.python.distribute import estimator_training as distribute_coordinator_training from tensorflow.python.eager import context from tensorflow.python.eager import monitoring from tensorflow.python.framework import ops from tensorflow.python.profiler import trace from tensorflow.python.saved_model import path_helpers from tensorflow.python.summary import summary from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import device_setter from tensorflow.python.training import evaluation from tensorflow.python.training import training from tensorflow.python.training import training_util from tensorflow.python.util import deprecation from tensorflow.python.util import function_utils from tensorflow.python.util import tf_contextlib from tensorflow.tools.docs import doc_controls from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator 
import run_config from tensorflow_estimator.python.estimator import util as estimator_util from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.export import export_lib from tensorflow_estimator.python.estimator.mode_keys import ModeKeys _VALID_MODEL_FN_ARGS = set( ['features', 'labels', 'mode', 'params', 'self', 'config']) _estimator_api_gauge = monitoring.BoolGauge('/tensorflow/api/estimator', 'estimator api usage', 'method') _canned_estimator_api_gauge = monitoring.StringGauge( '/tensorflow/api/estimator/canned_estimator', 'Gauge to track the type of canned estimator used', 'ClassType') @estimator_export(v1=['estimator.Estimator']) @doc_controls.inheritable_header("""\ Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://www.tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." """) class Estimator(object): """Estimator class to train and evaluate TensorFlow models. The `Estimator` object wraps a model which is specified by a `model_fn`, which, given inputs and a number of other parameters, returns the ops necessary to perform training, evaluation, or predictions. All outputs (checkpoints, event files, etc.) are written to `model_dir`, or a subdirectory thereof. If `model_dir` is not set, a temporary directory is used. The `config` argument can be passed `tf.estimator.RunConfig` object containing information about the execution environment. It is passed on to the `model_fn`, if the `model_fn` has a parameter named "config" (and input functions in the same manner). If the `config` parameter is not passed, it is instantiated by the `Estimator`. Not passing config means that defaults useful for local execution are used. 
`Estimator` makes config available to the model (for instance, to allow specialization based on the number of workers available), and also uses some of its fields to control internals, especially regarding checkpointing. The `params` argument contains hyperparameters. It is passed to the `model_fn`, if the `model_fn` has a parameter named "params", and to the input functions in the same manner. `Estimator` only passes params along, it does not inspect it. The structure of `params` is therefore entirely up to the developer. None of `Estimator`'s methods can be overridden in subclasses (its constructor enforces this). Subclasses should use `model_fn` to configure the base class, and may add methods implementing specialized functionality. See [estimators](https://tensorflow.org/guide/estimator) for more information. To warm-start an `Estimator`: ```python estimator = tf.estimator.DNNClassifier( feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb], hidden_units=[1024, 512, 256], warm_start_from="/path/to/checkpoint/dir") ``` For more details on warm-start configuration, see `tf.estimator.WarmStartSettings`. @compatibility(eager) Calling methods of `Estimator` will work while eager execution is enabled. However, the `model_fn` and `input_fn` is not executed eagerly, `Estimator` will switch to graph mode before calling all user-provided functions (incl. hooks), so their code has to be compatible with graph mode execution. Note that `input_fn` code using `tf.data` generally works in both graph and eager modes. @end_compatibility """ def __init__(self, model_fn, model_dir=None, config=None, params=None, warm_start_from=None): """Constructs an `Estimator` instance. Args: model_fn: Model function. Follows the signature: * `features` -- This is the first item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor` or `dict` of same. 
* `labels` -- This is the second item returned from the `input_fn` passed to `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor` or `dict` of same (for multi-head models). If mode is `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If the `model_fn`'s signature does not accept `mode`, the `model_fn` must still be able to handle `labels=None`. * `mode` -- Optional. Specifies if this is training, evaluation or prediction. See `tf.estimator.ModeKeys`. `params` -- Optional `dict` of hyperparameters. Will receive what is passed to Estimator in `params` parameter. This allows to configure Estimators from hyper parameter tuning. * `config` -- Optional `estimator.RunConfig` object. Will receive what is passed to Estimator as its `config` parameter, or a default value. Allows setting up things in your `model_fn` based on configuration such as `num_ps_replicas`, or `model_dir`. * Returns -- `tf.estimator.EstimatorSpec` model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. If `PathLike` object, the path will be resolved. If `None`, the model_dir in `config` will be used if set. If both are set, they must be same. If both are `None`, a temporary directory will be used. config: `estimator.RunConfig` configuration object. params: `dict` of hyper parameters that will be passed into `model_fn`. Keys are names of parameters, values are basic python types. warm_start_from: Optional string filepath to a checkpoint or SavedModel to warm-start from, or a `tf.estimator.WarmStartSettings` object to fully configure warm-starting. If None, only TRAINABLE variables are warm-started. If the string filepath is provided instead of a `tf.estimator.WarmStartSettings`, then all variables are warm-started, and it is assumed that vocabularies and `tf.Tensor` names are unchanged. 
Raises: ValueError: parameters of `model_fn` don't match `params`. ValueError: if this is called via a subclass and if that class overrides a member of `Estimator`. """ _estimator_api_gauge.get_cell('init').set(True) # We do not endorse Estimator child classes to override methods in # Estimator, other than a select few. You're on your own if you cleverly # override the method "_assert_members_are_not_overridden". self.__class__._assert_members_are_not_overridden(self) # pylint: disable=protected-access self._config = maybe_overwrite_model_dir_and_session_config( config, model_dir) # The distribute field contains an instance of tf.distribute.Strategy. self._train_distribution = self._config.train_distribute self._eval_distribution = self._config.eval_distribute # Model directory. self._model_dir = self._config.model_dir self._session_config = self._config.session_config tf.compat.v1.logging.info('Using config: %s', str(vars(self._config))) self._device_fn = ( self._config.device_fn or _get_replica_device_setter(self._config)) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') model_fn_lib.verify_model_fn_args(model_fn, params) self._model_fn = model_fn self._params = copy.deepcopy(params or {}) # pylint: disable=protected-access self._warm_start_settings = _get_default_warm_start_settings( warm_start_from) # pylint: enable=protected-access @property def model_dir(self): return self._model_dir @property def config(self): return copy.deepcopy(self._config) @property def params(self): return copy.deepcopy(self._params) @property def model_fn(self): """Returns the `model_fn` which is bound to `self.params`. 
Returns: The `model_fn` with following signature: `def model_fn(features, labels, mode, config)` """ def public_model_fn(features, labels, mode, config): return self._call_model_fn(features, labels, mode, config) return public_model_fn # TODO(ispir): support a list of names def get_variable_value(self, name): """Returns value of the variable given by name. Args: name: string or a list of string, name of the tensor. Returns: Numpy array - value of the tensor. Raises: ValueError: If the `Estimator` has not produced a checkpoint yet. """ _check_checkpoint_available(self.model_dir) with context.graph_mode(): return tf.train.load_variable(self.model_dir, name) def get_variable_names(self): """Returns list of all variable names in this model. Returns: List of names. Raises: ValueError: If the `Estimator` has not produced a checkpoint yet. """ _check_checkpoint_available(self.model_dir) with context.graph_mode(): return [name for name, _ in tf.train.list_variables(self.model_dir)] def latest_checkpoint(self): """Finds the filename of the latest saved checkpoint file in `model_dir`. Returns: The full path to the latest checkpoint or `None` if no checkpoint was found. """ with context.graph_mode(): return checkpoint_management.latest_checkpoint(self.model_dir) def train(self, input_fn, hooks=None, steps=None, max_steps=None, saving_listeners=None): """Trains a model given training data `input_fn`. Args: input_fn: A function that provides input data for training as minibatches. See [Premade Estimators]( https://tensorflow.org/guide/premade_estimators#create_input_functions) for more information. The function should construct and return one of the following: * A `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple `(features, labels)` with same constraints as below. * A tuple `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. 
Both `features` and `labels` are consumed by `model_fn`. They should satisfy the expectation of `model_fn` from inputs. hooks: List of `tf.train.SessionRunHook` subclass instances. Used for callbacks inside the training loop. steps: Number of steps for which to train the model. If `None`, train forever or train until `input_fn` generates the `tf.errors.OutOfRange` error or `StopIteration` exception. `steps` works incrementally. If you call two times `train(steps=10)` then training occurs in total 20 steps. If `OutOfRange` or `StopIteration` occurs in the middle, training stops before 20 steps. If you don't want to have incremental behavior please set `max_steps` instead. If set, `max_steps` must be `None`. max_steps: Number of total steps for which to train model. If `None`, train forever or train until `input_fn` generates the `tf.errors.OutOfRange` error or `StopIteration` exception. If set, `steps` must be `None`. If `OutOfRange` or `StopIteration` occurs in the middle, training stops before `max_steps` steps. Two calls to `train(steps=100)` means 200 training iterations. On the other hand, two calls to `train(max_steps=100)` means that the second call will not do any iteration since first call did all 100 steps. saving_listeners: list of `CheckpointSaverListener` objects. Used for callbacks that run immediately before or after checkpoint savings. Returns: `self`, for chaining. Raises: ValueError: If both `steps` and `max_steps` are not `None`. ValueError: If either `steps` or `max_steps <= 0`. """ _estimator_api_gauge.get_cell('train').set(True) if self.config.task_type in (run_config.TaskType.EVALUATOR, run_config.TaskType.PS): raise ValueError( 'Train has been called wrong configuration. Please use ' 'tf.estimator.train_and_evaluate which calls proper API according ' 'to given configuration. 
Current configuration: {}.'.format( self.config)) with context.graph_mode(): if (steps is not None) and (max_steps is not None): raise ValueError('Can not provide both steps and max_steps.') if steps is not None and steps <= 0: raise ValueError('Must specify steps > 0, given: {}'.format(steps)) if max_steps is not None and max_steps <= 0: raise ValueError( 'Must specify max_steps > 0, given: {}'.format(max_steps)) if max_steps is not None: start_step = _load_global_step_from_checkpoint_dir(self._model_dir) if max_steps <= start_step: tf.compat.v1.logging.info( 'Skipping training since max_steps has already saved.' ) return self hooks = _check_hooks_type(hooks) hooks.extend(self._convert_train_steps_to_hooks(steps, max_steps)) saving_listeners = _check_listeners_type(saving_listeners) loss = self._train_model(input_fn, hooks, saving_listeners) tf.compat.v1.logging.info('Loss for final step: %s.', loss) return self def _convert_train_steps_to_hooks(self, steps, max_steps): """Create hooks to run correct number of steps in training. Args: steps: number of steps to run during training. max_steps: maximum number of steps to be run during training. It'll be the maximum number of steps the model will train to after restoring from checkpoint even across multiple estimator.train calls. Returns: List of hooks to be passed to the estimator. """ if steps is not None or max_steps is not None: if self._train_distribution: steps_per_run = getattr(self._train_distribution.extended, 'steps_per_run', 1) if steps_per_run > 1: return [ basic_session_run_hooks._MultiStepStopAtStepHook( # pylint: disable=protected-access steps, max_steps, steps_per_run) ] return [tf.compat.v1.train.StopAtStepHook(steps, max_steps)] else: return [] def eval_dir(self, name=None): """Shows the directory name where evaluation metrics are dumped. Args: name: Name of the evaluation if user needs to run multiple evaluations on different data sets, such as on training data vs test data. 
Metrics for different evaluations are saved in separate folders, and appear separately in tensorboard. Returns: A string which is the path of directory contains evaluation metrics. """ return os.path.join(self._model_dir, 'eval' if not name else 'eval_' + name) def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None, name=None): """Evaluates the model given evaluation data `input_fn`. For each step, calls `input_fn`, which returns one batch of data. Evaluates until: - `steps` batches are processed, or - `input_fn` raises an end-of-input exception (`tf.errors.OutOfRangeError` or `StopIteration`). Args: input_fn: A function that constructs the input data for evaluation. See [Premade Estimators]( https://tensorflow.org/guide/premade_estimators#create_input_functions) for more information. The function should construct and return one of the following: * A `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple `(features, labels)` with same constraints as below. * A tuple `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both `features` and `labels` are consumed by `model_fn`. They should satisfy the expectation of `model_fn` from inputs. steps: Number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. hooks: List of `tf.train.SessionRunHook` subclass instances. Used for callbacks inside the evaluation call. checkpoint_path: Path of a specific checkpoint to evaluate. If `None`, the latest checkpoint in `model_dir` is used. If there are no checkpoints in `model_dir`, evaluation is run with newly initialized `Variables` instead of ones restored from checkpoint. name: Name of the evaluation if user needs to run multiple evaluations on different data sets, such as on training data vs test data. 
Metrics for different evaluations are saved in separate folders, and appear separately in tensorboard. Returns: A dict containing the evaluation metrics specified in `model_fn` keyed by name, as well as an entry `global_step` which contains the value of the global step for which this evaluation was performed. For canned estimators, the dict contains the `loss` (mean loss per mini-batch) and the `average_loss` (mean loss per sample). Canned classifiers also return the `accuracy`. Canned regressors also return the `label/mean` and the `prediction/mean`. Raises: ValueError: If `steps <= 0`. """ _estimator_api_gauge.get_cell('evaluate').set(True) # pylint: disable=protected-access if (self._eval_distribution and hasattr(self._config, '_distribute_coordinator_mode') and self._config._distribute_coordinator_mode): return distribute_coordinator_training.estimator_evaluate( self, lambda est, s, eval_hooks: est._actual_eval( # pylint: disable=g-long-lambda input_fn, strategy=s, steps=steps, hooks=eval_hooks, checkpoint_path=checkpoint_path, name=name), hooks) # pylint: enable=protected-access else: return self._actual_eval( input_fn, strategy=self._eval_distribution, steps=steps, hooks=hooks, checkpoint_path=checkpoint_path, name=name) def _actual_eval(self, input_fn, strategy=None, steps=None, hooks=None, checkpoint_path=None, name=None): """The method that does evaluation actually.""" with context.graph_mode(): hooks = _check_hooks_type(hooks) hooks.extend(self._convert_eval_steps_to_hooks(steps)) # Check that model has been trained (if nothing has been set explicitly). 
if not checkpoint_path: latest_path = checkpoint_management.latest_checkpoint(self._model_dir) if not latest_path: tf.compat.v1.logging.info( 'Could not find trained model in model_dir: {}, running ' 'initialization to evaluate.'.format(self._model_dir)) checkpoint_path = latest_path def _evaluate(): (scaffold, update_op, eval_dict, all_hooks) = ( self._evaluate_build_graph(input_fn, hooks, checkpoint_path)) return self._evaluate_run( checkpoint_path=checkpoint_path, scaffold=scaffold, update_op=update_op, eval_dict=eval_dict, all_hooks=all_hooks, output_dir=self.eval_dir(name)) with tf.Graph().as_default(): if strategy: # We want to create the iterations variable outside the distribution # scope as that is just stored on the host and mainly used to drive # the loop and doesn't need to be a Mirrored/Device variable. training.get_or_create_steps_per_run_variable() with strategy.scope(): return _evaluate() else: return _evaluate() def _convert_eval_steps_to_hooks(self, steps): """Create hooks to run correct number of steps in evaluation. Args: steps: number of steps to run during evaluation. Raises: ValueError: if steps is less than or equal to zero. Returns: List of hooks to be passed to the estimator. """ if steps is None: return [] if steps <= 0: raise ValueError('Must specify steps > 0, given: {}'.format(steps)) # The hooks are declared as private in evaluation.py discourage the use # by other libraries or open source users. This should be the only usage # of the estimator evaluation hooks. 
if self._eval_distribution: steps_per_run = getattr(self._eval_distribution.extended, 'steps_per_run', 1) if steps_per_run > 1: return [ evaluation._MultiStepStopAfterNEvalsHook( # pylint: disable=protected-access num_evals=steps, steps_per_run=steps_per_run) ] return [evaluation._StopAfterNEvalsHook(num_evals=steps)] # pylint: disable=protected-access def predict(self, input_fn, predict_keys=None, hooks=None, checkpoint_path=None, yield_single_examples=True): """Yields predictions for given features. Please note that interleaving two predict outputs does not work. See: [issue/20506]( https://github.com/tensorflow/tensorflow/issues/20506#issuecomment-422208517) Args: input_fn: A function that constructs the features. Prediction continues until `input_fn` raises an end-of-input exception (`tf.errors.OutOfRangeError` or `StopIteration`). See [Premade Estimators]( https://tensorflow.org/guide/premade_estimators#create_input_functions) for more information. The function should construct and return one of the following: * `tf.data.Dataset` object -- Outputs of `Dataset` object must have same constraints as below. * features -- A `tf.Tensor` or a dictionary of string feature name to `Tensor`. features are consumed by `model_fn`. They should satisfy the expectation of `model_fn` from inputs. * A tuple, in which case the first item is extracted as features. predict_keys: list of `str`, name of the keys to predict. It is used if the `tf.estimator.EstimatorSpec.predictions` is a `dict`. If `predict_keys` is used then rest of the predictions will be filtered from the dictionary. If `None`, returns all. hooks: List of `tf.train.SessionRunHook` subclass instances. Used for callbacks inside the prediction call. checkpoint_path: Path of a specific checkpoint to predict. If `None`, the latest checkpoint in `model_dir` is used. If there are no checkpoints in `model_dir`, prediction is run with newly initialized `Variables` instead of ones restored from checkpoint. 
yield_single_examples: If `False`, yields the whole batch as returned by the `model_fn` instead of decomposing the batch into individual elements. This is useful if `model_fn` returns some tensors whose first dimension is not equal to the batch size. Yields: Evaluated values of `predictions` tensors. Raises: ValueError: If batch length of predictions is not the same and `yield_single_examples` is `True`. ValueError: If there is a conflict between `predict_keys` and `predictions`. For example if `predict_keys` is not `None` but `tf.estimator.EstimatorSpec.predictions` is not a `dict`. """ _estimator_api_gauge.get_cell('predict').set(True) with context.graph_mode(): hooks = _check_hooks_type(hooks) # Check that model has been trained. if not checkpoint_path: checkpoint_path = checkpoint_management.latest_checkpoint( self._model_dir) if not checkpoint_path: tf.compat.v1.logging.info( 'Could not find trained model in model_dir: {}, running ' 'initialization to predict.'.format(self._model_dir)) with tf.Graph().as_default() as g: tf.compat.v1.random.set_random_seed(self._config.tf_random_seed) self._create_and_assert_global_step(g) features, input_hooks = self._get_features_from_input_fn( input_fn, ModeKeys.PREDICT) estimator_spec = self._call_model_fn(features, None, ModeKeys.PREDICT, self.config) # Call to warm_start has to be after model_fn is called. 
        self._maybe_warm_start(checkpoint_path)
        # Optionally filter the prediction dict down to `predict_keys`.
        predictions = self._extract_keys(estimator_spec.predictions,
                                         predict_keys)
        all_hooks = list(input_hooks)
        all_hooks.extend(hooks)
        all_hooks.extend(list(estimator_spec.prediction_hooks or []))
        with tf.compat.v1.train.MonitoredSession(
            session_creator=tf.compat.v1.train.ChiefSessionCreator(
                checkpoint_filename_with_path=checkpoint_path,
                master=self._config.master,
                scaffold=estimator_spec.scaffold,
                config=self._session_config),
            hooks=all_hooks) as mon_sess:
          # Loop until the input_fn signals end-of-input via the session.
          while not mon_sess.should_stop():
            preds_evaluated = mon_sess.run(predictions)
            if not yield_single_examples:
              yield preds_evaluated
            elif not isinstance(predictions, dict):
              for pred in preds_evaluated:
                yield pred
            else:
              # Decompose the batched dict into one dict per example; all
              # values must share the same batch length (checked in
              # _extract_batch_length).
              for i in range(self._extract_batch_length(preds_evaluated)):
                yield {
                    key: value[i]
                    for key, value in six.iteritems(preds_evaluated)
                }

  def _assert_members_are_not_overridden(self):
    """Asserts members of `Estimator` are not overridden."""
    _assert_members_are_not_overridden(Estimator, self)

  def export_saved_model(self,
                         export_dir_base,
                         serving_input_receiver_fn,
                         assets_extra=None,
                         as_text=False,
                         checkpoint_path=None,
                         experimental_mode=ModeKeys.PREDICT):
    # pylint: disable=line-too-long
    """Exports inference graph as a `SavedModel` into the given dir.

    For a detailed guide on SavedModel, see [Using the SavedModel format]
    (https://tensorflow.org/guide/saved_model#savedmodels_from_estimators).

    This method builds a new graph by first calling the
    `serving_input_receiver_fn` to obtain feature `Tensor`s, and then calling
    this `Estimator`'s `model_fn` to generate the model graph based on those
    features. It restores the given checkpoint (or, lacking that, the most
    recent checkpoint) into this graph in a fresh session.  Finally it creates
    a timestamped export directory below the given `export_dir_base`, and
    writes a `SavedModel` into it containing a single `tf.MetaGraphDef` saved
    from this session.

    The exported `MetaGraphDef` will provide one `SignatureDef` for each
    element of the `export_outputs` dict returned from the `model_fn`, named
    using the same keys.  One of these keys is always
    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
    indicating which signature will be served when a serving request does not
    specify one. For each signature, the outputs are provided by the
    corresponding `tf.estimator.export.ExportOutput`s, and the inputs are
    always the input receivers provided by the `serving_input_receiver_fn`.

    Extra assets may be written into the `SavedModel` via the `assets_extra`
    argument.  This should be a dict, where each key gives a destination path
    (including the filename) relative to the assets.extra directory.  The
    corresponding value gives the full path of the source file to be copied.
    For example, the simple case of copying a single file without renaming it
    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.

    The experimental_mode parameter can be used to export a single
    train/eval/predict graph as a `SavedModel`. See
    `experimental_export_all_saved_models` for full docs.

    Args:
      export_dir_base: A string containing a directory in which to create
        timestamped subdirectories containing exported `SavedModel`s.
      serving_input_receiver_fn: A function that takes no argument and returns
        a `tf.estimator.export.ServingInputReceiver` or
        `tf.estimator.export.TensorServingInputReceiver`.
      assets_extra: A dict specifying how to populate the assets.extra
        directory within the exported `SavedModel`, or `None` if no extra
        assets are needed.
      as_text: whether to write the `SavedModel` proto in text format.
      checkpoint_path: The checkpoint path to export.  If `None` (the default),
        the most recent checkpoint found within the model directory is chosen.
      experimental_mode: `tf.estimator.ModeKeys` value indicating with mode
        will be exported. Note that this feature is experimental.

    Returns:
      The path to the exported directory as a bytes object.

    Raises:
      ValueError: if no `serving_input_receiver_fn` is provided, no
        `export_outputs` are provided, or no checkpoint can be found.
    """
    # pylint: enable=line-too-long
    if not serving_input_receiver_fn:
      raise ValueError('An input_receiver_fn must be defined.')

    # Delegates to the multi-mode exporter with a single-entry map.
    input_receiver_fn_map = {experimental_mode: serving_input_receiver_fn}

    return self._export_all_saved_models(
        export_dir_base,
        input_receiver_fn_map,
        assets_extra=assets_extra,
        as_text=as_text,
        checkpoint_path=checkpoint_path,
        strip_default_attrs=True)

  def experimental_export_all_saved_models(self,
                                           export_dir_base,
                                           input_receiver_fn_map,
                                           assets_extra=None,
                                           as_text=False,
                                           checkpoint_path=None):
    """Exports a `SavedModel` with `tf.MetaGraphDefs` for each requested mode.

    For each mode passed in via the `input_receiver_fn_map`, this method builds
    a new graph by calling the `input_receiver_fn` to obtain feature and label
    `Tensor`s. Next, this method calls the `Estimator`'s `model_fn` in the
    passed mode to generate the model graph based on those features and labels,
    and restores the given checkpoint (or, lacking that, the most recent
    checkpoint) into the graph. Only one of the modes is used for saving
    variables to the `SavedModel` (order of preference:
    `tf.estimator.ModeKeys.TRAIN`, `tf.estimator.ModeKeys.EVAL`, then
    `tf.estimator.ModeKeys.PREDICT`), such that up to three
    `tf.MetaGraphDefs` are saved with a single set of variables in a single
    `SavedModel` directory.

    For the variables and `tf.MetaGraphDefs`, a timestamped export directory
    below `export_dir_base`, and writes a `SavedModel` into it containing the
    `tf.MetaGraphDef` for the given mode and its associated signatures.

    For prediction, the exported `MetaGraphDef` will provide one `SignatureDef`
    for each element of the `export_outputs` dict returned from the `model_fn`,
    named using the same keys.  One of these keys is always
    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
    indicating which signature will be served when a serving request does not
    specify one. For each signature, the outputs are provided by the
    corresponding `tf.estimator.export.ExportOutput`s, and the inputs are
    always the input receivers provided by the `serving_input_receiver_fn`.

    For training and evaluation, the `train_op` is stored in an extra
    collection, and loss, metrics, and predictions are included in a
    `SignatureDef` for the mode in question.

    Extra assets may be written into the `SavedModel` via the `assets_extra`
    argument.  This should be a dict, where each key gives a destination path
    (including the filename) relative to the assets.extra directory.  The
    corresponding value gives the full path of the source file to be copied.
    For example, the simple case of copying a single file without renaming it
    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.

    Args:
      export_dir_base: A string containing a directory in which to create
        timestamped subdirectories containing exported `SavedModel`s.
      input_receiver_fn_map: dict of `tf.estimator.ModeKeys` to
        `input_receiver_fn` mappings, where the `input_receiver_fn` is a
        function that takes no arguments and returns the appropriate subclass
        of `InputReceiver`.
      assets_extra: A dict specifying how to populate the assets.extra
        directory within the exported `SavedModel`, or `None` if no extra
        assets are needed.
      as_text: whether to write the `SavedModel` proto in text format.
      checkpoint_path: The checkpoint path to export.  If `None` (the default),
        the most recent checkpoint found within the model directory is chosen.

    Returns:
      The path to the exported directory as a bytes object.

    Raises:
      ValueError: if any `input_receiver_fn` is `None`, no `export_outputs`
        are provided, or no checkpoint can be found.
    """
    return self._export_all_saved_models(
        export_dir_base,
        input_receiver_fn_map,
        assets_extra=assets_extra,
        as_text=as_text,
        checkpoint_path=checkpoint_path,
        strip_default_attrs=True)

  def _export_all_saved_models(self,
                               export_dir_base,
                               input_receiver_fn_map,
                               assets_extra=None,
                               as_text=False,
                               checkpoint_path=None,
                               strip_default_attrs=True):
    """Exports multiple modes in the model function to a SavedModel."""
    # TODO(b/65561022): Consider allowing multiple input_receiver_fns per mode.
    with context.graph_mode():
      if not checkpoint_path:
        # Locate the latest checkpoint
        checkpoint_path = self.latest_checkpoint()
      if not checkpoint_path:
        # Fall back to the warm-start checkpoint if one was configured;
        # otherwise exporting without any checkpoint is an error.
        if self._warm_start_settings:
          checkpoint_path = self._warm_start_settings.ckpt_to_initialize_from
          if tf.compat.v1.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        else:
          raise ValueError("Couldn't find trained model at {}.".format(
              self._model_dir))

      # Export into a temp directory first, then atomically rename at the end.
      export_dir = export_lib.get_timestamped_export_dir(export_dir_base)
      temp_export_dir = export_lib.get_temp_export_dir(export_dir)

      builder = tf.compat.v1.saved_model.Builder(temp_export_dir)

      save_variables = True
      # Note that the order in which we run here matters, as the first
      # mode we pass through will be used to save the variables. We run TRAIN
      # first, as that is also the mode used for checkpoints, and therefore
      # we are not likely to have vars in PREDICT that are not in the checkpoint
      # created by TRAIN.
      # Only the first exported mode saves variables (save_variables is
      # flipped to False after each successful export); subsequent modes add
      # only their MetaGraphDef.
      if input_receiver_fn_map.get(ModeKeys.TRAIN):
        self._add_meta_graph_for_mode(
            builder,
            input_receiver_fn_map,
            checkpoint_path,
            save_variables,
            mode=ModeKeys.TRAIN,
            strip_default_attrs=strip_default_attrs)
        save_variables = False
      if input_receiver_fn_map.get(ModeKeys.EVAL):
        self._add_meta_graph_for_mode(
            builder,
            input_receiver_fn_map,
            checkpoint_path,
            save_variables,
            mode=ModeKeys.EVAL,
            strip_default_attrs=strip_default_attrs)
        save_variables = False
      if input_receiver_fn_map.get(ModeKeys.PREDICT):
        self._add_meta_graph_for_mode(
            builder,
            input_receiver_fn_map,
            checkpoint_path,
            save_variables,
            mode=ModeKeys.PREDICT,
            strip_default_attrs=strip_default_attrs)
        save_variables = False

      # save_variables still True means no mode in the map matched.
      if save_variables:
        raise ValueError('No valid modes for exporting found. Got {}.'.format(
            input_receiver_fn_map.keys()))

      builder.save(as_text)

      # Add the extra assets
      if assets_extra:
        assets_extra_path = os.path.join(
            tf.compat.as_bytes(temp_export_dir),
            tf.compat.as_bytes('assets.extra'))
        for dest_relative, source in assets_extra.items():
          dest_absolute = os.path.join(
              tf.compat.as_bytes(assets_extra_path),
              tf.compat.as_bytes(dest_relative))
          dest_path = os.path.dirname(dest_absolute)
          tf.compat.v1.gfile.MakeDirs(dest_path)
          tf.compat.v1.gfile.Copy(source, dest_absolute)

      # Atomically publish the finished export.
      tf.compat.v1.gfile.Rename(temp_export_dir, export_dir)
      return export_dir

  def _add_meta_graph_for_mode(self,
                               builder,
                               input_receiver_fn_map,
                               checkpoint_path,
                               save_variables=True,
                               mode=ModeKeys.PREDICT,
                               export_tags=None,
                               check_variables=True,
                               strip_default_attrs=True):
    """Loads variables and adds them along with a `tf.MetaGraphDef` for saving.

    Args:
      builder: instance of `tf.saved_modle.builder.SavedModelBuilder` that will
        be used for saving.
      input_receiver_fn_map: dict of `tf.estimator.ModeKeys` to
        `input_receiver_fn` mappings, where the `input_receiver_fn` is a
        function that takes no argument and returns the appropriate subclass
        of `InputReceiver`.
      checkpoint_path: The checkpoint path to export.
      save_variables: bool, whether variables should be saved. If `False`, just
        the `tf.MetaGraphDef` will be saved. Note that `save_variables` should
        only be `True` for the first call to this function, and the
        `SavedModelBuilder` will raise an error if that is not the case.
      mode: `tf.estimator.ModeKeys` value indicating which mode will be
        exported.
      export_tags: The set of tags with which to save `tf.MetaGraphDef`. If
        `None`, a default set will be selected to matched the passed mode.
      check_variables: bool, whether to check the checkpoint has all variables.
      strip_default_attrs: bool, whether to strip default attributes. This may
        only be True when called from the deprecated V1
        Estimator.export_savedmodel.

    Raises:
      ValueError: if `save_variables` is `True` and `check_variable` is
        `False`.
    """
    if export_tags is None:
      export_tags = export_lib.EXPORT_TAG_MAP[mode]
    input_receiver_fn = input_receiver_fn_map[mode]
    with tf.Graph().as_default() as g:
      self._create_and_assert_global_step(g)
      tf.compat.v1.random.set_random_seed(self._config.tf_random_seed)

      input_receiver = input_receiver_fn()

      # Call the model_fn and collect the export_outputs.
      estimator_spec = self._call_model_fn(
          features=input_receiver.features,
          labels=getattr(input_receiver, 'labels', None),
          mode=mode,
          config=self.config)

      export_outputs = export_lib.export_outputs_for_mode(
          mode=estimator_spec.mode,
          serving_export_outputs=estimator_spec.export_outputs,
          predictions=estimator_spec.predictions,
          loss=estimator_spec.loss,
          metrics=estimator_spec.eval_metric_ops)

      # Build the SignatureDefs from receivers and all outputs
      signature_def_map = export_lib.build_all_signature_defs(
          input_receiver.receiver_tensors,
          export_outputs,
          getattr(input_receiver, 'receiver_tensors_alternatives', None),
          serving_only=(mode == ModeKeys.PREDICT))

      with tf.compat.v1.Session(config=self._session_config) as session:

        if estimator_spec.scaffold.local_init_op is not None:
          local_init_op = estimator_spec.scaffold.local_init_op
        else:
          local_init_op = tf.compat.v1.train.Scaffold.default_local_init_op()

        # This saver will be used both for restoring variables now,
        # and in saving out the metagraph below. This ensures that any
        # Custom Savers stored with the Scaffold are passed through to the
        # SavedModel for restore later.
        if isinstance(estimator_spec.scaffold.saver, trackable_util.Checkpoint):
          graph_saver = tf.compat.v1.train.Saver(
              var_list=graph_view.ObjectGraphView(
                  estimator_spec.scaffold.saver).frozen_saveable_objects(),
              sharded=True)
        else:
          graph_saver = (
              estimator_spec.scaffold.saver or
              tf.compat.v1.train.Saver(sharded=True))

        if save_variables and not check_variables:
          # NOTE(review): this message renders as
          # "If `save_variables` is `True, `check_variables`must not be ..."
          # — it is missing a closing backtick after `True` and a space
          # between the two concatenated string parts.
          raise ValueError('If `save_variables` is `True, `check_variables`'
                           'must not be `False`.')
        if check_variables:
          try:
            graph_saver.restore(session, checkpoint_path)
          except tf.errors.NotFoundError as e:
            msg = ('Could not load all requested variables from checkpoint. '
                   'Please make sure your model_fn does not expect variables '
                   'that were not saved in the checkpoint.\n\n'
                   'Encountered error with mode `{}` while restoring '
                   'checkpoint from: `{}`. Full Traceback:\n\n{}').format(
                       mode, checkpoint_path, e)
            raise ValueError(msg)

        # We add the train op explicitly for now, so that we don't have to
        # change the Builder public interface. Note that this is a no-op
        # for prediction, where train_op is None.
        builder._add_train_op(estimator_spec.train_op)  # pylint: disable=protected-access

        meta_graph_kwargs = dict(
            tags=export_tags,
            signature_def_map=signature_def_map,
            assets_collection=tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.ASSET_FILEPATHS),
            main_op=local_init_op,
            saver=graph_saver,
            strip_default_attrs=strip_default_attrs)

        if save_variables:
          builder.add_meta_graph_and_variables(session, **meta_graph_kwargs)
        else:
          builder.add_meta_graph(**meta_graph_kwargs)

  def _get_features_from_input_fn(self, input_fn, mode):
    """Extracts the `features` from return values of `input_fn`."""
    result = self._call_input_fn(input_fn, mode)
    # Labels (if any) are discarded here: this helper is for PREDICT-style
    # calls that only need features plus the dataset-initialization hooks.
    result, _, hooks = estimator_util.parse_input_fn_result(result)
    self._validate_features_in_predict_input(result)
    return result, hooks

  def _validate_features_in_predict_input(self, result):
    """Warns when a predict input source can never signal end-of-input."""
    if not _has_dataset_or_queue_runner(result):
      tf.compat.v1.logging.warning(
          'Input graph does not use tf.data.Dataset or contain a '
          'QueueRunner. That means predict yields forever. '
          'This is probably a mistake.')

  def _get_iterator_from_input_fn(self, input_fn, mode, distribution=None):
    """Calls `input_fn` and returns an iterator."""
    if distribution is not None:
      # pylint: disable=g-long-lambda
      iterator = distribution.make_input_fn_iterator(
          lambda input_context: self._call_input_fn(input_fn, mode,
                                                    input_context))
      input_hooks = [
          estimator_util.DistributedIteratorInitializerHook(iterator)
      ]
    else:
      result = self._call_input_fn(input_fn, mode)
      iterator = result.make_initializable_iterator()
      input_hooks = [estimator_util._DatasetInitializerHook(iterator)]  # pylint: disable=protected-access
    return iterator, input_hooks

  def _get_features_and_labels_from_input_fn(self, input_fn, mode):
    """Extracts the `features` and labels from return values of `input_fn`."""
    return estimator_util.parse_input_fn_result(
        self._call_input_fn(input_fn, mode))

  def _extract_batch_length(self, preds_evaluated):
    """Extracts batch length of predictions.

    All values are expected to be array-likes with a leading batch
    dimension (`value.shape[0]`); a mismatch across keys is an error.
    """
    batch_length = None
    for key, value in six.iteritems(preds_evaluated):
      # First value seen establishes the expected batch length.
      batch_length = batch_length or value.shape[0]
      if value.shape[0] != batch_length:
        raise ValueError('Batch length of predictions should be same. %s has '
                         'different batch length than others.' % key)
    return batch_length

  def _extract_keys(self, predictions, predict_keys):
    """Extracts `predict_keys` from `predictions`."""
    if not predict_keys:
      # No filtering requested: pass predictions through unchanged.
      return predictions
    if not isinstance(predictions, dict):
      raise ValueError(
          'predict_keys argument is not valid in case of non-dict predictions.')
    existing_keys = predictions.keys()
    predictions = {
        key: value
        for key, value in six.iteritems(predictions)
        if key in predict_keys
    }
    if not predictions:
      raise ValueError('Expected to run at least one output from %s, '
                       'provided %s.' % (existing_keys, predict_keys))
    return predictions

  def _create_global_step(self, graph):
    """Creates the global step tensor in graph.

    The global step tensor must be an integer type with name 'global_step' and
    be added to the collection `tf.GraphKeys.GLOBAL_STEP`.

    Args:
      graph: The graph in which to create the global step tensor.

    Returns:
      The global step `tf.Tensor`.
    """
    return tf.compat.v1.train.create_global_step(graph)

  def _create_and_assert_global_step(self, graph):
    """Creates and asserts properties of the global step.

    Args:
      graph: The graph in which to create the global step tensor.

    Returns:
      The global step `tf.Tensor`.
    """
    step = self._create_global_step(graph)
    assert step is tf.compat.v1.train.get_global_step()
    assert step.dtype.is_integer
    return step

  def _call_input_fn(self, input_fn, mode, input_context=None):
    """Calls the input function.

    Args:
      input_fn: The input function.
      mode: `tf.estimator.ModeKeys`
      input_context: Optional `tf.distribute.InputContext`; forwarded to
        `input_fn` only when the `input_fn` signature accepts an
        `input_context` argument (used by DistributionStrategy input
        pipelines).

    Returns:
      The return value of the passed `input_fn`, which should be one of:

        * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
          tuple `(features, labels)` with same constraints as below.
        * A tuple `(features, labels)`: Where `features` is a `Tensor` or a
          dictionary of string feature name to `Tensor` and `labels` is a
          `Tensor` or a dictionary of string label name to `Tensor`. Both
          `features` and `labels` are consumed by `model_fn`. They should
          satisfy the expectation of `model_fn` from inputs.

    Raises:
      ValueError: if `input_fn` takes invalid arguments.
    """
    # Only pass the arguments the user's input_fn actually declares.
    input_fn_args = function_utils.fn_args(input_fn)
    kwargs = {}
    if 'mode' in input_fn_args:
      kwargs['mode'] = mode
    if 'params' in input_fn_args:
      kwargs['params'] = self.params
    if 'config' in input_fn_args:
      kwargs['config'] = self.config
    if input_context and 'input_context' in input_fn_args:
      tf.compat.v1.logging.info(
          'The `input_fn` accepts an `input_context` which will '
          'be given by DistributionStrategy')
      kwargs['input_context'] = input_context
    # Input pipelines are pinned to CPU.
    with tf.compat.v1.device('/cpu:0'):
      return input_fn(**kwargs)

  def _call_model_fn(self, features, labels, mode, config):
    """Calls model function.

    Args:
      features: features dict.
      labels: labels dict.
      mode: `tf.estimator.ModeKeys`
      config: `tf.estimator.RunConfig`

    Returns:
      An `tf.estimator.EstimatorSpec` object.

    Raises:
      ValueError: if `model_fn` returns invalid objects.
    """
    # Only pass the arguments the user's model_fn actually declares.
    model_fn_args = function_utils.fn_args(self._model_fn)
    kwargs = {}
    if 'labels' in model_fn_args:
      kwargs['labels'] = labels
    else:
      if labels is not None:
        raise ValueError(
            'model_fn does not take labels, but input_fn returns labels.')
    if 'mode' in model_fn_args:
      kwargs['mode'] = mode
    if 'params' in model_fn_args:
      kwargs['params'] = self.params
    if 'config' in model_fn_args:
      kwargs['config'] = config

    tf.compat.v1.logging.info('Calling model_fn.')
    model_fn_results = self._model_fn(features=features, **kwargs)
    tf.compat.v1.logging.info('Done calling model_fn.')

    if not isinstance(model_fn_results, model_fn_lib.EstimatorSpec):
      raise ValueError('model_fn should return an EstimatorSpec.')

    return model_fn_results

  def _train_model(self, input_fn, hooks, saving_listeners):
    """Dispatches training to the distributed or default implementation."""
    if self._train_distribution:
      return self._train_model_distributed(input_fn, hooks, saving_listeners)
    else:
      return self._train_model_default(input_fn, hooks, saving_listeners)

  def _train_model_default(self, input_fn, hooks, saving_listeners):
    """Initiate training with `input_fn`, without `DistributionStrategies`.

    Args:
      input_fn: A function that provides input data for training as
        minibatches.
      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
        callbacks inside the training loop.
      saving_listeners: list of `tf.train.CheckpointSaverListener` objects.
        Used for callbacks that run immediately before or after checkpoint
        savings.

    Returns:
      Loss from training
    """
    worker_hooks = []
    with tf.Graph().as_default() as g, g.device(self._device_fn):
      tf.compat.v1.random.set_random_seed(self._config.tf_random_seed)
      global_step_tensor = self._create_and_assert_global_step(g)

      # Skip creating a read variable if _create_and_assert_global_step
      # returns None (e.g. tf.contrib.estimator.SavedModelEstimator).
      if global_step_tensor is not None:
        training_util._get_or_create_global_step_read(g)  # pylint: disable=protected-access

      features, labels, input_hooks = (
          self._get_features_and_labels_from_input_fn(input_fn,
                                                      ModeKeys.TRAIN))
      worker_hooks.extend(input_hooks)
      estimator_spec = self._call_model_fn(features, labels, ModeKeys.TRAIN,
                                           self.config)
      # Re-read the global step: the model_fn may have created it if it did
      # not exist before (see the None-returning case above).
      global_step_tensor = tf.compat.v1.train.get_global_step(g)
      return self._train_with_estimator_spec(estimator_spec, worker_hooks,
                                             hooks, global_step_tensor,
                                             saving_listeners)

  def _train_model_distributed(self, input_fn, hooks, saving_listeners):
    """Initiate training with `input_fn`, using `DistributionStrategies`.

    Args:
      input_fn: A function that provides input data for training as
        minibatches.
      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
        callbacks inside the training loop.
      saving_listeners: list of `tf.train.CheckpointSaverListener` objects.
        Used for callbacks that run immediately before or after checkpoint
        savings.

    Returns:
      Loss from training
    """
    # pylint: disable=protected-access
    if (hasattr(self._config, '_distribute_coordinator_mode') and
        self._config._distribute_coordinator_mode):  # pylint: disable=protected-access
      # Multi-worker coordinator mode: the coordinator drives the per-worker
      # training via the lambda below.
      distribute_coordinator_training.estimator_train(
          self,
          lambda est, s, train_hooks: est._actual_train_model_distributed(  # pylint: disable=g-long-lambda
              s, input_fn, train_hooks, saving_listeners),
          hooks)
      return self
    else:
      self._config._train_distribute.configure(self._config.session_config)
      return self._actual_train_model_distributed(
          self._config._train_distribute, input_fn, hooks, saving_listeners)
    # pylint: enable=protected-access

  def _actual_train_model_distributed(self, strategy, input_fn, hooks,
                                      saving_listeners):
    """That method that does actual training with distribution strategy."""
    # TODO(sourabhbajaj): Remove this hack once we migrate the other strategies
    # to use the new API
    is_tpu_strategy = strategy.__class__.__name__.startswith('TPUStrategy')
    worker_hooks = []
    with tf.Graph().as_default() as g:
      # We want to create the iterations variable outside the distribution scope
      # as that is just stored on the host and mainly used to drive the loop
      # and doesn't need to be a Mirrored/Device variable.
      if is_tpu_strategy:
        steps_per_run_variable = training.get_or_create_steps_per_run_variable()

      # Set flag on the distribution strategy so that optimizer v1 is
      # distribution aware and scales the losses by number of replicas.
      # This is required only for backward compatibility with estimator and
      # V1 optimizer. TF2 will not do this scaling.
      if hasattr(strategy, '_scale_loss_for_estimator_enabled'):
        scale_ctx = strategy._scale_loss_for_estimator_enabled()  # pylint: disable=protected-access
      else:
        # TODO(psv): Remove this clause after estimator repo gets the
        # distribute library changes related to loss scaling.
        @tf_contextlib.contextmanager
        def nullcontextmanager():
          # No-op stand-in for strategies without the loss-scaling context.
          yield

        scale_ctx = nullcontextmanager()

      with strategy.scope(), scale_ctx:
        tf.compat.v1.random.set_random_seed(self._config.tf_random_seed)
        iterator, input_hooks = self._get_iterator_from_input_fn(
            input_fn, ModeKeys.TRAIN, strategy)
        worker_hooks.extend(input_hooks)
        global_step_tensor = self._create_and_assert_global_step(g)
        # we want to add to the global collection in the main thread not the
        # replica threads.
        tf.compat.v1.add_to_collection(
            training_util.GLOBAL_STEP_READ_KEY,
            strategy.extended.read_var(global_step_tensor))

        if is_tpu_strategy:
          # Create a step_fn from the train_op of grouped_estimator_spec
          def step_fn(ctx, inputs):
            """A single step that is passed to run_on_dataset."""
            if isinstance(inputs, tuple):
              features, labels = inputs
            else:
              # Dataset yielded features only; labels are absent.
              features = inputs
              labels = None
            estimator_spec = strategy.extended.call_for_each_replica(
                self._call_model_fn,
                args=(features, labels, ModeKeys.TRAIN, self.config))
            ctx.set_last_step_output(
                name='loss',
                output=estimator_spec.loss,
                reduce_op=_get_loss_reduce_op_for_reporting())
            ctx.set_non_tensor_output(
                name='estimator_spec', output=estimator_spec)
            return estimator_spec.train_op

          # Create new train_op post graph rewrites
          initial_training_loss = tf.constant(1e7)
          ctx = strategy.extended.experimental_run_steps_on_iterator(
              step_fn,
              iterator,
              iterations=steps_per_run_variable,
              initial_loop_values={'loss': initial_training_loss})
          distributed_train_op = ctx.run_op
          loss = ctx.last_step_outputs['loss']
          grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
        else:
          features, labels = estimator_util.parse_iterator_result(
              iterator.get_next())
          grouped_estimator_spec = strategy.extended.call_for_each_replica(
              self._call_model_fn,
              args=(
                  features,
                  labels,  # although this will be None it seems
                  ModeKeys.TRAIN,
                  self.config))
          # Reduce the per-replica losses into a single reported loss.
          loss = strategy.reduce(
              _get_loss_reduce_op_for_reporting(),
              grouped_estimator_spec.loss,
              axis=None)
          distributed_train_op = grouped_estimator_spec.train_op

        scaffold = _combine_distributed_scaffold(
            grouped_estimator_spec.scaffold, strategy)

        # TODO(yuefengz): add a test for unwrapping per_device_hooks.
        def get_hooks_from_the_first_device(per_device_hooks):
          # Hooks are replicated per device; running them once (first device)
          # is sufficient.
          return [
              self._train_distribution.experimental_local_results(
                  per_device_hook)[0] for per_device_hook in per_device_hooks
          ]

        training_hooks = get_hooks_from_the_first_device(
            grouped_estimator_spec.training_hooks)
        training_chief_hooks = get_hooks_from_the_first_device(
            grouped_estimator_spec.training_chief_hooks)

        estimator_spec = model_fn_lib.EstimatorSpec(
            mode=grouped_estimator_spec.mode,
            loss=loss,
            train_op=strategy.group(distributed_train_op),
            training_hooks=training_hooks,
            training_chief_hooks=training_chief_hooks,
            scaffold=scaffold)
        return self._train_with_estimator_spec(estimator_spec, worker_hooks,
                                               hooks, global_step_tensor,
                                               saving_listeners)

  def _train_with_estimator_spec_distributed(self, estimator_spec,
                                             worker_hooks, saving_listener):
    """Train a model with the given Estimator Spec and Distribution Strategy."""
    if saving_listener:
      # NOTE(review): "listenor" below is a typo for "listener" in this
      # user-facing message (left unchanged here; string is runtime behavior).
      raise ValueError('Saving listenor is not supported by the current '
                       'Distribution Strategies.')
    # TODO: consolidate code duplication in _train_with_estimator_spec
    with training.MonitoredTrainingSession(
        master=self._config.master,
        is_chief=self._config.is_chief,
        checkpoint_dir=self._model_dir,
        scaffold=estimator_spec.scaffold,
        hooks=worker_hooks,
        chief_only_hooks=tuple(estimator_spec.training_chief_hooks),
        save_checkpoint_secs=self._config.save_checkpoints_secs,
        save_checkpoint_steps=self._config.save_checkpoints_steps,
        save_summaries_steps=self._config.save_summary_steps,
        config=self._session_config,
        max_wait_secs=self._config.session_creation_timeout_secs,
        log_step_count_steps=self._config.log_step_count_steps,
        save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess:
      loss = None
      current_step = 0
      while not mon_sess.should_stop():
        current_step += 1
        # just as keras(https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/python/keras/engine/training.py#L1093),
        # trace should be enabled for every step
        with trace.Trace('train', step_num=current_step, _r=1):
          _, loss = mon_sess.run(
              [estimator_spec.train_op, estimator_spec.loss])
      if current_step == 0:
        tf.compat.v1.logging.warn('Training with estimator made no steps. '
                                  'Perhaps input is empty or misspecified.')
    return loss

  def _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks,
                                 global_step_tensor, saving_listeners):
    """Train a model with the given Estimator Spec."""
    if (self._warm_start_settings and
        not tf.train.latest_checkpoint(self._model_dir)):
      # Only warm-start on a fresh model_dir (no checkpoint yet).
      tf.compat.v1.logging.info('Warm-starting with WarmStartSettings: %s' %
                                (self._warm_start_settings,))
      tf.compat.v1.train.warm_start(*self._warm_start_settings)
    # Check if the user created a loss summary, and add one if they didn't.
    # We assume here that the summary is called 'loss'. If it is not, we will
    # make another one with the name 'loss' to ensure it shows up in the right
    # graph in TensorBoard.
    if not any([
        x.op.name == 'loss'
        for x in ops.get_collection(ops.GraphKeys.SUMMARIES)
    ]):
      summary.scalar('loss', estimator_spec.loss)
    ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss)
    worker_hooks.extend(hooks)
    worker_hooks.append(tf.compat.v1.train.NanTensorHook(estimator_spec.loss))
    if self._config.log_step_count_steps is not None:
      worker_hooks.append(
          tf.compat.v1.train.LoggingTensorHook(
              {
                  'loss': estimator_spec.loss,
                  'step': global_step_tensor
              },
              every_n_iter=self._config.log_step_count_steps))
    worker_hooks.extend(estimator_spec.training_hooks)
    # Ensure a Saver exists in the SAVERS collection for checkpointing.
    if not (estimator_spec.scaffold.saver or
            tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.SAVERS)):
      tf.compat.v1.add_to_collection(
          tf.compat.v1.GraphKeys.SAVERS,
          tf.compat.v1.train.Saver(
              sharded=True,
              max_to_keep=self._config.keep_checkpoint_max,
              keep_checkpoint_every_n_hours=(
                  self._config.keep_checkpoint_every_n_hours),
              defer_build=True,
              save_relative_paths=True))

    # Collective/multi-worker strategies use the distributed training-session
    # path instead of the hook-based checkpointing below.
    if (self._config.cluster_spec and
        type(self._train_distribution).__name__ in
        ('CollectiveAllReduceStrategy', 'CollectiveAllReduceStrategyV1',
         'MultiWorkerMirroredStrategy')):
      return self._train_with_estimator_spec_distributed(
          estimator_spec, worker_hooks, saving_listeners)

    chief_hooks = []
    all_hooks = worker_hooks + list(estimator_spec.training_chief_hooks)
    saver_hooks = [
        h for h in all_hooks
        if isinstance(h, tf.compat.v1.train.CheckpointSaverHook)
    ]
    if (self._config.save_checkpoints_secs or
        self._config.save_checkpoints_steps):
      if not saver_hooks:
        chief_hooks = [
            tf.compat.v1.train.CheckpointSaverHook(
                self._model_dir,
                save_secs=self._config.save_checkpoints_secs,
                save_steps=self._config.save_checkpoints_steps,
                scaffold=estimator_spec.scaffold,
                save_graph_def=self._config.checkpoint_save_graph_def)
        ]
        saver_hooks = [chief_hooks[0]]
    if saving_listeners:
      if not saver_hooks:
        raise ValueError(
            'There should be a CheckpointSaverHook to use saving_listeners. '
            'Please set one of the RunConfig.save_checkpoints_steps or '
            'RunConfig.save_checkpoints_secs.')
      else:
        # It is expected to have one CheckpointSaverHook. If multiple, we pick
        # up the first one to add listener.
        for listener in saving_listeners:
          # pylint: disable=protected-access
          if listener not in saver_hooks[0]._listeners:
            saver_hooks[0]._listeners.append(listener)  # pylint: disable=protected-access

    # Add summary hooks to worker 0 if we are running with a master, to ensure
    # that summaries are written at correct intervals even with long-running
    # evaluations.
    save_summary_steps = self._config.save_summary_steps
    log_step_count_steps = self._config.log_step_count_steps

    # Check existence of appropriate cluster spec fields, as well as master and
    # worker nodes. As master also performs evaluation, summary writing must
    # occur on a different node. The presence of a worker is also checked to
    # prevent reassigning hooks for single-replica jobs with just a master node.
if (self._config.cluster_spec and self._config.cluster_spec.jobs and (run_config.TaskType.WORKER in self._config.cluster_spec.jobs) and (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)): # Update config values to prevent the default hooks from being created on # the master or other workers. save_summary_steps = 0 log_step_count_steps = None if (self._config.task_type == run_config.TaskType.WORKER and self._config.task_id == 0): if (self._config.save_summary_steps and self._config.save_summary_steps > 0): worker_hooks.append( tf.compat.v1.train.SummarySaverHook( save_steps=self._config.save_summary_steps, output_dir=self._config.model_dir, scaffold=estimator_spec.scaffold)) if (self._config.log_step_count_steps and self._config.log_step_count_steps > 0): worker_hooks.append( tf.compat.v1.train.StepCounterHook( every_n_steps=self._config.log_step_count_steps, output_dir=self._config.model_dir)) with training.MonitoredTrainingSession( master=self._config.master, is_chief=self._config.is_chief, checkpoint_dir=self._model_dir, scaffold=estimator_spec.scaffold, hooks=worker_hooks, chief_only_hooks=(tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)), save_checkpoint_secs=0, # Saving is handled by a hook. save_summaries_steps=save_summary_steps, config=self._session_config, max_wait_secs=self._config.session_creation_timeout_secs, log_step_count_steps=log_step_count_steps, save_graph_def=self._config.checkpoint_save_graph_def) as mon_sess: loss = None current_step = 0 while not mon_sess.should_stop(): current_step += 1 # just as keras(https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/python/keras/engine/training.py#L1093), # trace should be enabled for every step with trace.Trace('train', step_num=current_step, _r=1): _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss]) if current_step == 0: tf.compat.v1.logging.warn('Training with estimator made no steps. 
' 'Perhaps input is empty or misspecified.')
    return loss

  def _evaluate_build_graph(self, input_fn, hooks=None, checkpoint_path=None):
    """Builds the graph and related hooks to run evaluation.

    Args:
      input_fn: Input function returning features/labels for `ModeKeys.EVAL`.
      hooks: List of `SessionRunHook`s to run during evaluation.
        NOTE(review): a `None` value would raise in `extend` below; callers
        appear to always pass a list — confirm.
      checkpoint_path: Checkpoint path, used here only to decide whether to
        warm-start (see `_maybe_warm_start`).

    Returns:
      Tuple of `(scaffold, update_op, eval_dict, all_hooks)`.

    Raises:
      ValueError: If the model emits a metric named `global_step`, which
        would collide with the default metric added below.
    """
    tf.compat.v1.random.set_random_seed(self._config.tf_random_seed)
    self._create_and_assert_global_step(tf.compat.v1.get_default_graph())

    # Distributed eval builds metrics per replica; otherwise call model_fn
    # directly.
    if self._eval_distribution:
      (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict) = (
          self._call_model_fn_eval_distributed(input_fn, self.config))
    else:
      (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict) = (
          self._call_model_fn_eval(input_fn, self.config))

    global_step_tensor = tf.compat.v1.train.get_global_step(
        tf.compat.v1.get_default_graph())
    # Call to warm_start has to be after model_fn is called.
    self._maybe_warm_start(checkpoint_path)

    if tf.compat.v1.GraphKeys.GLOBAL_STEP in eval_dict:
      raise ValueError(
          'Metric with name `global_step` is not allowed, because Estimator '
          'already defines a default metric with the same name.')
    # The global step is always reported alongside the user metrics.
    eval_dict[tf.compat.v1.GraphKeys.GLOBAL_STEP] = global_step_tensor

    all_hooks = list(input_hooks)
    all_hooks.extend(hooks)
    all_hooks.extend(list(evaluation_hooks or []))
    # New local variables have been added, so update the estimator spec's
    # local init op if it was defined.
    if scaffold and scaffold.local_init_op:
      # Ensure that eval step has been created before updating local init op.
      evaluation._get_or_create_eval_step()  # pylint: disable=protected-access

      scaffold = tf.compat.v1.train.Scaffold(
          local_init_op=tf.group(
              scaffold.local_init_op,
              tf.compat.v1.train.Scaffold.default_local_init_op()),
          copy_from_scaffold=scaffold)

    return scaffold, update_op, eval_dict, all_hooks

  def _call_model_fn_eval(self, input_fn, config):
    """Call model_fn for evaluation and handle return values.

    Returns:
      Tuple of `(scaffold, evaluation_hooks, input_hooks, update_op,
      eval_dict)`, where `eval_dict` maps metric names to value tensors and
      `update_op` groups all metric update ops.
    """
    features, labels, input_hooks = self._get_features_and_labels_from_input_fn(
        input_fn, ModeKeys.EVAL)
    estimator_spec = self._call_model_fn(features, labels, ModeKeys.EVAL,
                                         config)
    # Adds the mandatory `loss` metric; raises if model_fn already defined a
    # metric under the reserved name.
    eval_metric_ops = _verify_and_create_loss_metric(
        estimator_spec.eval_metric_ops, estimator_spec.loss)
    update_op, eval_dict = _extract_metric_update_ops(eval_metric_ops)
    return (estimator_spec.scaffold, estimator_spec.evaluation_hooks,
            input_hooks, update_op, eval_dict)

  def _call_model_fn_eval_distributed(self, input_fn, config):
    """Call model_fn in distribution mode and handle return values."""
    iterator, input_hooks = self._get_iterator_from_input_fn(
        input_fn, ModeKeys.EVAL, self._eval_distribution)

    # NOTE(review): TPU strategies are detected by class-name prefix only;
    # verify this matches all TPUStrategy variants in use.
    is_tpu_strategy = (
        self._eval_distribution.__class__.__name__.startswith('TPUStrategy'))

    if is_tpu_strategy:
      steps_per_run_variable = training.get_or_create_steps_per_run_variable()

      def step_fn(ctx, inputs):
        """Runs one step of the eval computation and captures outputs."""
        if isinstance(inputs, tuple):
          features, labels = inputs
        else:
          features = inputs
          labels = None
        estimator_spec = self._eval_distribution.extended.call_for_each_replica(
            self._call_model_fn, args=(features, labels, ModeKeys.EVAL, config))
        eval_metric_ops = _verify_and_create_loss_metric(
            estimator_spec.eval_metric_ops, estimator_spec.loss,
            self._eval_distribution)
        update_op, eval_dict = _extract_metric_update_ops(
            eval_metric_ops, self._eval_distribution)
        # Non-tensor outputs survive the multi-step run and are read back
        # from the returned context below.
        ctx.set_non_tensor_output(name='estimator_spec', output=estimator_spec)
        ctx.set_non_tensor_output(name='eval_dict', output=eval_dict)
        return update_op

      # TODO(priyag): Fix eval
      # step hook to account for steps_per_run.
      ctx = self._eval_distribution.extended.experimental_run_steps_on_iterator(
          step_fn, iterator, iterations=steps_per_run_variable)
      update_op = ctx.run_op
      eval_dict = ctx.non_tensor_outputs['eval_dict']
      grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
    else:
      features, labels = estimator_util.parse_iterator_result(
          iterator.get_next())
      grouped_estimator_spec = (
          self._eval_distribution.extended.call_for_each_replica(
              self._call_model_fn,
              args=(features, labels, ModeKeys.EVAL, config)))
      eval_metric_ops = _verify_and_create_loss_metric(
          grouped_estimator_spec.eval_metric_ops, grouped_estimator_spec.loss,
          self._eval_distribution)
      update_op, eval_dict = _extract_metric_update_ops(eval_metric_ops,
                                                        self._eval_distribution)

    # Merge the per-replica scaffolds into one usable by the monitored
    # session.
    scaffold = _combine_distributed_scaffold(grouped_estimator_spec.scaffold,
                                             self._eval_distribution)

    def get_hooks_from_the_first_device(per_device_hooks):
      # Hooks are replicated per device; only the first replica's copy runs.
      return [
          self._eval_distribution.experimental_local_results(per_device_hook)[0]
          for per_device_hook in per_device_hooks
      ]

    evaluation_hooks = get_hooks_from_the_first_device(
        grouped_estimator_spec.evaluation_hooks)

    return (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict)

  def _evaluate_run(self, checkpoint_path, scaffold, update_op, eval_dict,
                    all_hooks, output_dir):
    """Run evaluation and write results to summaries under `output_dir`.

    Returns:
      Dict of evaluation results, including the global step under
      `tf.compat.v1.GraphKeys.GLOBAL_STEP`.
    """
    eval_results = evaluation._evaluate_once(  # pylint: disable=protected-access
        checkpoint_path=checkpoint_path,
        master=self._config.evaluation_master,
        scaffold=scaffold,
        eval_ops=update_op,
        final_ops=eval_dict,
        hooks=all_hooks,
        config=self._session_config)

    current_global_step = eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP]

    _write_dict_to_summary(
        output_dir=output_dir,
        dictionary=eval_results,
        current_global_step=current_global_step)

    # Also record which checkpoint produced these metrics, when known.
    if checkpoint_path:
      _write_checkpoint_path_to_summary(
          output_dir=output_dir,
          checkpoint_path=checkpoint_path,
          current_global_step=current_global_step)

    return eval_results

  def _maybe_warm_start(self, checkpoint_path):
    # Warm-start only applies to a fresh run: an existing checkpoint
    # (`checkpoint_path` truthy) means training resumes from it instead.
    if not checkpoint_path and self._warm_start_settings:
      tf.compat.v1.logging.info('Warm-starting with WarmStartSettings: %s' %
                                (self._warm_start_settings,))
      tf.compat.v1.train.warm_start(*self._warm_start_settings)

  @deprecation.deprecated(
      None, 'This function has been renamed, use `export_saved_model` instead.')
  def export_savedmodel(self,
                        export_dir_base,
                        serving_input_receiver_fn,
                        assets_extra=None,
                        as_text=False,
                        checkpoint_path=None,
                        strip_default_attrs=False):
    # pylint: disable=line-too-long
    """Exports inference graph as a `SavedModel` into the given dir.

    For a detailed guide, see [SavedModel from
    Estimators.](https://www.tensorflow.org/guide/estimator#savedmodels_from_estimators).

    This method builds a new graph by first calling the
    `serving_input_receiver_fn` to obtain feature `Tensor`s, and then calling
    this `Estimator`'s `model_fn` to generate the model graph based on those
    features. It restores the given checkpoint (or, lacking that, the most
    recent checkpoint) into this graph in a fresh session. Finally it creates
    a timestamped export directory below the given `export_dir_base`, and
    writes a `SavedModel` into it containing a single `tf.MetaGraphDef` saved
    from this session.

    The exported `MetaGraphDef` will provide one `SignatureDef` for each
    element of the `export_outputs` dict returned from the `model_fn`, named
    using the same keys. One of these keys is always
    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
    indicating which signature will be served when a serving request does not
    specify one. For each signature, the outputs are provided by the
    corresponding `tf.estimator.export.ExportOutput`s, and the inputs are
    always the input receivers provided by the `serving_input_receiver_fn`.

    Extra assets may be written into the `SavedModel` via the `assets_extra`
    argument. This should be a dict, where each key gives a destination path
    (including the filename) relative to the assets.extra directory. The
    corresponding value gives the full path of the source file to be copied.
    For example, the simple case of copying a single file without renaming it
    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.

    Args:
      export_dir_base: A string containing a directory in which to create
        timestamped subdirectories containing exported `SavedModel`s.
      serving_input_receiver_fn: A function that takes no argument and returns
        a `tf.estimator.export.ServingInputReceiver` or
        `tf.estimator.export.TensorServingInputReceiver`.
      assets_extra: A dict specifying how to populate the assets.extra
        directory within the exported `SavedModel`, or `None` if no extra
        assets are needed.
      as_text: whether to write the `SavedModel` proto in text format.
      checkpoint_path: The checkpoint path to export. If `None` (the default),
        the most recent checkpoint found within the model directory is chosen.
      strip_default_attrs: Boolean. If `True`, default-valued attributes will
        be removed from the `NodeDef`s. For a detailed guide, see [Stripping
        Default-Valued Attributes](
        https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).

    Returns:
      The path to the exported directory as a bytes object.

    Raises:
      ValueError: if no `serving_input_receiver_fn` is provided, no
        `export_outputs` are provided, or no checkpoint can be found.
    """
    # pylint: enable=line-too-long
    if not serving_input_receiver_fn:
      raise ValueError('An input_receiver_fn must be defined.')

    # Delegates to the multi-mode exporter with only the PREDICT receiver.
    return self._export_all_saved_models(
        export_dir_base, {ModeKeys.PREDICT: serving_input_receiver_fn},
        assets_extra=assets_extra,
        as_text=as_text,
        checkpoint_path=checkpoint_path,
        strip_default_attrs=strip_default_attrs)


@estimator_export('estimator.Estimator', v1=[])  # pylint: disable=missing-docstring
class EstimatorV2(Estimator):
  __doc__ = Estimator.__doc__

  export_savedmodel = deprecation.hide_attribute_from_api(
      '`Estimator.export_savedmodel` has been deprecated. 
Please use '
      '`export_saved_model` instead.')

  def _assert_members_are_not_overridden(self):
    """Asserts members of `Estimator` are not overridden."""
    _assert_members_are_not_overridden(EstimatorV2, self)


def _get_loss_reduce_op_for_reporting():
  """Returns the reduce op used when reporting loss across replicas.

  If the default graph flags that the optimizer already scaled the loss, the
  distribution strategy's configured reduction is used; otherwise per-replica
  losses are summed.
  """
  graph = tf.compat.v1.get_default_graph()
  if getattr(graph, '_is_loss_scaled_by_optimizer', False):  # pylint: disable=protected-access
    return tf.compat.v1.distribute.get_loss_reduction()
  return tf.distribute.ReduceOp.SUM


def _assert_members_are_not_overridden(cls, obj):
  """Assert Estimator methods are not overwritten."""
  # TPUEstimator is special cased (owned by TF).
  if obj.__class__.__name__ == 'TPUEstimator':
    return

  # Members a subclass is explicitly allowed to redefine (model_fn plus
  # export/API bookkeeping attributes).
  allowed_overrides = set([
      'model_fn', '_create_and_assert_global_step', '_export_all_saved_models',
      '_tf_api_names', '_tf_api_names_v1', '_estimator_api_names',
      '_estimator_api_names_v1', '_estimator_api_constants',
      '_estimator_api_constants_v1', 'latest_checkpoint'
  ])

  estimator_members = set([m for m in dir(cls) if not m.startswith('__')])
  subclass_members = set(obj.__class__.__dict__.keys())
  common_members = estimator_members & subclass_members - allowed_overrides
  # Only flag members whose attribute actually differs from the base class.
  overridden_members = [
      m for m in common_members
      if getattr(cls, m) != getattr(obj.__class__, m)
  ]
  if overridden_members:
    raise ValueError(
        'Subclasses of Estimator cannot override members of Estimator. '
        '{} does override {}'.format(obj.__class__, overridden_members))


def _verify_and_create_loss_metric(eval_metric_ops, loss, distribution=None):
  """Creates a metric for loss and throws an error if one already exists.

  Args:
    eval_metric_ops: Dict of metric name to metric ops; mutated in place.
    loss: Loss tensor to wrap in a mean metric.
    distribution: Optional distribution strategy; when set, the metric is
      created once per replica.

  Returns:
    The (mutated) `eval_metric_ops` dict with the loss metric added under
    the reserved key.
  """
  if model_fn_lib.LOSS_METRIC_KEY in eval_metric_ops:
    raise ValueError(
        'Metric with name "%s" is not allowed, because Estimator ' %
        (model_fn_lib.LOSS_METRIC_KEY) +
        'already defines a default metric with the same name.')
  if distribution is None:
    loss_metric = tf.compat.v1.metrics.mean(loss)
  else:
    loss_metric = distribution.extended.call_for_each_replica(
        tf.compat.v1.metrics.mean, args=(loss,))
  eval_metric_ops[model_fn_lib.LOSS_METRIC_KEY] = loss_metric
  return eval_metric_ops


def maybe_overwrite_model_dir_and_session_config(config, model_dir):
  """Overwrite estimator config by `model_dir` and `session_config` if needed.

  Args:
    config: Original estimator config.
    model_dir: Estimator model checkpoint directory.

  Returns:
    Overwritten estimator config.

  Raises:
    ValueError: Model directory inconsistent between `model_dir` and `config`.
  """
  if config is None:
    config = run_config.RunConfig()
    tf.compat.v1.logging.info('Using default config.')
  if not isinstance(config, run_config.RunConfig):
    raise ValueError(
        'config must be an instance of `RunConfig`, but provided %s.' % config)

  if config.session_config is None:
    session_config = run_config.get_default_session_config()
    config = run_config.RunConfig.replace(config, session_config=session_config)

  model_dir = run_config.path_to_str(model_dir)
  if model_dir is not None:
    # An explicit model_dir must agree with one already set on the config.
    if (getattr(config, 'model_dir', None) is not None and
        config.model_dir != model_dir):
      raise ValueError(
          '`model_dir` are set both in constructor and `RunConfig`, but with '
          "different values. 
In constructor: '{}', in `RunConfig`: "
          "'{}' ".format(model_dir, config.model_dir))

  if model_dir:
    config = run_config.RunConfig.replace(config, model_dir=model_dir)
  elif getattr(config, 'model_dir', None) is None:
    # Neither source supplied a model_dir: fall back to a temporary folder.
    model_dir = tempfile.mkdtemp()
    tf.compat.v1.logging.warn('Using temporary folder as model directory: %s',
                              model_dir)
    config = run_config.RunConfig.replace(config, model_dir=model_dir)

  return config


def create_per_replica_ready_for_local_init_op(scaffold):
  """Create a `tf.train.Scaffold.ready_for_local_init_op` inside a replica."""
  if scaffold.ready_for_local_init_op:
    return scaffold.ready_for_local_init_op

  def default_ready_for_local_init_op():
    return tf.compat.v1.report_uninitialized_variables(
        tf.compat.v1.global_variables())

  return tf.compat.v1.train.Scaffold.get_or_default(
      'ready_for_local_init_op',
      tf.compat.v1.GraphKeys.READY_FOR_LOCAL_INIT_OP,
      default_ready_for_local_init_op)


def _combine_distributed_scaffold(grouped_scaffold, distribution):
  """Combines scaffold(s) returned from `call_for_each_replica`.

  Per-replica ops are grouped; single-valued fields (init_fn, saver) take the
  first replica's value.

  Args:
    grouped_scaffold: Per-replica scaffold(s) returned by
      `call_for_each_replica`.
    distribution: The distribution strategy that produced them.

  Returns:
    A single merged `tf.compat.v1.train.Scaffold`.
  """
  # TODO(anjalisridhar): Figure out how to resolve the following scaffold
  # parameters: init_feed_dict, init_fn.
  scaffold_list = distribution.experimental_local_results(grouped_scaffold)
  init_feed_dict = [
      s.init_feed_dict for s in scaffold_list if s.init_feed_dict is not None
  ]
  if init_feed_dict:
    init_feed_dict = distribution.group(init_feed_dict)
  else:
    init_feed_dict = None

  # Only the first replica's user init fn is kept.
  init_fn = [
      s._user_init_fn for s in scaffold_list if s._user_init_fn is not None  # pylint: disable=protected-access
  ]
  if init_fn:
    init_fn = init_fn[0]
  else:
    init_fn = None

  init_op = [s.init_op for s in scaffold_list if s.init_op is not None]
  if init_op:
    init_op = distribution.group(init_op)
  else:
    init_op = None

  def _unwrap_and_concat(value):
    # Flattens per-replica values; multiple results are concatenated on
    # axis 0 so readiness checks see all replicas' variables.
    value = tf.nest.flatten(distribution.experimental_local_results(value))
    if len(value) != 1:
      return tf.concat(value, 0)
    return value[0]

  ready_op = distribution.extended.call_for_each_replica(
      lambda scaffold: scaffold.ready_op, args=(grouped_scaffold,))
  if ready_op is not None:
    ready_op = _unwrap_and_concat(ready_op)

  ready_for_local_init_op = distribution.extended.call_for_each_replica(
      create_per_replica_ready_for_local_init_op, args=(grouped_scaffold,))
  if ready_for_local_init_op is not None:
    ready_for_local_init_op = _unwrap_and_concat(ready_for_local_init_op)
  else:
    ready_for_local_init_op = None

  local_init_op = [
      s.local_init_op for s in scaffold_list if s.local_init_op is not None
  ]
  if local_init_op:
    local_init_op = distribution.group(local_init_op)
  else:
    local_init_op = None

  summary_op = [s.summary_op for s in scaffold_list if s.summary_op is not None]
  if summary_op:
    summary_op = distribution.group(summary_op)
  else:
    summary_op = None

  # Only the first replica's saver is kept.
  savers = [s.saver for s in scaffold_list if s.saver is not None]
  if savers:
    saver = savers[0]
  else:
    saver = None

  scaffold = tf.compat.v1.train.Scaffold(
      init_op=init_op,
      ready_op=ready_op,
      ready_for_local_init_op=ready_for_local_init_op,
      local_init_op=local_init_op,
      summary_op=summary_op,
      saver=saver,
      init_feed_dict=init_feed_dict,
      init_fn=init_fn)
  return scaffold


def _check_checkpoint_available(model_dir):
  """Raises ValueError if `model_dir` holds no trained checkpoint."""
  latest_path = 
tf.train.latest_checkpoint(model_dir)
  if not latest_path:
    raise ValueError(
        'Could not find trained model in model_dir: {}.'.format(model_dir))


def _check_hooks_type(hooks):
  """Returns hooks if all are `SessionRunHook`, raises TypeError otherwise."""
  hooks = list(hooks or [])
  for h in hooks:
    if not isinstance(h, tf.compat.v1.train.SessionRunHook):
      raise TypeError('Hooks must be a SessionRunHook, given: {}'.format(h))
  return hooks


def _check_listeners_type(saving_listeners):
  """Check listeners type; returns them as a list (empty when None)."""
  listeners = list(saving_listeners or [])
  for l in listeners:
    if not isinstance(l, tf.compat.v1.train.CheckpointSaverListener):
      raise TypeError(
          'saving_listeners must be a list of CheckpointSaverListener, '
          'given: {}'.format(l))
  return listeners


def _get_replica_device_setter(config):
  """Creates a replica device setter if required as a default `device_fn`.

  `Estimator` uses `tf.train.ReplicaDeviceSetter` as a default device placer.
  It sets the distributed related arguments such as number of `ps_replicas`
  based on given `config`.

  Args:
    config: A `tf.estimator.RunConfig` instance.

  Returns:
    A replica device setter, or `None`.
  """
  if config.task_type:
    worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id)
  else:
    worker_device = '/job:worker'

  # A device setter only makes sense when parameter servers exist.
  if config.num_ps_replicas > 0:
    return tf.compat.v1.train.replica_device_setter(
        ps_tasks=config.num_ps_replicas,
        worker_device=worker_device,
        merge_devices=True,
        ps_ops=list(device_setter.STANDARD_PS_OPS),
        cluster=config.cluster_spec)
  else:
    return None


def _verify_model_fn_args(model_fn, params):
  """Verifies `model_fn` arguments.

  Raises:
    ValueError: If `model_fn` lacks a `features` arg, takes args outside
      `_VALID_MODEL_FN_ARGS`, or `params` was supplied but `model_fn` has no
      `params` arg.
  """
  args = set(function_utils.fn_args(model_fn))
  if 'features' not in args:
    raise ValueError('model_fn (%s) must include features argument.' %
                     model_fn)
  if params is not None and 'params' not in args:
    raise ValueError('model_fn (%s) does not include params argument, '
                     'but params (%s) is passed to Estimator.'
                     % (model_fn, params))
  if params is None and 'params' in args:
    # Having a params arg with no params supplied is legal, but worth noting.
    tf.compat.v1.logging.warn(
        'Estimator\'s model_fn (%s) includes params '
        'argument, but params are not passed to Estimator.', model_fn)
  non_valid_args = list(args - _VALID_MODEL_FN_ARGS)
  if non_valid_args:
    raise ValueError('model_fn (%s) has following not expected args: %s' %
                     (model_fn, non_valid_args))


def _load_global_step_from_checkpoint_dir(checkpoint_dir):
  """Returns the global step from the latest checkpoint, or 0 on failure."""
  try:
    checkpoint_reader = tf.compat.v1.train.NewCheckpointReader(
        tf.train.latest_checkpoint(checkpoint_dir))
    return checkpoint_reader.get_tensor(tf.compat.v1.GraphKeys.GLOBAL_STEP)
  except:  # pylint: disable=bare-except
    # NOTE(review): deliberately best-effort — any failure (no checkpoint,
    # unreadable file) is treated as step 0 rather than raised.
    return 0


def _extract_metric_update_ops(eval_dict, distribution=None):
  """Separate update operations from metric value operations.

  Args:
    eval_dict: Dict of metric name to `(value_op, update_op)` pairs.
    distribution: Optional distribution strategy used to group per-replica
      update ops.

  Returns:
    Tuple `(update_op, value_ops)`; `update_op` is `None` when `eval_dict`
    is empty.
  """
  update_ops = []
  value_ops = {}
  # Sort metrics lexicographically so graph is identical every time.
  for name, value in sorted(six.iteritems(eval_dict)):
    value_ops[name] = value[0]
    update_ops.append(
        distribution.group(value[1]) if distribution else value[1])

  update_op = tf.group(*update_ops) if update_ops else None
  return update_op, value_ops


def _dict_to_str(dictionary):
  """Get a `str` representation of a `dict`.

  Binary-typed values are skipped (they are serialized summaries, not
  human-readable).

  Args:
    dictionary: The `dict` to be represented as `str`.

  Returns:
    A `str` representing the `dictionary`.
  """
  return ', '.join('%s = %s' % (k, v)
                   for k, v in sorted(six.iteritems(dictionary))
                   if not isinstance(v, six.binary_type))


def _write_dict_to_summary(output_dir, dictionary, current_global_step):
  """Writes a `dict` into summary file in given output directory.

  Args:
    output_dir: `str`, directory to write the summary file in.
    dictionary: the `dict` to be written to summary file.
    current_global_step: `int`, the current global step.
""" tf.compat.v1.logging.info('Saving dict for global step %d: %s', current_global_step, _dict_to_str(dictionary)) summary_writer = tf.compat.v1.summary.FileWriterCache.get(output_dir) summary_proto = summary_pb2.Summary() for key in dictionary: if dictionary[key] is None: continue if key == 'global_step': continue if (isinstance(dictionary[key], np.float32) or isinstance(dictionary[key], float)): summary_proto.value.add(tag=key, simple_value=float(dictionary[key])) elif (isinstance(dictionary[key], np.int64) or isinstance(dictionary[key], np.int32) or isinstance(dictionary[key], int)): summary_proto.value.add(tag=key, simple_value=int(dictionary[key])) elif isinstance(dictionary[key], six.binary_type): try: summ = summary_pb2.Summary.FromString(dictionary[key]) for i, _ in enumerate(summ.value): summ.value[i].tag = '%s/%d' % (key, i) summary_proto.value.extend(summ.value) except message.DecodeError: tf.compat.v1.logging.warn( 'Skipping summary for %s, cannot parse string to Summary.', key) continue elif isinstance(dictionary[key], np.ndarray): value = summary_proto.value.add() value.tag = key value.node_name = key tensor_proto = tf.make_tensor_proto(dictionary[key]) value.tensor.CopyFrom(tensor_proto) # pylint: disable=line-too-long tf.compat.v1.logging.info( 'Summary for np.ndarray is not visible in Tensorboard by default. ' 'Consider using a Tensorboard plugin for visualization (see ' 'https://github.com/tensorflow/tensorboard-plugin-example/blob/master/README.md' ' for more information).') # pylint: enable=line-too-long else: tf.compat.v1.logging.warn( 'Skipping summary for %s, must be a float, np.float32, np.int64, ' 'np.int32 or int or np.ndarray or a serialized string of Summary.', key) summary_writer.add_summary(summary_proto, current_global_step) summary_writer.flush() def _write_checkpoint_path_to_summary(output_dir, checkpoint_path, current_global_step): """Writes `checkpoint_path` into summary file in the given output directory. 
Args: output_dir: `str`, directory to write the summary file in. checkpoint_path: `str`, checkpoint file path to be written to summary file. current_global_step: `int`, the current global step. """ checkpoint_path_tag = 'checkpoint_path' tf.compat.v1.logging.info('Saving \'%s\' summary for global step %d: %s', checkpoint_path_tag, current_global_step, checkpoint_path) summary_proto = summary_pb2.Summary() summary_proto.value.add( tag=checkpoint_path_tag, tensor=tf.make_tensor_proto(checkpoint_path, dtype=tf.dtypes.string)) summary_writer = tf.compat.v1.summary.FileWriterCache.get(output_dir) summary_writer.add_summary(summary_proto, current_global_step) summary_writer.flush() def _has_dataset_or_queue_runner(maybe_tensor): """Returns `True` if `Dataset` or `QueueRunner` has been used.""" # Check TF dataset first. Here, we use a simple algorithm to check the top # level Tensors only, which should be sufficient for most users. tensors = [ x for x in tf.nest.flatten(maybe_tensor) if isinstance(x, tf.Tensor) ] if any([t.op.type == 'IteratorGetNext' for t in tensors]): return True # Now, check queue. return tf.compat.v1.get_default_graph().get_collection( tf.compat.v1.GraphKeys.QUEUE_RUNNERS) VocabInfo = tf.compat.v1.train.VocabInfo # pylint: disable=invalid-name estimator_export('estimator.VocabInfo')(VocabInfo) @estimator_export('estimator.WarmStartSettings') class WarmStartSettings( collections.namedtuple('WarmStartSettings', [ 'ckpt_to_initialize_from', 'vars_to_warm_start', 'var_name_to_vocab_info', 'var_name_to_prev_var_name', ])): """Settings for warm-starting in `tf.estimator.Estimators`. 
Example Use with canned `tf.estimator.DNNEstimator`: ``` emb_vocab_file = tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_vocabulary_file( "sc_vocab_file", "new_vocab.txt", vocab_size=100), dimension=8) emb_vocab_list = tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_vocabulary_list( "sc_vocab_list", vocabulary_list=["a", "b"]), dimension=8) estimator = tf.estimator.DNNClassifier( hidden_units=[128, 64], feature_columns=[emb_vocab_file, emb_vocab_list], warm_start_from=ws) ``` where `ws` could be defined as: Warm-start all weights in the model (input layer and hidden weights). Either the directory or a specific checkpoint can be provided (in the case of the former, the latest checkpoint will be used): ``` ws = WarmStartSettings(ckpt_to_initialize_from="/tmp") ws = WarmStartSettings(ckpt_to_initialize_from="/tmp/model-1000") ``` Warm-start only the embeddings (input layer): ``` ws = WarmStartSettings(ckpt_to_initialize_from="/tmp", vars_to_warm_start=".*input_layer.*") ``` Warm-start all weights but the embedding parameters corresponding to `sc_vocab_file` have a different vocab from the one used in the current model: ``` vocab_info = tf.estimator.VocabInfo( new_vocab=sc_vocab_file.vocabulary_file, new_vocab_size=sc_vocab_file.vocabulary_size, num_oov_buckets=sc_vocab_file.num_oov_buckets, old_vocab="old_vocab.txt" ) ws = WarmStartSettings( ckpt_to_initialize_from="/tmp", var_name_to_vocab_info={ "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info }) ``` Warm-start only `sc_vocab_file` embeddings (and no other variables), which have a different vocab from the one used in the current model: ``` vocab_info = tf.estimator.VocabInfo( new_vocab=sc_vocab_file.vocabulary_file, new_vocab_size=sc_vocab_file.vocabulary_size, num_oov_buckets=sc_vocab_file.num_oov_buckets, old_vocab="old_vocab.txt" ) ws = WarmStartSettings( ckpt_to_initialize_from="/tmp", vars_to_warm_start=None, 
var_name_to_vocab_info={ "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info }) ``` Warm-start all weights but the parameters corresponding to `sc_vocab_file` have a different vocab from the one used in current checkpoint, and only 100 of those entries were used: ``` vocab_info = tf.estimator.VocabInfo( new_vocab=sc_vocab_file.vocabulary_file, new_vocab_size=sc_vocab_file.vocabulary_size, num_oov_buckets=sc_vocab_file.num_oov_buckets, old_vocab="old_vocab.txt", old_vocab_size=100 ) ws = WarmStartSettings( ckpt_to_initialize_from="/tmp", var_name_to_vocab_info={ "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info }) ``` Warm-start all weights but the parameters corresponding to `sc_vocab_file` have a different vocab from the one used in current checkpoint and the parameters corresponding to `sc_vocab_list` have a different name from the current checkpoint: ``` vocab_info = tf.estimator.VocabInfo( new_vocab=sc_vocab_file.vocabulary_file, new_vocab_size=sc_vocab_file.vocabulary_size, num_oov_buckets=sc_vocab_file.num_oov_buckets, old_vocab="old_vocab.txt", old_vocab_size=100 ) ws = WarmStartSettings( ckpt_to_initialize_from="/tmp", var_name_to_vocab_info={ "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info }, var_name_to_prev_var_name={ "input_layer/sc_vocab_list_embedding/embedding_weights": "old_tensor_name" }) ``` Warm-start all TRAINABLE variables: ``` ws = WarmStartSettings(ckpt_to_initialize_from="/tmp", vars_to_warm_start=".*") ``` Warm-start all variables (including non-TRAINABLE): ``` ws = WarmStartSettings(ckpt_to_initialize_from="/tmp", vars_to_warm_start=[".*"]) ``` Warm-start non-TRAINABLE variables "v1", "v1/Momentum", and "v2" but not "v2/momentum": ``` ws = WarmStartSettings(ckpt_to_initialize_from="/tmp", vars_to_warm_start=["v1", "v2[^/]"]) ``` Attributes: ckpt_to_initialize_from: [Required] A string specifying the directory with checkpoint file(s) or path to checkpoint from which to warm-start the 
model parameters. vars_to_warm_start: [Optional] One of the following: * A regular expression (string) that captures which variables to warm-start (see tf.compat.v1.get_collection). This expression will only consider variables in the TRAINABLE_VARIABLES collection -- if you need to warm-start non_TRAINABLE vars (such as optimizer accumulators or batch norm statistics), please use the below option. * A list of strings, each a regex scope provided to tf.compat.v1.get_collection with GLOBAL_VARIABLES (please see tf.compat.v1.get_collection). For backwards compatibility reasons, this is separate from the single-string argument type. * A list of Variables to warm-start. If you do not have access to the `Variable` objects at the call site, please use the above option. * `None`, in which case only TRAINABLE variables specified in `var_name_to_vocab_info` will be warm-started. Defaults to `'.*'`, which warm-starts all variables in the TRAINABLE_VARIABLES collection. Note that this excludes variables such as accumulators and moving statistics from batch norm. var_name_to_vocab_info: [Optional] Dict of variable names (strings) to `tf.estimator.VocabInfo`. The variable names should be "full" variables, not the names of the partitions. If not explicitly provided, the variable is assumed to have no (changes to) vocabulary. var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to name of the previously-trained variable in `ckpt_to_initialize_from`. If not explicitly provided, the name of the variable is assumed to be same between previous checkpoint and current model. Note that this has no effect on the set of variables that is warm-started, and only controls name mapping (use `vars_to_warm_start` for controlling what variables to warm-start). 
""" def __new__(cls, ckpt_to_initialize_from, vars_to_warm_start='.*', var_name_to_vocab_info=None, var_name_to_prev_var_name=None): if not ckpt_to_initialize_from: raise ValueError( '`ckpt_to_initialize_from` MUST be set in WarmStartSettings') return super(WarmStartSettings, cls).__new__( cls, ckpt_to_initialize_from, vars_to_warm_start, var_name_to_vocab_info or {}, var_name_to_prev_var_name or {}, ) def _get_default_warm_start_settings(warm_start_from): """Returns default `tf.estimator.WarmStartSettings`. Args: warm_start_from: Either a string representing the filepath of a checkpoint or `SavedModel` to initialize from, or an instance of `tf.estimator.WarmStartSettings`. Returns: Either None or an instance of `WarmStartSettings`. Raises: ValueError: If `warm_start_from` is not `None` but is neither a string nor an instance of `WarmStartSettings`. """ if warm_start_from is None: return None if isinstance(warm_start_from, (six.string_types, six.binary_type)): # Infer that this is a SavedModel if export_path + # 'variables/variables.index' exists, and if so, construct the # WarmStartSettings pointing to the variables path # (export_path + 'variables/variables'). if tf.compat.v1.gfile.Exists( os.path.join( path_helpers.get_variables_dir(warm_start_from), tf.compat.as_text('variables.index'))): tf.compat.v1.logging.info('Warm-starting from a SavedModel') return WarmStartSettings( ckpt_to_initialize_from=path_helpers.get_variables_path( warm_start_from)) return WarmStartSettings(ckpt_to_initialize_from=warm_start_from) elif isinstance(warm_start_from, WarmStartSettings): return warm_start_from else: raise ValueError('warm_start_from must be a string or a WarmStartSettings, ' 'instead got {}'.format(type(warm_start_from))) ================================================ FILE: tensorflow_estimator/python/estimator/estimator_export.py ================================================ # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Utilities for exporting TensorFlow Estimator symbols to the API. Exporting a function or a class: To export a function or a class use the estimator_export decorator. For e.g.: ```python @estimator_export('foo', 'bar.foo') def foo(...): ... ``` If a function is assigned to a variable, you can export it by calling estimator_export explicitly. For e.g.: ```python foo = get_foo(...) 
estimator_export('foo', 'bar.foo')(foo) ``` Exporting a constant ```python foo = 1 estimator_export('consts.foo').export_constant(__name__, 'foo') ``` """ from collections.abc import Sequence from typing import Optional, TypeVar from tensorflow.python.util import deprecation from tensorflow.python.util import tf_export T = TypeVar('T') ESTIMATOR_API_NAME = 'estimator' # pylint: disable=protected-access if ESTIMATOR_API_NAME not in tf_export.API_ATTRS: tf_export.API_ATTRS[ESTIMATOR_API_NAME] = tf_export._Attributes( '_estimator_api_names', '_estimator_api_constants' ) if ESTIMATOR_API_NAME not in tf_export.API_ATTRS_V1: tf_export.API_ATTRS_V1[ESTIMATOR_API_NAME] = tf_export._Attributes( '_estimator_api_names_v1', '_estimator_api_constants_v1' ) # pylint: enable=protected-access class estimator_export(tf_export.api_export): # pylint: disable=invalid-name """Provides ways to export symbols to the TensorFlow Estimator API.""" def __init__(self, *args: str, v1: Optional[Sequence[str]] = None): """Export under the names *args (first one is considered canonical). All symbols exported by this decorator are exported under the `estimator` API name. Args: *args: API names in dot delimited format. v1: Names for the TensorFlow V1 API. If not set, we will use V2 API names both for TensorFlow V1 and V2 APIs. """ super().__init__(*args, api_name=ESTIMATOR_API_NAME, v1=v1) def __call__(self, func: T) -> T: """Calls this decorator. Args: func: decorated symbol (function or class). Returns: The input function with _tf_api_names attribute set and marked as deprecated. """ func = deprecation.deprecated(None, 'Use tf_keras instead.')(func) return super().__call__(func) ================================================ FILE: tensorflow_estimator/python/estimator/estimator_export_test.py ================================================ # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """estimator_export tests.""" import sys import tensorflow as tf from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_export # pylint: disable=g-deprecated-tf-checker from tensorflow_estimator.python.estimator import estimator_export class TestClass(object): pass class ValidateExportTest(tf.test.TestCase): """Tests for estimator_export class.""" def setUp(self): super().setUp() self._modules = [] def tearDown(self): super().tearDown() for name in self._modules: del sys.modules[name] self._modules = [] if hasattr(TestClass, '_estimator_api_names'): del TestClass._estimator_api_names if hasattr(TestClass, '_estimator_api_names_v1'): del TestClass._estimator_api_names_v1 @tf.compat.v1.test.mock.patch.object( logging, 'warning', autospec=True ) def testExportDeprecated(self, mock_warning): export_decorator = estimator_export.estimator_export('estimator.TestClass') export_decorator(TestClass) # Deprecation should trigger a runtime warning TestClass() self.assertEqual(1, mock_warning.call_count) # Deprecation should only warn once, upon first call TestClass() self.assertEqual(1, mock_warning.call_count) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/estimator_lib.py ================================================ # Copyright 
2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Estimator: High level tools for working with models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import from tensorflow_estimator.python.estimator.canned.baseline import BaselineClassifier from tensorflow_estimator.python.estimator.canned.baseline import BaselineEstimator from tensorflow_estimator.python.estimator.canned.baseline import BaselineRegressor from tensorflow_estimator.python.estimator.canned.dnn import dnn_logit_fn_builder from tensorflow_estimator.python.estimator.canned.dnn import DNNClassifier from tensorflow_estimator.python.estimator.canned.dnn import DNNEstimator from tensorflow_estimator.python.estimator.canned.dnn import DNNRegressor from tensorflow_estimator.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedClassifier from tensorflow_estimator.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedEstimator from tensorflow_estimator.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedRegressor from tensorflow_estimator.python.estimator.canned.kmeans import KMeansClustering from tensorflow_estimator.python.estimator.canned.linear import linear_logit_fn_builder from tensorflow_estimator.python.estimator.canned.linear 
import LinearClassifier from tensorflow_estimator.python.estimator.canned.linear import LinearEstimator from tensorflow_estimator.python.estimator.canned.linear import LinearRegressor from tensorflow_estimator.python.estimator.canned.parsing_utils import classifier_parse_example_spec from tensorflow_estimator.python.estimator.canned.parsing_utils import regressor_parse_example_spec from tensorflow_estimator.python.estimator.canned.rnn import RNNClassifier from tensorflow_estimator.python.estimator.canned.rnn import RNNEstimator from tensorflow_estimator.python.estimator.early_stopping import * from tensorflow_estimator.python.estimator.estimator import Estimator from tensorflow_estimator.python.estimator.estimator import VocabInfo from tensorflow_estimator.python.estimator.estimator import WarmStartSettings from tensorflow_estimator.python.estimator.export import export_lib as export from tensorflow_estimator.python.estimator.exporter import Exporter from tensorflow_estimator.python.estimator.exporter import FinalExporter from tensorflow_estimator.python.estimator.exporter import LatestExporter from tensorflow_estimator.python.estimator.extenders import add_metrics from tensorflow_estimator.python.estimator.head.base_head import Head from tensorflow_estimator.python.estimator.head.binary_class_head import BinaryClassHead from tensorflow_estimator.python.estimator.head.multi_class_head import MultiClassHead from tensorflow_estimator.python.estimator.head.multi_head import MultiHead from tensorflow_estimator.python.estimator.head.multi_label_head import MultiLabelHead from tensorflow_estimator.python.estimator.head.regression_head import LogisticRegressionHead from tensorflow_estimator.python.estimator.head.regression_head import PoissonRegressionHead from tensorflow_estimator.python.estimator.head.regression_head import RegressionHead from tensorflow_estimator.python.estimator.hooks import basic_session_run_hooks from tensorflow_estimator.python.estimator.hooks 
import hooks from tensorflow_estimator.python.estimator.hooks import session_run_hook from tensorflow_estimator.python.estimator.inputs import inputs from tensorflow_estimator.python.estimator.keras_lib import model_to_estimator from tensorflow_estimator.python.estimator.mode_keys import ModeKeys from tensorflow_estimator.python.estimator.model_fn import call_logit_fn from tensorflow_estimator.python.estimator.model_fn import EstimatorSpec from tensorflow_estimator.python.estimator.run_config import RunConfig from tensorflow_estimator.python.estimator.tpu.tpu_estimator import TPUEstimator from tensorflow_estimator.python.estimator.training import EvalSpec from tensorflow_estimator.python.estimator.training import train_and_evaluate from tensorflow_estimator.python.estimator.training import TrainSpec # pylint: enable=unused-import,line-too-long,wildcard-import ================================================ FILE: tensorflow_estimator/python/estimator/estimator_test.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests for Estimator.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import functools import glob import json import os import socket import tempfile import numpy as np import six import tensorflow.compat.v1 as tf from google.protobuf import text_format from absl.testing import parameterized from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import combinations from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.lib.io import file_io from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops.random_ops import random_uniform from tensorflow.python.platform import tf_logging as logging from tensorflow.python.platform import gfile from tensorflow.python.profiler import profiler_v2 as profiler from tensorflow.python.saved_model import loader_impl from tensorflow.python.saved_model import path_helpers from tensorflow.python.saved_model import tag_constants from tensorflow.python.training import checkpoint_state_pb2 from tensorflow.python.training import saver_test_utils from tensorflow.python.training import training from tensorflow.python.util import function_utils from tensorflow_estimator.python.estimator import training as estimator_training from tensorflow_estimator.python.estimator import estimator from tensorflow_estimator.python.estimator.util import tf_keras_v1 from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator import run_config from tensorflow_estimator.python.estimator.export import export_lib from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.mode_keys import ModeKeys _TMP_DIR = '/tmp' _ANOTHER_TMP_DIR = '/another_tmp' def dummy_model_fn(features, labels, params): _, _, 
_ = features, labels, params def summaries_with_matching_keyword(keyword, dir_): """Yields summary protos matching given keyword from event file.""" tf.summary.FileWriterCache.clear() event_paths = glob.glob(os.path.join(dir_, 'events*')) for event in tf.train.summary_iterator(event_paths[-1]): if event.summary is not None: for value in event.summary.value: if keyword in value.tag: yield event.summary def check_eventfile_for_keyword(keyword, dir_): """Checks event files for the keyword.""" return any(summaries_with_matching_keyword(keyword, dir_)) def get_mock_saver(): real_saver = tf.train.Saver() return tf.test.mock.Mock(wraps=real_saver, saver_def=real_saver.saver_def) class EstimatorInheritanceConstraintTest(tf.test.TestCase): """Tests that sub classes cannot override methods of Estimator.""" @property def random_estimator(self): switch = np.random.random() return estimator.EstimatorV2 if switch > 0.5 else estimator.EstimatorV2 def test_override_a_method(self): class _Estimator(self.random_estimator): def __init__(self): super(_Estimator, self).__init__(model_fn=dummy_model_fn) def predict(self, input_fn, predict_keys=None, hooks=None): pass with self.assertRaisesRegexp( ValueError, 'cannot override members of Estimator.*predict'): _Estimator() def test_extension_of_api_is_ok(self): class _Estimator(self.random_estimator): def __init__(self): super(_Estimator, self).__init__(model_fn=dummy_model_fn) def predict_proba(self, input_fn, predict_keys=None, hooks=None): pass _Estimator() def test_override_allowed_method(self): class _Estimator(self.random_estimator): def __init__(self): super(_Estimator, self).__init__(model_fn=dummy_model_fn) def _tf_api_names(self): pass _Estimator() class EstimatorConstructorTest(tf.test.TestCase): def test_config_must_be_a_run_config(self): with self.assertRaisesRegexp(ValueError, 'an instance of `RunConfig`'): estimator.EstimatorV2(model_fn=None, config='NotARunConfig') def test_model_fn_must_be_provided(self): with 
self.assertRaisesRegexp(ValueError, 'model_fn.* must be'): estimator.EstimatorV2(model_fn=None) def test_property_accessors(self): def model_fn(features, labels, params): _, _, _ = features, labels, params class FakeConfig(run_config.RunConfig): pass params = {'hidden_layers': [3, 4]} est = estimator.EstimatorV2( model_fn=model_fn, model_dir='bla', config=FakeConfig(), params=params) self.assertTrue(isinstance(est.config, FakeConfig)) self.assertEqual(params, est.params) self.assertEqual('bla', est.model_dir) def test_default_config(self): def model_fn(features, labels): _, _ = features, labels est = estimator.EstimatorV2(model_fn=model_fn) self.assertTrue(isinstance(est.config, run_config.RunConfig)) self.assertTrue(est._session_config.allow_soft_placement) rewrite_options = est._session_config.graph_options.rewrite_options self.assertEqual(rewrite_options.meta_optimizer_iterations, rewriter_config_pb2.RewriterConfig.ONE) def test_default_model_dir(self): def model_fn(features, labels): _, _ = features, labels with tf.test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR): est = estimator.EstimatorV2(model_fn=model_fn) self.assertEqual(_TMP_DIR, est.config.model_dir) self.assertEqual(_TMP_DIR, est.model_dir) def test_model_dir_in_constructor(self): def model_fn(features, labels): _, _ = features, labels est = estimator.EstimatorV2(model_fn=model_fn, model_dir=_TMP_DIR) self.assertEqual(_TMP_DIR, est.config.model_dir) self.assertEqual(_TMP_DIR, est.model_dir) def test_empty_model_dir(self): def model_fn(features, labels): _, _ = features, labels with tf.test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR): est = estimator.EstimatorV2(model_fn=model_fn, model_dir='') self.assertEqual(_TMP_DIR, est.config.model_dir) self.assertEqual(_TMP_DIR, est.model_dir) def test_model_dir_in_run_config(self): class FakeConfig(run_config.RunConfig): @property def model_dir(self): return _TMP_DIR def model_fn(features, labels): _, _ = features, labels est 
= estimator.EstimatorV2(model_fn=model_fn, config=FakeConfig()) self.assertEqual(_TMP_DIR, est.config.model_dir) self.assertEqual(_TMP_DIR, est.model_dir) def test_same_model_dir_in_constructor_and_run_config(self): class FakeConfig(run_config.RunConfig): @property def model_dir(self): return _TMP_DIR def model_fn(features, labels): _, _ = features, labels est = estimator.EstimatorV2( model_fn=model_fn, config=FakeConfig(), model_dir=_TMP_DIR) self.assertEqual(_TMP_DIR, est.config.model_dir) self.assertEqual(_TMP_DIR, est.model_dir) def test_different_model_dir_in_constructor_and_run_config(self): class FakeConfig(run_config.RunConfig): @property def model_dir(self): return _TMP_DIR def model_fn(features, labels): _, _ = features, labels with self.assertRaisesRegexp( ValueError, '`model_dir` are set both in constructor and `RunConfig`, but ' 'with different values'): estimator.EstimatorV2( model_fn=model_fn, config=FakeConfig(), model_dir=_ANOTHER_TMP_DIR) def test_model_fn_args_must_include_features(self): def model_fn(x, labels): _, _ = x, labels with self.assertRaisesRegexp(ValueError, 'features'): estimator.EstimatorV2(model_fn=model_fn) def test_model_fn_args_labels_is_optional(self): def model_fn(features): _ = features estimator.EstimatorV2(model_fn=model_fn) def test_if_params_provided_then_model_fn_should_accept_it(self): def model_fn(features, labels): _, _ = features, labels estimator.EstimatorV2(model_fn=model_fn) with self.assertRaisesRegexp(ValueError, 'params'): estimator.EstimatorV2(model_fn=model_fn, params={'hidden_layers': 4}) def test_internal_params_is_a_deepcopy(self): def model_fn(features, labels, params): _, _, _ = features, labels, params params = {'hidden_layers': 4} est = estimator.EstimatorV2(model_fn=model_fn, params=params) params['hidden_layers'] = 5 self.assertEqual(4, est.params['hidden_layers']) def test_not_known_model_fn_args(self): def model_fn(features, labels, something): _, _, _ = features, labels, something with 
self.assertRaisesRegexp(ValueError, 'something'): estimator.EstimatorV2(model_fn=model_fn) def test_not_known_model_fn_args_handled_by_lambda(self): def model_fn(features, labels, something): _, _, _ = features, labels, something new_model_fn = lambda features, labels: model_fn( # pylint: disable=g-long-lambda features, labels, 'something') estimator.EstimatorV2(model_fn=new_model_fn) def test_if_model_fn_is_a_member_function_of_a_class(self): class ModelFnClass(object): def __init__(self): estimator.EstimatorV2(model_fn=self.model_fn) def model_fn(self, features, labels, mode): _, _, _ = features, labels, mode ModelFnClass() def test_model_fn_property_binds_params(self): def model_fn(features, labels, mode, config, params): _, _, _, _, _ = features, labels, mode, config, params est = estimator.EstimatorV2(model_fn=model_fn) model_fn_args = function_utils.fn_args(est.model_fn) self.assertEqual( set(['features', 'labels', 'mode', 'config']), set(model_fn_args)) def test_model_fn_property_returns_fixed_signature(self): def model_fn(features, labels): _, _ = features, labels est = estimator.EstimatorV2(model_fn=model_fn) model_fn_args = function_utils.fn_args(est.model_fn) self.assertEqual( set(['features', 'labels', 'mode', 'config']), set(model_fn_args)) def dummy_input_fn(): return ({'x': tf.constant([[1], [1]])}, tf.constant([[1], [1]])) def model_fn_global_step_incrementer(features, labels, mode): _, _ = features, labels global_step = tf.train.get_global_step() return model_fn_lib.EstimatorSpec( mode, loss=tf.constant(1.), train_op=tf.assign_add(global_step, 1)) def assert_features_op(expected_features, actual_features): return [ tf.debugging.assert_equal( expected_features[k], actual_features[k], name='assert_%s' % k) for k in expected_features ] def _estimator_spec(expected_features, expected_labels, actual_features, actual_labels, mode): assert_ops = tuple( assert_features_op(expected_features, actual_features) + [ tf.debugging.assert_equal( expected_labels, 
actual_labels, name='assert_labels') ]) global_step = tf.train.get_global_step() with tf.control_dependencies(assert_ops): return model_fn_lib.EstimatorSpec( mode=mode, predictions=tf.constant(0.), loss=tf.constant(0.), train_op=tf.assign_add(global_step, 1)) def _make_input_fn(features, labels): def _input_fn(): return {k: tf.constant(v) for k, v in six.iteritems(features) }, tf.constant(labels) return _input_fn class EstimatorTrainTest(tf.test.TestCase): def test_callable_model_fn(self): expected_features = {'x': 42., 'y': 43.} expected_labels = 44. model_fn_call_count = [0] test_self = self class ModelFn(object): def __call__(self, features, labels): model_fn_call_count[0] += 1 test_self.assertItemsEqual(expected_features.keys(), features.keys()) return _estimator_spec(expected_features, expected_labels, features, labels, ModeKeys.TRAIN) with self.assertRaisesRegexp(ValueError, 'does not include params'): estimator.EstimatorV2(model_fn=ModelFn(), params={'a': 'b'}) est = estimator.EstimatorV2( model_fn=ModelFn(), config=run_config.RunConfig()) self.assertEqual(0, model_fn_call_count[0]) est.train( input_fn=_make_input_fn(expected_features, expected_labels), steps=1) self.assertEqual(1, model_fn_call_count[0]) def test_callable_input_fn(self): expected_mode = ModeKeys.TRAIN expected_params = {'batch_size': 10} expected_config = run_config.RunConfig().replace(tf_random_seed=4321) input_fn_call_count = [0] def _model_fn(features, labels, mode, params, config): del params, config return model_fn_global_step_incrementer(features, labels, mode) test_self = self class InputFn(object): def __call__(self, mode, params, config): input_fn_call_count[0] += 1 test_self.assertEqual(expected_mode, mode) test_self.assertEqual(expected_params, params) test_self.assertEqual(4321, config.tf_random_seed) return dummy_input_fn() est = estimator.EstimatorV2( model_fn=_model_fn, params=expected_params, config=expected_config) self.assertEqual(0, input_fn_call_count[0]) 
est.train(InputFn(), steps=1) self.assertEqual(1, input_fn_call_count[0]) def test_nested_input_fn(self): expected_params = {'batch_size': 10} def _input_fn(): dataset_features = tf.data.Dataset.from_tensor_slices( (random_uniform([4]), random_uniform([4, 100], maxval=100, dtype=tf.dtypes.int32))) dataset_labels = tf.data.Dataset.from_tensor_slices( random_uniform([4, 10])) dataset = tf.data.Dataset.zip((dataset_features, dataset_labels)) dataset = dataset.repeat(-1) iterator = tf.data.make_initializable_iterator(dataset) return iterator.get_next() def _model_fn(features, labels, mode, params, config): del params, config return model_fn_global_step_incrementer(features, labels, mode) expected_config = run_config.RunConfig().replace(tf_random_seed=4321) est = estimator.EstimatorV2( model_fn=_model_fn, params=expected_params, config=expected_config) est.train(_input_fn, steps=4) def test_input_fn_args(self): expected_mode = ModeKeys.TRAIN expected_params = {'batch_size': 10} expected_config = run_config.RunConfig().replace(tf_random_seed=4321) input_fn_call_count = [0] def _model_fn(features, labels, mode, params, config): del params, config return model_fn_global_step_incrementer(features, labels, mode) def _input_fn(mode, params, config): input_fn_call_count[0] += 1 self.assertEqual(expected_mode, mode) self.assertEqual(expected_params, params) self.assertEqual(4321, config.tf_random_seed) return dummy_input_fn() est = estimator.EstimatorV2( model_fn=_model_fn, params=expected_params, config=expected_config) self.assertEqual(0, input_fn_call_count[0]) est.train(_input_fn, steps=1) self.assertEqual(1, input_fn_call_count[0]) def test_minimal_model_fn_args(self): expected_features = {'x': 4, 'y': 5} def _input_fn(): return expected_features model_fn_call_count = [0] def _model_fn(features): model_fn_call_count[0] += 1 self.assertItemsEqual(expected_features.keys(), features.keys()) with tf.control_dependencies( assert_features_op(expected_features, features)): return 
model_fn_lib.EstimatorSpec( mode=None, predictions=tf.constant(0.), loss=tf.constant(0.), train_op=tf.assign_add(tf.train.get_global_step(), 1)) est = estimator.EstimatorV2(model_fn=_model_fn) self.assertEqual(0, model_fn_call_count[0]) est.train(input_fn=_input_fn, steps=1) self.assertEqual(1, model_fn_call_count[0]) def test_labels_should_be_none_if_model_fn_does_not_use_labels(self): def _input_fn_with_labels(): return {'x': 4, 'y': 5}, [4] def _model_fn(features): _ = features return model_fn_lib.EstimatorSpec( mode=None, predictions=tf.constant(0.), loss=tf.constant(0.), train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1)) est = estimator.EstimatorV2(model_fn=_model_fn) with self.assertRaisesRegexp(ValueError, 'model_fn does not take labels'): est.train(input_fn=_input_fn_with_labels, steps=1) def test_input_fn_len_should_be_2_if_tuple_or_list(self): def _input_fn(): return 4, 5, 6 def _model_fn(features): _ = features est = estimator.EstimatorV2(model_fn=_model_fn) with self.assertRaisesRegexp(ValueError, 'len 2 tuple'): est.train(input_fn=_input_fn, steps=1) def test_all_model_fn_args(self): expected_features = {'x': 42., 'y': 43.} expected_labels = 44. expected_params = {'some_param': 'some_value'} expected_config = run_config.RunConfig() expected_config.i_am_test = True # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments # doesn't work with mock fns. model_fn_call_count = [0] # Note that args are all passed by keyword, so can be in any order. 
def _model_fn(mode, params, features, labels, config): model_fn_call_count[0] += 1 self.assertItemsEqual(expected_features.keys(), features.keys()) self.assertEqual(ModeKeys.TRAIN, mode) self.assertEqual(expected_params, params) self.assertTrue(config.i_am_test) return _estimator_spec(expected_features, expected_labels, features, labels, mode) est = estimator.EstimatorV2( model_fn=_model_fn, params=expected_params, config=expected_config) self.assertEqual(0, model_fn_call_count[0]) est.train( input_fn=_make_input_fn(expected_features, expected_labels), steps=1) self.assertEqual(1, model_fn_call_count[0]) def test_partial_model_fn_args(self): expected_features = {'x': 42., 'y': 43.} expected_labels = 44. expected_params = {'some_param': 'some_value'} expected_config = run_config.RunConfig() expected_config.i_am_test = True expected_foo = 45. expected_bar = 46. # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments # doesn't work with mock fns. model_fn_call_count = [0] def _model_fn(features, labels, foo, mode, params, config, bar): model_fn_call_count[0] += 1 self.assertEqual(expected_foo, foo) self.assertEqual(expected_bar, bar) self.assertItemsEqual(expected_features.keys(), features.keys()) self.assertEqual(ModeKeys.TRAIN, mode) self.assertEqual(expected_params, params) self.assertTrue(config.i_am_test) return _estimator_spec(expected_features, expected_labels, features, labels, mode) partial_model_fn = functools.partial( _model_fn, foo=expected_foo, bar=expected_bar) est = estimator.EstimatorV2( model_fn=partial_model_fn, params=expected_params, config=expected_config) self.assertEqual(0, model_fn_call_count[0]) est.train( input_fn=_make_input_fn(expected_features, expected_labels), steps=1) self.assertEqual(1, model_fn_call_count[0]) def test_model_fn_must_return_estimator_spec(self): def model_fn(features, labels): _, _ = features, labels return 'NotGoodNotGood' est = estimator.EstimatorV2(model_fn=model_fn) with 
self.assertRaisesRegexp(ValueError, 'EstimatorSpec'): est.train(dummy_input_fn, steps=1) def test_run_train_op_and_saves_at_the_end(self): est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer) est.train(dummy_input_fn, steps=5) self.assertEqual( 5, estimator._load_global_step_from_checkpoint_dir(est.model_dir)) def test_loss_summary(self): est = estimator.EstimatorV2( model_fn=model_fn_global_step_incrementer, config=run_config.RunConfig(save_summary_steps=1)) est.train(dummy_input_fn, steps=1) # Make sure nothing is stuck in limbo. tf.summary.FileWriterCache.clear() if check_eventfile_for_keyword('loss', est.model_dir): return self.fail('{} should be part of reported summaries.'.format('loss')) def test_latest_checkpoint(self): est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer) self.assertIsNone(est.latest_checkpoint()) est.train(dummy_input_fn, steps=5) self.assertIsNotNone(est.latest_checkpoint()) self.assertTrue(est.latest_checkpoint().startswith(est.model_dir)) def test_steps_and_saves_reloads(self): est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer) est.train(dummy_input_fn, steps=5) self.assertEqual( 5, estimator._load_global_step_from_checkpoint_dir(est.model_dir)) est.train(dummy_input_fn, steps=5) self.assertEqual( 10, estimator._load_global_step_from_checkpoint_dir(est.model_dir)) def test_warm_starts(self): def _make_model_fn(x): def _variable_creating_model_fn(features, labels, mode): _, _ = features, labels tf.get_variable('x', initializer=x) global_step = tf.train.get_global_step() return model_fn_lib.EstimatorSpec( mode, loss=tf.constant(1.), train_op=tf.assign_add(global_step, 1)) return _variable_creating_model_fn est = estimator.EstimatorV2(model_fn=_make_model_fn(42.)) est.train(dummy_input_fn, steps=10) warm_started_est = estimator.EstimatorV2( model_fn=_make_model_fn(36.), warm_start_from=est.model_dir) warm_started_est.train(dummy_input_fn, steps=5) # warm_start is called after the 
model_fn, so x should have the value # from the checkpoint. self.assertEqual(42., warm_started_est.get_variable_value('x')) # global_step should not be warm-started. self.assertEqual( 5, estimator._load_global_step_from_checkpoint_dir( warm_started_est.model_dir)) @test_util.run_v1_only('b/119219961') def test_warm_starts_from_savedmodel(self): def _make_model_fn(x): def _variable_creating_and_export_model_fn(features, labels, mode): _, _ = features, labels tf.get_variable('x', initializer=x) global_step = tf.train.get_global_step() return model_fn_lib.EstimatorSpec( mode, predictions={'y': tf.constant(1.0)}, loss=tf.constant(1.), train_op=tf.assign_add(global_step, 1), export_outputs={ 'test': export_lib.ClassificationOutput( tf.constant([4.2]), tf.constant(['label'])) }) return _variable_creating_and_export_model_fn est = estimator.EstimatorV2(model_fn=_make_model_fn(42.)) est.train(dummy_input_fn, steps=10) feature_spec = { 'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64), 'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64) } serving_input_receiver_fn = ( export_lib.build_parsing_serving_input_receiver_fn(feature_spec)) tmpdir = tempfile.mkdtemp() export_dir_base = os.path.join( tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export')) export_dir = est.export_saved_model(export_dir_base, serving_input_receiver_fn) warm_started_est = estimator.EstimatorV2( model_fn=_make_model_fn(36.), warm_start_from=export_dir) warm_started_est.train(dummy_input_fn, steps=5) # warm_start is called after the model_fn, so x should have the value # from the SavedModel. 
self.assertEqual(42., warm_started_est.get_variable_value('x')) def test_max_step(self): est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer) est.train(dummy_input_fn, max_steps=5) self.assertEqual( 5, estimator._load_global_step_from_checkpoint_dir(est.model_dir)) est.train(dummy_input_fn, max_steps=5) self.assertEqual( 5, estimator._load_global_step_from_checkpoint_dir(est.model_dir)) def test_checkpoint_contains_relative_paths(self): tmpdir = tempfile.mkdtemp() est = estimator.EstimatorV2( model_dir=tmpdir, model_fn=model_fn_global_step_incrementer) est.train(dummy_input_fn, steps=5) checkpoint_file_content = file_io.read_file_to_string( os.path.join(tmpdir, 'checkpoint')) ckpt = checkpoint_state_pb2.CheckpointState() text_format.Merge(checkpoint_file_content, ckpt) self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5') # TODO(b/78461127): Please modify tests to not directly rely on names of # checkpoints. self.assertAllEqual(['model.ckpt-0', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths) def test_train_save_copy_reload(self): tmpdir = tempfile.mkdtemp() model_dir1 = os.path.join(tmpdir, 'model_dir1') est1 = estimator.EstimatorV2( model_dir=model_dir1, model_fn=model_fn_global_step_incrementer) est1.train(dummy_input_fn, steps=5) # We have to clear the cache before we can rename the directory, # otherwise open file handles will prevent the delete on Windows. 
    tf.summary.FileWriterCache.clear()

    model_dir2 = os.path.join(tmpdir, 'model_dir2')
    os.renames(model_dir1, model_dir2)

    # Reloading from the moved directory must resume at global_step == 5.
    est2 = estimator.EstimatorV2(
        model_dir=model_dir2, model_fn=model_fn_global_step_incrementer)
    self.assertEqual(
        5, estimator._load_global_step_from_checkpoint_dir(est2.model_dir))
    est2.train(dummy_input_fn, steps=5)
    self.assertEqual(
        10, estimator._load_global_step_from_checkpoint_dir(est2.model_dir))

  def test_steps0_raises_error(self):
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
      est.train(dummy_input_fn, steps=0)

  def test_steps_negative_raises_error(self):
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
      est.train(dummy_input_fn, steps=-1)

  def test_max_steps0_raises_error(self):
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    with self.assertRaisesRegexp(ValueError, 'Must specify max_steps > 0'):
      est.train(dummy_input_fn, max_steps=0)

  def test_max_steps_negative_raises_error(self):
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    with self.assertRaisesRegexp(ValueError, 'Must specify max_steps > 0'):
      est.train(dummy_input_fn, max_steps=-1)

  def test_scaffold_is_used(self):
    """A Scaffold's init_fn supplied by the model_fn must run during train."""
    self.is_init_fn_called = False

    def _init_fn(scaffold, sess):
      _, _ = scaffold, sess
      self.is_init_fn_called = True

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=tf.train.Scaffold(init_fn=_init_fn))

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    self.assertTrue(self.is_init_fn_called)

  def test_hooks_should_be_session_run_hook(self):
    est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer)
    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
      est.train(dummy_input_fn, steps=1, hooks=['NotAHook'])

  def test_training_hooks_are_used(self):
    """Both chief-only and regular training hooks fire on the chief."""
    chief_hook = tf.test.mock.MagicMock(
        wraps=tf.train.SessionRunHook(), spec=tf.train.SessionRunHook)
    hook = tf.test.mock.MagicMock(
        wraps=tf.train.SessionRunHook(), spec=tf.train.SessionRunHook)

    def _model_fn_hooks(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          training_chief_hooks=[chief_hook],
          training_hooks=[hook])

    est = estimator.EstimatorV2(model_fn=_model_fn_hooks)
    self.assertFalse(chief_hook.begin.called)
    self.assertFalse(hook.begin.called)
    est.train(dummy_input_fn, steps=1)
    self.assertTrue(chief_hook.begin.called)
    self.assertTrue(hook.begin.called)

  def test_saving_listeners_are_used(self):
    # 26 steps with save_checkpoints_steps=10 yields 4 saves (start, 10, 20,
    # and the final step), so the listener must be invoked 4 times.
    listener = tf.test.mock.Mock(spec=tf.train.CheckpointSaverListener)
    listener.after_save.return_value = None
    est = estimator.EstimatorV2(
        model_fn=model_fn_global_step_incrementer,
        config=run_config.RunConfig(save_checkpoints_steps=10))
    est.train(dummy_input_fn, steps=26, saving_listeners=[listener])
    self.assertEqual(4, listener.before_save.call_count)
    self.assertEqual(4, listener.after_save.call_count)

  def test_saver_hook_should_exist_to_use_saving_listeners(self):
    listener = tf.test.mock.Mock(spec=tf.train.CheckpointSaverListener)
    est = estimator.EstimatorV2(
        model_fn=model_fn_global_step_incrementer,
        config=run_config.RunConfig(
            save_checkpoints_steps=None, save_checkpoints_secs=None))
    with self.assertRaisesRegexp(ValueError,
                                 'CheckpointSaverHook to use saving_listeners'):
      est.train(dummy_input_fn, steps=1, saving_listeners=[listener])

  def test_listeners_should_be_listeners(self):
    est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer)
    with self.assertRaisesRegexp(TypeError,
                                 'must be a list of CheckpointSaverListener'):
      est.train(dummy_input_fn, steps=1, saving_listeners=['not-a-listener'])

  def test_chief_only_hook_should_not_be_called_on_non_chief(self):
    """training_chief_hooks must be skipped when RunConfig says non-chief."""
    chief_hook = tf.test.mock.MagicMock(
        wraps=tf.train.SessionRunHook(), spec=tf.train.SessionRunHook)
    hook = tf.test.mock.MagicMock(
        wraps=tf.train.SessionRunHook(), spec=tf.train.SessionRunHook)

    def _model_fn_hooks(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          training_chief_hooks=[chief_hook],
          training_hooks=[hook])

    class NonChiefRunConfig(run_config.RunConfig):

      @property
      def is_chief(self):  # pylint: disable=g-wrong-blank-lines
        return False

    # Mocking the SessionManager.wait_for_session, so that worker doesn't wait
    # for chief.
    def get_initialized_session(*args, **kwargs):
      # Session doesn't take 'max_wait_secs' argument.
      kwargs.pop('max_wait_secs', None)
      scaffold = tf.train.Scaffold().finalize()
      sess = tf.Session(*args, **kwargs)
      sess.run(scaffold.init_op)
      return sess

    with tf.test.mock.patch.object(
        tf.train.SessionManager,
        'wait_for_session',
        side_effect=get_initialized_session):
      est = estimator.EstimatorV2(
          model_fn=_model_fn_hooks, config=NonChiefRunConfig())
      self.assertFalse(chief_hook.begin.called)
      self.assertFalse(hook.begin.called)
      est.train(dummy_input_fn, steps=1)
      # Only the regular hook fires; the chief-only hook stays untouched.
      self.assertFalse(chief_hook.begin.called)
      self.assertTrue(hook.begin.called)

  def test_features_labels_mode(self):
    """model_fn must receive the exact features/labels and TRAIN mode."""
    given_features = {'test-features': [[1], [1]]}
    given_labels = {'test-labels': [[1], [1]]}

    def _input_fn():
      return given_features, given_labels

    def _model_fn(features, labels, mode):
      self.features, self.labels, self.mode = features, labels, mode
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn, steps=1)
    self.assertEqual(given_features, self.features)
    self.assertEqual(given_labels, self.labels)
    self.assertEqual(ModeKeys.TRAIN, self.mode)

  def test_graph_initialization_global_step_and_random_seed(self):
    """Global step and the configured random seed exist inside model_fn."""
    expected_random_seed = run_config.RunConfig().tf_random_seed

    def _model_fn(features, labels, mode):
      _, _, _ = features, labels, mode
      self.assertIsNotNone(tf.train.get_global_step())
      self.assertEqual(expected_random_seed, tf.get_default_graph().seed)
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)

  def test_config_should_not_be_evaluator_or_ps(self):

    class FakeEvaluatorConfig(run_config.RunConfig):

      @property
      def task_type(self):
        return run_config.TaskType.EVALUATOR

    est = estimator.EstimatorV2(
        model_fn=dummy_model_fn, config=FakeEvaluatorConfig())
    with self.assertRaisesRegexp(ValueError, 'train_and_evaluate'):
      est.train(dummy_input_fn, steps=1)

  def test_master_distributed_hooks(self):
    """Master in a distributed cluster gets no summary/step-counter hooks."""
    tf_config = json.dumps({
        'cluster': {
            run_config.TaskType.PS: ['localhost:1234'],
            run_config.TaskType.WORKER: ['localhost:1235'],
            run_config.TaskType.MASTER: ['localhost:1236']
        },
        'task': {
            'type': run_config.TaskType.MASTER,
            'index': 0
        }
    })
    with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
      est = estimator.EstimatorV2(
          model_fn=model_fn_global_step_incrementer,
          config=run_config.RunConfig())

    with tf.test.mock.patch.object(training,
                                   'MonitoredTrainingSession') as mock_sess:
      est.train(dummy_input_fn, steps=1)
      self.assertFalse(
          any(
              isinstance(hook, tf.train.SummarySaverHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertFalse(
          any(
              isinstance(hook, tf.train.StepCounterHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])

  def test_master_distributed_hooks_for_worker_0(self):
    """Worker 0 (with a separate master) does get summary/step hooks."""
    tf_config = json.dumps({
        'cluster': {
            run_config.TaskType.PS: ['localhost:1234'],
            run_config.TaskType.WORKER: ['localhost:1235'],
            run_config.TaskType.MASTER: ['localhost:1236']
        },
        'task': {
            'type': run_config.TaskType.WORKER,
            'index': 0
        }
    })
    with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
      est = estimator.EstimatorV2(
          model_fn=model_fn_global_step_incrementer,
          config=run_config.RunConfig())

    with tf.test.mock.patch.object(training,
                                   'MonitoredTrainingSession') as mock_sess:
      est.train(dummy_input_fn, steps=1)
      self.assertTrue(
          any(
              isinstance(hook, tf.train.SummarySaverHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertTrue(
          any(
              isinstance(hook, tf.train.StepCounterHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])

  def test_master_distributed_hooks_for_worker_nonzero(self):
    """Non-zero workers get neither summary nor step-counter hooks."""
    tf_config = json.dumps({
        'cluster': {
            run_config.TaskType.PS: ['localhost:1234'],
            run_config.TaskType.WORKER: ['localhost:1235', 'localhost:1237'],
            run_config.TaskType.MASTER: ['localhost:1236']
        },
        'task': {
            'type': run_config.TaskType.WORKER,
            'index': 1
        }
    })
    with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
      est = estimator.EstimatorV2(
          model_fn=model_fn_global_step_incrementer,
          config=run_config.RunConfig())

    with tf.test.mock.patch.object(training,
                                   'MonitoredTrainingSession') as mock_sess:
      est.train(dummy_input_fn, steps=1)
      self.assertFalse(
          any(
              isinstance(hook, tf.train.SummarySaverHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertFalse(
          any(
              isinstance(hook, tf.train.StepCounterHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])

  def test_master_hooks_single_replica(self):
    """A lone master keeps the RunConfig's summary/step-count settings."""
    tf_config = json.dumps({
        'cluster': {
            run_config.TaskType.MASTER: ['localhost:1234']
        },
        'task': {
            'type': run_config.TaskType.MASTER,
            'index': 0
        }
    })
    with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
      est = estimator.EstimatorV2(
          model_fn=model_fn_global_step_incrementer,
          config=run_config.RunConfig(
              save_summary_steps=100, log_step_count_steps=200))

    with tf.test.mock.patch.object(training,
                                   'MonitoredTrainingSession') as mock_sess:
      est.train(dummy_input_fn, steps=1)
      self.assertFalse(
          any(
              isinstance(hook, tf.train.SummarySaverHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertFalse(
          any(
              isinstance(hook, tf.train.StepCounterHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps'])
      self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps'])

  def test_master_hooks_single_replica_with_ps(self):
    """Master plus PS only still counts as a single replica for hooks."""
    tf_config = json.dumps({
        'cluster': {
            run_config.TaskType.MASTER: ['localhost:1234'],
            run_config.TaskType.PS: ['localhost: 1235'],
        },
        'task': {
            'type': run_config.TaskType.MASTER,
            'index': 0
        }
    })
    with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
      est = estimator.EstimatorV2(
          model_fn=model_fn_global_step_incrementer,
          config=run_config.RunConfig(
              save_summary_steps=100, log_step_count_steps=200))

    with tf.test.mock.patch.object(training,
                                   'MonitoredTrainingSession') as mock_sess:
      est.train(dummy_input_fn, steps=1)
      self.assertFalse(
          any(
              isinstance(hook, tf.train.SummarySaverHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertFalse(
          any(
              isinstance(hook, tf.train.StepCounterHook)
              for hook in mock_sess.call_args[1]['hooks']))
      self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps'])
      self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps'])

  def test_hooks_with_distributed_collective_ops(self):
    if tf.executing_eagerly():
      self.skipTest('n/a: legacy graph only')
    tf_config = json.dumps({
        'cluster': {
            run_config.TaskType.WORKER: ['', ''],
        },
        'task': {
            'type': run_config.TaskType.WORKER,
            'index': 0
        }
    })
    # We let it skip setting eager context in multi-worker path by creating a
    # single-worker strategy and then passing cluster info into it.
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    strategy.configure(
        cluster_spec={
            run_config.TaskType.WORKER: ['', ''],
        },
        task_type=run_config.TaskType.WORKER,
        task_id=0)
    with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
      config = run_config.RunConfig(
          train_distribute=strategy,
          save_summary_steps=1000,
          save_checkpoints_steps=500)
      config._distribute_coordinator_mode = None  # Skip distribute coordinator.
      est = estimator.EstimatorV2(
          model_fn=model_fn_global_step_incrementer, config=config)

      def input_fn():
        return tf.data.Dataset.from_tensors(({
            'x': tf.constant([[1], [1]])
        }, tf.constant([[1], [1]])))

      with tf.test.mock.patch.object(training,
                                     'MonitoredTrainingSession') as mock_sess:
        est.train(input_fn, steps=1)
        # Saving/summary duties are delegated to MonitoredTrainingSession
        # kwargs rather than explicit hooks under collective ops.
        self.assertFalse(
            any(
                isinstance(hook, tf.train.SummarySaverHook)
                for hook in mock_sess.call_args[1]['hooks']))
        self.assertFalse(
            any(
                isinstance(hook, tf.train.StepCounterHook)
                for hook in mock_sess.call_args[1]['hooks']))
        self.assertFalse(
            any(
                isinstance(hook, tf.train.CheckpointSaverHook)
                for hook in mock_sess.call_args[1]['hooks']))
        self.assertEqual(1000, mock_sess.call_args[1]['save_summaries_steps'])
        self.assertEqual(500, mock_sess.call_args[1]['save_checkpoint_steps'])
        self.assertEqual(100, mock_sess.call_args[1]['log_step_count_steps'])


def _model_fn_with_eval_metric_ops(features, labels, mode, params):
  """model_fn emitting two configurable eval metrics (op-tuple and Metric)."""
  _, _ = features, labels
  global_step = tf.train.get_global_step()
  loss = tf.constant(1.)
  metric_name_1 = params.get('metric_name') or 'metric'
  metric_value_1 = params.get('metric_value') or 2.
  metric_name_2 = params.get('metric_name_2') or 'metric2'
  metric_value_2 = params.get('metric_value_2') or 2.
  metric_update_op = loss.op
  metric_tensor = control_flow_ops.with_dependencies(
      [metric_update_op], tf.constant(metric_value_1))
  mean = tf_keras_v1.metrics.Mean()
  mean.update_state(metric_value_2)
  return model_fn_lib.EstimatorSpec(
      mode,
      loss=loss,
      predictions={'predictions': tf.constant(1.)},
      train_op=tf.assign_add(global_step, 1),
      eval_metric_ops={
          metric_name_1: (metric_tensor, metric_update_op),
          metric_name_2: mean,
      })


class _StepCounterHook(tf.train.SessionRunHook):
  """Hooks that counts the number of times it is called."""

  def __init__(self):
    # Number of before_run invocations observed so far.
    self._steps = 0

  def before_run(self, run_context):
    del run_context
    self._steps += 1

  @property
  def steps(self):
    return self._steps


class EstimatorGetVariablesTest(tf.test.TestCase):

  def test_model_should_be_trained(self):
    """Variable accessors must fail before any checkpoint exists."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='one')
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    with self.assertRaisesRegexp(ValueError, 'not find trained model'):
      est.get_variable_names()
    with self.assertRaisesRegexp(ValueError, 'not find trained model'):
      est.get_variable_value('one')

  def test_get_variable_utils(self):
    """get_variable_names/value reflect trained checkpoint contents."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='one')
      tf.Variable(3., name='three')
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(input_fn=dummy_input_fn, steps=1)
    self.assertEqual(
        set(['one', 'three', 'global_step']), set(est.get_variable_names()))
    self.assertEqual(1., est.get_variable_value('one'))
    self.assertEqual(3., est.get_variable_value('three'))


class EstimatorTraceTest(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    self._profiler_dir = os.path.join(self.get_temp_dir(), 'profiler')
    expected_features = {'x': 42., 'y': 43.}
    expected_labels = 44.
    model_fn_call_count = [0]
    input_fn = _make_input_fn(expected_features, expected_labels)

    class ModelFn(object):

      # NOTE(review): this section looks truncated by extraction — the values
      # above are bound as setUp locals, yet ModelFn reads them as
      # EstimatorTraceTest class attributes; confirm against the upstream file.
      def __call__(self, features, labels):
        EstimatorTraceTest.model_fn_call_count[0] += 1
        return _estimator_spec(EstimatorTraceTest.expected_features,
                               EstimatorTraceTest.expected_labels, features,
                               labels, ModeKeys.TRAIN)


class EstimatorDatasetIntegrationTest(tf.test.TestCase):
  """Tests dataset integration."""

  def test_returned_by_input_fn(self):

    def _input_fn():
      return tf.data.Dataset.from_tensors(([1.], [2.]))

    def _model_fn(features, labels, mode):
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=features + labels,  # 1 + 2
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn, steps=1)
    scores = est.evaluate(_input_fn, steps=1)
    self.assertEqual(3., scores[model_fn_lib.LOSS_METRIC_KEY])

  def test_with_none_labels(self):
    """A features-only Dataset yields labels=None in model_fn."""

    def _input_fn():
      return tf.data.Dataset.from_tensors([7.])

    def _model_fn(features, labels, mode):
      self.assertIsNone(labels)
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=features,  # 7
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn, steps=1)
    scores = est.evaluate(_input_fn, steps=1)
    self.assertEqual(7., scores[model_fn_lib.LOSS_METRIC_KEY])

  def test_with_predict(self):

    def _input_fn():
      return tf.data.Dataset.from_tensors([10.])

    def _model_fn(features, labels, mode):
      _ = labels
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=features,  # 10
          loss=features,  # 10
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn, steps=1)
    self.assertEqual([10.], next(est.predict(input_fn=_input_fn)))

  def test_batching(self):
    """Train/evaluate/predict run until the batched Dataset is exhausted."""

    def _input_fn():
      return tf.data.Dataset.from_tensor_slices(
          ([[1.], [2.]], [[10.], [20.]])).batch(1)

    def _model_fn(features, labels, mode):
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=features,
          loss=features + (0 if labels is None else labels),  # 11, 22
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn)
    scores = est.evaluate(_input_fn)
    # (11 + 22)/2 = 16.5
    self.assertEqual(16.5, scores[model_fn_lib.LOSS_METRIC_KEY])
    self.assertEqual([1., 2.], list(est.predict(_input_fn)))


class EstimatorEvaluateTest(tf.test.TestCase):

  def test_eval_dir(self):
    est = estimator.EstimatorV2(
        model_fn=model_fn_global_step_incrementer, model_dir='some_path')
    expected_eval_dir = os.path.join('some_path', 'eval')
    self.assertEqual(expected_eval_dir, est.eval_dir())
    expected_eval_dir_name = os.path.join('some_path', 'eval_a_name')
    self.assertEqual(expected_eval_dir_name, est.eval_dir('a_name'))

  def test_input_fn_args(self):
    """input_fn receives EVAL mode, params and config; called once per eval."""
    expected_mode = ModeKeys.EVAL
    expected_params = {'batch_size': 10}
    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
    input_fn_call_count = [0]

    def _model_fn(features, labels, mode, params, config):
      del params, config
      return model_fn_global_step_incrementer(features, labels, mode)

    def _input_fn(mode, params, config):
      input_fn_call_count[0] += 1
      self.assertEqual(expected_mode, mode)
      self.assertEqual(expected_params, params)
      self.assertEqual(4321, config.tf_random_seed)
      return dummy_input_fn()

    est = estimator.EstimatorV2(
        model_fn=_model_fn, params=expected_params, config=expected_config)
    est.train(dummy_input_fn, steps=1)
    self.assertEqual(0, input_fn_call_count[0])
    est.evaluate(_input_fn, steps=1)
    self.assertEqual(1, input_fn_call_count[0])

  def test_model_fn_must_return_estimator_spec(self):

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      if mode == ModeKeys.EVAL:
        return 'NotGoodNotGood'
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(1.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(ValueError,
                                 'model_fn should return an EstimatorSpec'):
      est.evaluate(dummy_input_fn, steps=1)

  def test_no_checkpoint_uses_init(self):

    def _model_fn(features, labels, mode, params):
      del features, labels, params
      mean = tf_keras_v1.metrics.Mean()
      mean.update_state(tf.Variable(2.) + 1)
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(1.),
          eval_metric_ops={
              'mean1': mean,
              'mean2': tf.metrics.mean(tf.compat.v1.Variable(2.) + 1)
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    scores = est.evaluate(dummy_input_fn, steps=1)
    # Metric value here is set to 1 + the value of the Variable that is newly
    # initialized (since there is no checkpoint).
    self.assertEqual(3., scores['mean1'])
    self.assertEqual(3., scores['mean2'])

  @test_util.run_v1_only('b/119219961')
  def test_no_checkpoint_uses_init_with_warm_starting(self):
    """Evaluation warm-starts from a SavedModel or external checkpoint."""

    def _make_model_fn(x):

      def _variable_creating_and_export_model_fn(features, labels, mode):
        _, _ = features, labels
        x_var = tf.get_variable('x', initializer=x)
        global_step = tf.train.get_global_step()
        mean = tf_keras_v1.metrics.Mean()
        mean.update_state(x_var + 1)
        return model_fn_lib.EstimatorSpec(
            mode,
            predictions={'y': tf.constant(1.0)},
            loss=tf.constant(1.),
            eval_metric_ops={
                'mean1': mean,
                'mean2': tf.metrics.mean(x_var + 1)
            },
            train_op=tf.assign_add(global_step, 1),
            export_outputs={
                'test':
                    export_lib.ClassificationOutput(
                        tf.constant([4.2]), tf.constant(['label']))
            })

      return _variable_creating_and_export_model_fn

    first_est = estimator.EstimatorV2(model_fn=_make_model_fn(42.))
    first_est.train(dummy_input_fn, steps=10)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
    tmpdir = tempfile.mkdtemp()
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    exported_path = first_est.export_saved_model(export_dir_base,
                                                 serving_input_receiver_fn)

    # Test that we can pass either warm_start_from as an external checkpoint
    # or an exported SavedModel.
    est = estimator.EstimatorV2(
        model_fn=_make_model_fn(52.), warm_start_from=exported_path)
    eval_metrics = est.evaluate(dummy_input_fn, steps=1)
    # Metric value here is set to 1 + the value of the Variable that is
    # warm-started from the SavedModel of the first model (42.), as opposed to
    # the initialization in the new model_fn (52.).
    self.assertEqual(43., eval_metrics['mean1'])
    self.assertEqual(43., eval_metrics['mean2'])

    est = estimator.EstimatorV2(
        model_fn=_make_model_fn(62.), warm_start_from=first_est.model_dir)
    eval_metrics = est.evaluate(dummy_input_fn, steps=1)
    # Metric value here is set to 1 + the value of the Variable that is
    # warm-started from a checkpoint of the first model (42.), as opposed to
    # the initialization in the new model_fn (52.).
    self.assertEqual(43., eval_metrics['mean1'])
    self.assertEqual(43., eval_metrics['mean2'])

  def test_scores(self):
    est = estimator.EstimatorV2(
        model_fn=_model_fn_with_eval_metric_ops,
        params={
            'metric_name': 'metric',
            'metric_value': 2.,
            'metric_name_2': 'metric2',
            'metric_value_2': 3.,
        })
    est.train(dummy_input_fn, steps=5)
    scores = est.evaluate(dummy_input_fn, steps=1)
    self.assertIn('metric', scores)
    self.assertAlmostEqual(2., scores['metric'])
    self.assertIn('metric2', scores)
    self.assertAlmostEqual(3., scores['metric2'])

  def test_tuple_metrics(self):
    """Nested tuple structure in eval_metric_ops survives evaluation."""

    def _model_fn(features, labels, mode):
      del features  # unused
      del labels
      return model_fn_lib.EstimatorSpec(
          mode,
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          loss=tf.constant(1.),
          eval_metric_ops={
              'nested_metric': (
                  ((tf.constant(2.), tf.constant(1)),
                   tf.constant(3., dtype=tf.dtypes.float64)), tf.no_op())
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    evaluation = est.evaluate(dummy_input_fn, steps=1)
    ((two_float, one_integer), three_double) = evaluation['nested_metric']
    self.assertAlmostEqual(2., two_float)
    self.assertEqual(1, one_integer)
    self.assertAlmostEqual(3., three_double)

  def test_steps0_raises_error(self):
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    est.train(dummy_input_fn, steps=5)
    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
      est.evaluate(dummy_input_fn, steps=0)

  def test_steps_negative_raises_error(self):
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    est.train(dummy_input_fn, steps=5)
    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
      est.evaluate(dummy_input_fn, steps=-1)

  def test_global_step_metric_raises_error(self):
    est = estimator.EstimatorV2(
        model_fn=_model_fn_with_eval_metric_ops,
        params={
            'metric_name': 'global_step',
            'metric_value': 2.
        })
    est.train(dummy_input_fn, steps=5)
    with self.assertRaisesRegexp(
        ValueError, 'Metric with name `global_step` is not allowed'):
      est.evaluate(dummy_input_fn, steps=1)

  def test_global_step_is_reported(self):
    est = estimator.EstimatorV2(
        model_fn=_model_fn_with_eval_metric_ops,
        params={
            'metric_name': 'metric',
            'metric_value': 2.,
            'metric_name_2': 'metric2',
            'metric_value_2': 3.,
        })
    est.train(dummy_input_fn, steps=5)
    scores = est.evaluate(dummy_input_fn, steps=1)
    self.assertIn('global_step', scores)
    self.assertEqual(5, scores['global_step'])

  def test_loss_metric_is_reported(self):
    """Reported loss is averaged over evaluation steps."""

    def _model_fn_with_incremental_loss(features, labels, mode):
      _, _ = features, labels
      local_weight = tf.Variable(
          0., name='local_weight', collections=[tf.GraphKeys.LOCAL_VARIABLES])
      # Loss will be 2, 4, 6, ...
      loss = 2 * tf.assign_add(local_weight, 1.)
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=loss,
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    est = estimator.EstimatorV2(model_fn=_model_fn_with_incremental_loss)
    est.train(dummy_input_fn, steps=1)
    scores = est.evaluate(dummy_input_fn, steps=5)
    self.assertIn(model_fn_lib.LOSS_METRIC_KEY, scores)
    # Average loss will be (2 + 4 + 6 + 8 + 10)/5=6
    self.assertAlmostEqual(6., scores[model_fn_lib.LOSS_METRIC_KEY])

  def test_hooks_should_be_session_run_hook(self):
    est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer)
    est.train(dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
      est.evaluate(dummy_input_fn, steps=5, hooks=['NotAHook'])

  def test_hooks_are_used(self):
    step_counter_hook = _StepCounterHook()
    est = estimator.EstimatorV2(model_fn=_model_fn_with_eval_metric_ops)
    est.train(dummy_input_fn, steps=1)
    est.evaluate(dummy_input_fn, steps=5, hooks=[step_counter_hook])
    self.assertEqual(5, step_counter_hook.steps)

  def test_evaluate_from_checkpoint(self):
    """A fresh estimator can evaluate another estimator's checkpoint."""
    params = {
        'metric_name': 'metric',
        'metric_value': 2.,
        'metric_name_2': 'metric2',
        'metric_value_2': 3.,
    }
    est1 = estimator.EstimatorV2(
        model_fn=_model_fn_with_eval_metric_ops, params=params)
    est1.train(dummy_input_fn, steps=5)
    est2 = estimator.EstimatorV2(
        model_fn=_model_fn_with_eval_metric_ops, params=params)
    scores = est2.evaluate(
        dummy_input_fn, steps=1, checkpoint_path=est1.latest_checkpoint())
    self.assertEqual(5, scores['global_step'])

  @test_util.run_v1_only('VariableV1 is only exported in v1')
  def test_wrong_shape_throws_reasonable_error(self):
    """Make sure we are helpful when model_fns change. See b/110263146."""

    def _get_model_fn(val=1):

      def _model_fn(features, labels, mode):
        del features, labels  # unused
        tf.Variable(val, name='weight')
        return model_fn_lib.EstimatorSpec(
            mode=mode,
            predictions=tf.constant([[1.]]),
            loss=tf.constant(0.),
            train_op=tf.assign_add(tf.train.get_global_step(), 1))

      return _model_fn

    model_fn_1 = _get_model_fn()
    model_fn_2 = _get_model_fn(val=[1])

    est1 = estimator.EstimatorV2(model_fn=model_fn_1)
    est1.train(dummy_input_fn, steps=5)
    est2 = estimator.EstimatorV2(model_fn=model_fn_2, model_dir=est1.model_dir)

    expected_msg = 'Restoring from checkpoint failed.*a mismatch between'
    with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, expected_msg):
      est2.train(
          dummy_input_fn,
          steps=1,
      )

  def test_scaffold_is_used(self):
    """Evaluation restores through the Scaffold's custom saver."""

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='weight')
      self.mock_saver = get_mock_saver()
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=tf.constant([[1.]]),
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=tf.train.Scaffold(saver=self.mock_saver))

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    est.evaluate(dummy_input_fn, steps=1)
    self.assertTrue(self.mock_saver.restore.called)

  def test_features_labels_mode(self):
    """model_fn must receive the exact features/labels and EVAL mode."""
    given_features = {'test-features': [[1], [1]]}
    given_labels = {'test-labels': [[1], [1]]}

    def _input_fn():
      return given_features, given_labels

    def _model_fn(features, labels, mode):
      self.features, self.labels, self.mode = features, labels, mode
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn, steps=1)
    est.evaluate(_input_fn, steps=1)
    self.assertEqual(given_features, self.features)
    self.assertEqual(given_labels, self.labels)
    self.assertEqual(ModeKeys.EVAL, self.mode)

  def test_graph_initialization_global_step_and_random_seed(self):
    expected_random_seed = run_config.RunConfig().tf_random_seed

    def _model_fn(features, labels, mode):
      _, _, _ = features, labels, mode
      self.assertIsNotNone(tf.train.get_global_step())
      self.assertEqual(expected_random_seed, tf.get_default_graph().seed)
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    est.evaluate(dummy_input_fn, steps=1)

  def test_evaluation_hooks_are_used(self):
    hook = tf.test.mock.MagicMock(
        wraps=tf.train.SessionRunHook(), spec=tf.train.SessionRunHook)

    def _model_fn_hooks(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          evaluation_hooks=[hook])

    est = estimator.EstimatorV2(model_fn=_model_fn_hooks)
    est.train(dummy_input_fn, steps=1)
    self.assertFalse(hook.begin.called)
    est.evaluate(dummy_input_fn, steps=1)
    self.assertTrue(hook.begin.called)

  def test_summary_writing_with_summary_proto(self):
    """Image-summary metrics and the checkpoint path land in eval events."""

    def model_fn_global_step_incrementer_image(features, labels, mode):
      _, _ = features, labels
      global_step = tf.train.get_global_step()
      image = tf.zeros([5, 3, 3, 1])
      eval_metric_ops = {
          'foo': (tf.summary.image('image', image, max_outputs=3),
                  tf.constant(1))
      }
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(1.),
          train_op=tf.assign_add(global_step, 1),
          eval_metric_ops=eval_metric_ops)

    est = estimator.EstimatorV2(
        model_fn=model_fn_global_step_incrementer_image,
        config=run_config.RunConfig(save_summary_steps=1))
    est.train(dummy_input_fn, steps=200)
    est.evaluate(
        input_fn=dummy_input_fn,
        steps=200,
    )

    # Make sure nothing is stuck in limbo.
    tf.summary.FileWriterCache.clear()

    # Get last evaluation Event written.
for key in ['foo/0', 'foo/1', 'foo/2']: self.assertTrue( check_eventfile_for_keyword(key, est.eval_dir()), '{} should be part of reported summaries.'.format(key)) # Verify that evaluated checkpoint path is written to event file. checkpoint_path_tag = 'checkpoint_path' self.assertTrue( check_eventfile_for_keyword(checkpoint_path_tag, est.eval_dir()), '{} should be part of reported summaries.'.format(checkpoint_path_tag)) expected_tensor_proto = tf.make_tensor_proto( est.latest_checkpoint(), dtype=tf.dtypes.string) summaries = summaries_with_matching_keyword(checkpoint_path_tag, est.eval_dir()) self.assertProtoEquals(expected_tensor_proto, next(summaries).value[0].tensor) def test_summary_writing_with_tensor(self): def model_fn_with_prediction_mean_tensor_eval_metric_ops( features, labels, mode, params): _, _ = features, labels global_step = tf.train.get_global_step() metric_name = params.get('metric_name') or 'metric' predictions = tf.constant([1., .5, 0.]) eval_metric_ops = {metric_name: tf.metrics.mean_tensor(predictions)} return model_fn_lib.EstimatorSpec( mode, loss=tf.constant(1.), predictions={'predictions': predictions}, train_op=tf.assign_add(global_step, 1), eval_metric_ops=eval_metric_ops) metric_key = 'PMT' params = { 'metric_name': metric_key, } est = estimator.EstimatorV2( model_fn=model_fn_with_prediction_mean_tensor_eval_metric_ops, params=params, config=run_config.RunConfig(save_summary_steps=1)) est.train(input_fn=dummy_input_fn, steps=10) est.evaluate( input_fn=dummy_input_fn, steps=10, ) tf.summary.FileWriterCache.clear() self.assertTrue( check_eventfile_for_keyword(metric_key, est.eval_dir()), '{} should be part of reported summaries.'.format(metric_key)) summaries = summaries_with_matching_keyword(metric_key, est.eval_dir()) for value in next(summaries).value: if value.tag == metric_key: self.assertTrue(value.HasField('tensor')) class EstimatorPredictTest(tf.test.TestCase): def test_input_fn_args(self): expected_mode = ModeKeys.PREDICT 
    expected_params = {'batch_size': 10}
    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
    input_fn_call_count = [0]  # mutable cell so the closure can count calls

    def _model_fn(features, labels, mode, params, config):
      del features, labels, params, config
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    def _input_fn(mode, params, config):
      input_fn_call_count[0] += 1
      self.assertEqual(expected_mode, mode)
      self.assertEqual(expected_params, params)
      self.assertEqual(4321, config.tf_random_seed)
      return dummy_input_fn()

    est = estimator.EstimatorV2(
        model_fn=_model_fn, params=expected_params, config=expected_config)
    est.train(dummy_input_fn, steps=1)
    self.assertEqual(0, input_fn_call_count[0])
    # predict() is lazy: the input_fn only runs once the generator is pulled.
    next(est.predict(_input_fn))
    self.assertEqual(1, input_fn_call_count[0])

  def test_no_checkpoint_uses_init(self):
    """Without a checkpoint, predict() runs variable initializers."""

    def _model_fn(features, labels, mode, params, config):
      del features, labels, params, config
      x = tf.Variable([[3.]], name='x')
      return model_fn_lib.EstimatorSpec(mode, predictions=tf.math.add(x, 1.))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    # Expected prediction value is 1 + the value of the Variable that is newly
    # initialized (since there is no checkpoint).
    self.assertEqual(4., next(est.predict(dummy_input_fn)))

  def test_no_trained_model_invalid_checkpoint_path(self):
    """A None checkpoint_path (nonexistent dir) raises ValueError."""
    est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer)
    with self.assertRaises(ValueError):
      next(
          est.predict(
              dummy_input_fn,
              checkpoint_path=tf.train.latest_checkpoint('fakedir')))

  def test_tensor_predictions(self):
    """A plain Tensor prediction is yielded row-by-row."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    self.assertEqual(10., next(est.predict(dummy_input_fn)))

  def test_predictionhooks_are_used(self):
    """prediction_hooks from the EstimatorSpec run during predict()."""
    hook = tf.test.mock.MagicMock(
        wraps=tf.train.SessionRunHook(), spec=tf.train.SessionRunHook)

    def _model_fn_hooks(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]),
          prediction_hooks=[hook])

    est = estimator.EstimatorV2(model_fn=_model_fn_hooks)
    est.train(dummy_input_fn, steps=1)
    self.assertFalse(hook.begin.called)
    next(est.predict(dummy_input_fn))
    self.assertTrue(hook.begin.called)

  def test_warn_if_no_queue_runner(self):
    """predict() logs a warning when the input graph has no QueueRunner."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with tf.test.mock.patch.object(tf.logging, 'warning') as mock_log:
      next(est.predict(dummy_input_fn))
      self.assertRegexpMatches(
          str(mock_log.call_args),
          'Input graph does not.*contain a QueueRunner.')

  def test_skip_warn_if_dataset_returns_features(self):
    """No QueueRunner warning when input_fn returns a Dataset tensor."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    def _input_fn():
      dataset = tf.data.Dataset.from_tensors([1])
      iterator = tf.data.make_one_shot_iterator(dataset)
      return iterator.get_next()

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with tf.test.mock.patch.object(tf.logging, 'warning') as mock_log:
      next(est.predict(_input_fn))
      # The warning should not have keyword QueueRunner.
      self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$')

  def test_skip_warn_if_dataset_returns_features_dict(self):
    """No QueueRunner warning when input_fn returns a Dataset-backed dict."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    def _input_fn():
      dataset = tf.data.Dataset.from_tensors([1])
      iterator = tf.data.make_one_shot_iterator(dataset)
      features = {'age': iterator.get_next()}
      return features

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with tf.test.mock.patch.object(tf.logging, 'warning') as mock_log:
      next(est.predict(_input_fn))
      # The warning should not have keyword QueueRunner.
      self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$')

  def test_input_fn_can_return_just_features(self):
    """input_fn may return features only (no labels) for predict()."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)

    def _only_features():
      return {'x': tf.constant([[0.]])}

    self.assertEqual([10.], next(est.predict(_only_features)))

  def test_batch_size_mismatch(self):
    """Dict predictions with differing batch sizes raise ValueError."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions={
              'y1': tf.constant([[10.]]),
              'y2': tf.constant([[12.], [13]])
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(ValueError,
                                 'Batch length of predictions should be same'):
      next(est.predict(dummy_input_fn))

  def test_iterate_batches(self):
    """yield_single_examples=False yields whole batches, shapes intact."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions={
              # First dim is different but the prediction should still work
              'y1': tf.zeros(shape=[3]),
              'y2': tf.zeros(shape=[5, 3])
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    predictions = next(est.predict(dummy_input_fn, yield_single_examples=False))
    self.assertAllEqual(predictions['y1'].shape, [3])
    self.assertAllEqual(predictions['y2'].shape, [5, 3])

  def test_predict_keys_defined_for_tensor(self):
    """predict_keys is invalid when predictions is a bare Tensor."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(
        ValueError,
        'predict_keys argument is not valid in case of non-dict predictions'):
      next(est.predict(dummy_input_fn, predict_keys=['y']))

  def test_predict_keys_does_not_exists(self):
    """Requesting a key absent from the predictions dict raises."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions={
              'y1': tf.constant([[10.]]),
              'y2': tf.constant([[12.]])
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(ValueError,
                                 'Expected to run at least one output from'):
      next(est.predict(dummy_input_fn, predict_keys=['y3']))

  def test_return_given_predict_keys(self):
    """Only the requested predict_keys appear in the results."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions={
              'y1': tf.constant([[10.]]),
              'y2': tf.constant([[12.]])
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    results = next(est.predict(dummy_input_fn, predict_keys=['y1']))
    self.assertIn('y1', results)
    self.assertNotIn('y2', results)

  def test_yield_rows_of_tensor(self):
    """A [2, 1] prediction tensor is yielded one row at a time."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.], [12.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    results = est.predict(dummy_input_fn)
    self.assertEqual([10.], next(results))
    self.assertEqual([12.], next(results))

  def test_yield_rows_of_dict(self):
    """Dict predictions are split row-wise across yields."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions={
              'y1': tf.constant([[10.], [12]]),
              'y2': tf.constant([[0.], [2.]])
          })
    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    results = est.predict(dummy_input_fn)
    self.assertDictEqual({'y1': [10.], 'y2': [0.]}, next(results))
    self.assertDictEqual({'y1': [12.], 'y2': [2.]}, next(results))

  def test_hooks_should_be_session_run_hook(self):
    """Non-SessionRunHook objects in `hooks` raise TypeError."""
    est = estimator.EstimatorV2(model_fn=model_fn_global_step_incrementer)
    est.train(dummy_input_fn, steps=1)
    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
      next(est.predict(dummy_input_fn, hooks=['NotAHook']))

  def test_hooks_are_used(self):
    """Hooks observe one session step per batch, not per yielded example."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[10.], [12.]]))

    step_counter_hook = _StepCounterHook()
    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    results = est.predict(dummy_input_fn, hooks=[step_counter_hook])
    self.assertEqual(0, step_counter_hook.steps)  # not called yet
    next(results)
    self.assertEqual(1, step_counter_hook.steps)  # first call
    next(results)
    self.assertEqual(1, step_counter_hook.steps)  # it's in same batch
    next(results)
    self.assertEqual(2, step_counter_hook.steps)  # next batch

  def test_predict_from_old_model_dir(self):
    """A fresh estimator pointed at an existing model_dir restores weights."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      v = tf.Variable([[16.]], name='weight')
      prediction = v * 2
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=prediction)

    est1 = estimator.EstimatorV2(model_fn=_model_fn)
    est1.train(dummy_input_fn, steps=1)
    est2 = estimator.EstimatorV2(model_fn=_model_fn, model_dir=est1.model_dir)
    self.assertEqual([32.], next(est2.predict(dummy_input_fn)))

  def test_predict_from_checkpoint_path(self):
    """predict() honors an explicit checkpoint_path argument."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      v = tf.Variable([[16.]], name='weight')
      prediction = v * 2
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=prediction)

    est1 = estimator.EstimatorV2(model_fn=_model_fn)
    est1.train(dummy_input_fn, steps=1)
    est2 = estimator.EstimatorV2(model_fn=_model_fn, model_dir=est1.model_dir)
    self.assertEqual([32.],
                     next(
                         est2.predict(
                             dummy_input_fn,
                             checkpoint_path=est2.latest_checkpoint())))

  def test_scaffold_is_used(self):
    """A custom Scaffold saver is used to restore for predict()."""

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='weight')
      self.mock_saver = get_mock_saver()
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=tf.constant([[1.]]),
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=tf.train.Scaffold(saver=self.mock_saver))

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    next(est.predict(dummy_input_fn))
    self.assertTrue(self.mock_saver.restore.called)

  def test_features_labels_mode(self):
    """model_fn sees the input features, labels=None and PREDICT mode."""
    given_features = {'test-features': [[1], [1]]}
    given_labels = {'test-labels': [[1], [1]]}

    def _input_fn():
      return given_features, given_labels

    def _model_fn(features, labels, mode):
      self.features, self.labels, self.mode = features, labels, mode
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(_input_fn, steps=1)
    next(est.predict(_input_fn))
    self.assertEqual(given_features, self.features)
    self.assertIsNone(self.labels)
    self.assertEqual(ModeKeys.PREDICT, self.mode)

  def test_graph_initialization_global_step_and_random_seed(self):
    """Predict graph has a global step and the configured random seed."""
    expected_random_seed = run_config.RunConfig().tf_random_seed

    def _model_fn(features, labels, mode):
      _, _, _ = features, labels, mode
      self.assertIsNotNone(tf.train.get_global_step())
      self.assertEqual(expected_random_seed, tf.get_default_graph().seed)
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]))
    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    next(est.predict(dummy_input_fn))


def _model_fn_for_export_tests(features, labels, mode):
  """Minimal model_fn with an export_outputs entry, shared by export tests."""
  _, _ = features, labels
  tf.Variable(1., name='weight')
  scores = tf.constant([3.])
  classes = tf.constant(['wumpus'])
  update_global_step = tf.assign_add(tf.train.get_global_step(), 1)
  with tf.control_dependencies([update_global_step]):
    train_op = tf.constant(2.)
  return model_fn_lib.EstimatorSpec(
      mode,
      predictions=tf.constant(10.),
      loss=tf.constant(1.),
      train_op=train_op,
      export_outputs={'test': export_lib.ClassificationOutput(scores, classes)})


def _x_y_input_fn():
  # Named feature ops so exported graphs can be checked for
  # 'feature_x' / 'feature_y'.
  return ({
      'x': tf.constant([[1], [1]], name='feature_x'),
      'y': tf.constant([[2], [2]], name='feature_y')
  }, tf.constant([[1], [1]], name='truth'))


def _model_fn_with_x_y(features, labels, mode):
  """model_fn with mode-specific ops/variables to probe per-mode exports."""
  _ = labels
  tf.Variable(1., name='weight')
  scores = tf.constant([3.])
  classes = tf.constant(['wumpus'])
  if mode == ModeKeys.PREDICT:
    tf.Variable(36., name='name_collision')
    return model_fn_lib.EstimatorSpec(
        mode,
        predictions=tf.constant(10.),
        export_outputs={
            'test': export_lib.ClassificationOutput(scores, classes)
        })
  else:
    # Prefix eval-mode op names so TRAIN and EVAL graphs are distinguishable.
    prefix = 'eval_' if mode == ModeKeys.EVAL else ''
    multiplied = tf.math.multiply(
        features['x'], features['y'], name='{}multiplied'.format(prefix))
    mean = tf_keras_v1.metrics.Mean(name='{}mean'.format(prefix))
    mean.update_state(features['x'] - features['y'])
    eval_metrics = {
        'mean1': mean,
        'mean2':
            tf.metrics.mean(
                features['x'] - features['y'], name='{}mean'.format(prefix))
    }
    tf.Variable(1., name='later_var')
    tf.Variable(3., name='name_collision')
    return model_fn_lib.EstimatorSpec(
        mode,
        predictions=multiplied,
        loss=tf.constant(1.),
        train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
        eval_metric_ops=eval_metrics)


def _model_fn_with_saveables_for_export_tests(features, labels, mode):
  """model_fn whose checkpoint contains a custom SaveableObject (a table)."""
  _, _ = features, labels
  table = saver_test_utils.CheckpointedOp(name='v2')
  update_global_step = tf.assign_add(tf.train.get_global_step(), 1)
  with tf.control_dependencies([update_global_step]):
    train_op = table.insert('k1', 30.0)
  prediction = table.lookup('k1', 0.0)
  return model_fn_lib.EstimatorSpec(
      mode,
      predictions=prediction,
      loss=tf.constant(1.),
      train_op=train_op,
      export_outputs={
          'test': export_lib.PredictOutput({'prediction': prediction})
      })


def _get_serving_input_receiver_fn():
  # Parsing receiver: accepts serialized tf.Examples with int64 'x' and 'y'.
  feature_spec = {
      'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
      'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
  }
  return export_lib.build_parsing_serving_input_receiver_fn(feature_spec)


def _get_supervised_input_receiver_fn():
  # Supervised receiver (features AND labels) built from the shared input_fn.
  return export_lib.build_supervised_input_receiver_fn_from_input_fn(
      _x_y_input_fn)


# Fixture file contents for asset-export tests.
_VOCAB_FILE_CONTENT = 'emerson\nlake\npalmer\n'
_EXTRA_FILE_CONTENT = 'kermit\npiggy\nralph\n'


@test_util.run_v1_only('b/119219961')
class EstimatorExportTest(tf.test.TestCase):
  """Tests for SavedModel export from `EstimatorV2`."""

  def test_export_saved_model_proto_roundtrip_raw_receiver(self):
    """Export with a parsing receiver and reload the SavedModel."""
    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn_for_export_tests)
    est.train(input_fn=dummy_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    serving_input_receiver_fn = _get_serving_input_receiver_fn()
    export_dir = est.export_saved_model(export_dir_base,
                                        serving_input_receiver_fn)

    # Check that all the files are in the right places.
    self.assertTrue(tf.gfile.Exists(export_dir_base))
    self._validate_exported_files(export_dir)

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('input_example_tensor' in graph_ops)
        self.assertTrue('ParseExample/ParseExampleV2' in graph_ops)
        self.assertTrue('weight' in graph_ops)

  def test_export_saved_model_train(self):
    self._test_export_saved_model_for_mode(_get_supervised_input_receiver_fn(),
                                           ModeKeys.TRAIN)

  def test_export_saved_model_eval(self):
    self._test_export_saved_model_for_mode(_get_supervised_input_receiver_fn(),
                                           ModeKeys.EVAL)

  def test_export_saved_model_predict(self):
    self._test_export_saved_model_for_mode(_get_serving_input_receiver_fn(),
                                           ModeKeys.PREDICT)

  def _test_export_saved_model_for_mode(self, input_receiver_fn, mode):
    """Exports for a single mode and validates files, tags and graph ops."""
    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn_for_export_tests)
    est.train(input_fn=_x_y_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(
        export_dir_base, input_receiver_fn, experimental_mode=mode)

    # Check that all the files are in the right places.
    self.assertTrue(tf.gfile.Exists(export_dir_base))
    self._validate_exported_files(export_dir)

    # Restore, to validate that the export was well-formed.
    tag_set = export_lib.EXPORT_TAG_MAP[mode]
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, tag_set, export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertFalse('name_collision_1' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_proto_roundtrip_receiver_map(self):
    """PREDICT-only receiver map exports a loadable serving graph."""
    input_receiver_fn_map = {ModeKeys.PREDICT: _get_serving_input_receiver_fn()}
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('input_example_tensor' in graph_ops)
        self.assertTrue('ParseExample/ParseExampleV2' in graph_ops)
        self.assertFalse('feature_x' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_proto_roundtrip_train_only(self):
    """TRAIN-only export contains train ops but no eval-prefixed ops."""
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
    }
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.TRAINING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('multiplied' in graph_ops)
        self.assertTrue('mean/update_op' in graph_ops)
        self.assertFalse('eval_multiplied' in graph_ops)
        self.assertTrue('feature_x' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_proto_roundtrip_eval_only(self):
    """EVAL-only export contains eval-prefixed ops but no train ops."""
    input_receiver_fn_map = {ModeKeys.EVAL: _get_supervised_input_receiver_fn()}
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tag_constants.EVAL], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('eval_multiplied' in graph_ops)
        self.assertTrue('eval_mean/value' in graph_ops)
        self.assertFalse('multiplied' in graph_ops)
        self.assertTrue('feature_x' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_proto_roundtrip_no_serving(self):
    """TRAIN+EVAL (no PREDICT) export yields both mode graphs."""
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.EVAL: _get_supervised_input_receiver_fn()
    }
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.TRAINING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('multiplied' in graph_ops)
        self.assertFalse('eval_multiplied' in graph_ops)
        self.assertTrue('feature_x' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tag_constants.EVAL], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('eval_multiplied' in graph_ops)
        self.assertFalse('multiplied' in graph_ops)
        self.assertTrue('feature_x' in graph_ops)
        self.assertTrue('feature_y' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_proto_roundtrip_three_defs(self):
    """All three modes export; each tagged graph restores cleanly."""
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
        ModeKeys.PREDICT: _get_serving_input_receiver_fn()
    }
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    # Restore, to validate that the export was well-formed.
    for tag_set in export_lib.EXPORT_TAG_MAP.values():
      with tf.Graph().as_default() as graph:
        with tf.Session(graph=graph) as sess:
          tf.saved_model.load(sess, tag_set, export_dir)
          graph_ops = [x.name for x in graph.get_operations()]
          self.assertTrue('global_step/Assign' in graph_ops)
          self.assertTrue('global_step/Initializer/zeros' in graph_ops)
          self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_proto_roundtrip_all_vars(self):
    """Variables created only for TRAIN are absent from the serving graph."""
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.PREDICT: _get_serving_input_receiver_fn()
    }
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.TRAINING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('later_var' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertFalse('later_var' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_all_saved_models_name_collision(self):
    """Same-named variables across modes resolve to the checkpoint value."""
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.PREDICT: _get_serving_input_receiver_fn()
    }
    export_dir, tmpdir = self._test_export_all_saved_models(
        input_receiver_fn_map)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.TRAINING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('name_collision' in graph_ops)
        self.assertFalse('name_collision_1' in graph_ops)
        collection_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.assertEqual(3, collection_vars[-1].eval())

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('name_collision' in graph_ops)
        self.assertFalse('name_collision_1' in graph_ops)
        collection_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

        # This is a non-obvious detail: when we load the estimator spec
        # for predict, name_collision gets set to 36. However, we then restore
        # from checkpoint, which should overwrite that var and make it the 3
        # from training. In practice, this would not be a good way to write
        # a model_fn, but leaving this check in for now to ensure consistency
        # with what would happen given our current order of spec, then
        # checkpoint.
        self.assertEqual(3, collection_vars[-1].eval())

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def _test_export_all_saved_models(self, input_receiver_fn_map):
    """Trains briefly, exports all requested modes, validates the layout.

    Returns:
      Tuple of (export_dir, tmpdir) for further per-test assertions/cleanup.
    """
    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn_with_x_y)
    est.train(input_fn=_x_y_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.experimental_export_all_saved_models(
        export_dir_base, input_receiver_fn_map)

    # Check that all the files are in the right places.
    self.assertTrue(tf.gfile.Exists(export_dir_base))
    self._validate_exported_files(export_dir)
    return export_dir, tmpdir

  def _validate_exported_files(self, export_dir):
    """Asserts the standard SavedModel file layout exists under export_dir."""
    self.assertTrue(tf.gfile.Exists(export_dir))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('saved_model.pb'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables/variables.index'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables/variables.data-00000-of-00001'))))

  def test_export_all_saved_models_var_not_found(self):
    """Export fails when a mode's variables are missing from the checkpoint."""
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
        ModeKeys.PREDICT: _get_serving_input_receiver_fn()
    }

    def _model_fn_with_predict_only_vars(features, labels, mode):
      _, _ = features, labels
      # 'only_in_predict' never reaches the training checkpoint, so the
      # PREDICT export cannot restore it.
      if mode == ModeKeys.PREDICT:
        tf.Variable(1., name='only_in_predict')
      else:
        tf.Variable(1., name='otherwise')

      prediction = tf.constant(1.)
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=prediction,
          loss=tf.constant(1.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          export_outputs={
              'test': export_lib.PredictOutput({'prediction': prediction})
          })

    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn_with_predict_only_vars)
    est.train(input_fn=_x_y_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    err_regex = r'Could not load all requested variables[\w\W]*infer'
    with self.assertRaisesRegexp(ValueError, err_regex):
      est.experimental_export_all_saved_models(export_dir_base,
                                               input_receiver_fn_map)

  def test_export_all_saved_models_metric_operation(self):
    """Ensures metrics ops.Operations can be exported (b/109740581)."""

    def _model_fn(features, labels, mode):
      del features, labels  # Unused
      metric_obj = tf_keras_v1.metrics.Mean()
      metric_obj.update_state(tf.constant([0]))
      eval_metrics = {
          'metrics1': (tf.constant([0]), tf.no_op()),
          'metrics2': metric_obj,
      }
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=tf.constant(10.),
          loss=tf.constant(1.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          eval_metric_ops=eval_metrics)

    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(input_fn=dummy_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir),
        tf.compat.as_bytes('metric_operation_export'))
    input_receiver_fn_map = {ModeKeys.EVAL: _get_supervised_input_receiver_fn()}
    export_dir = est.experimental_export_all_saved_models(
        export_dir_base, input_receiver_fn_map)

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        meta_graph = tf.saved_model.load(sess, [tag_constants.EVAL], export_dir)
        sig_outputs = meta_graph.signature_def[ModeKeys.EVAL].outputs
        # Operation-valued update ops must be wrapped to be exportable.
        self.assertTrue(sig_outputs['metrics1/update_op'].name.startswith(
            'metric_op_wrapper'))
        self.assertTrue(sig_outputs['metrics2/update_op'].name.startswith(
            'metric_op_wrapper'))

  def test_export_saved_model_with_saveables_proto_roundtrip(self):
    """Custom SaveableObjects round-trip through export and reload."""
    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(
        model_fn=_model_fn_with_saveables_for_export_tests)
    est.train(input_fn=dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        serving_input_receiver_fn)

    # Check that all the files are in the right places.
    self.assertTrue(tf.gfile.Exists(export_dir_base))
    self.assertTrue(tf.gfile.Exists(export_dir))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('saved_model.pb'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables/variables.index'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables/variables.data-00000-of-00001'))))

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('input_example_tensor' in graph_ops)
        self.assertTrue('ParseExample/ParseExampleV2' in graph_ops)
        # The original saver is used to restore variables
        self.assertTrue('save/LookupTableImportV2' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_saved_model_assets(self):
    """Asset files referenced by the graph are copied into assets/."""
    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn_for_export_tests)
    est.train(input_fn=dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    # Create a fake asset.
    vocab_file_name = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('my_vocab_file'))
    vocab_file = tf.io.gfile.GFile(vocab_file_name, mode='w')
    vocab_file.write(_VOCAB_FILE_CONTENT)
    vocab_file.close()

    # hack in an op that uses the asset, in order to test asset export.
    # this is not actually valid, of course.
    def serving_input_receiver_with_asset_fn():
      features, receiver_tensor, _ = serving_input_receiver_fn()
      filename = ops.convert_to_tensor(
          vocab_file_name, tf.dtypes.string, name='asset_filepath')
      tf.add_to_collection(tf.compat.v1.GraphKeys.ASSET_FILEPATHS, filename)
      features['bogus_filename'] = filename
      return export_lib.ServingInputReceiver(features, receiver_tensor)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        serving_input_receiver_with_asset_fn)

    # Check that the asset files are in the right places.
    expected_vocab_file_name = os.path.join(
        tf.compat.as_bytes(export_dir),
        tf.compat.as_bytes('assets/my_vocab_file'))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir), tf.compat.as_bytes('assets'))))
    self.assertTrue(tf.gfile.Exists(expected_vocab_file_name))
    self.assertEqual(
        tf.compat.as_bytes(_VOCAB_FILE_CONTENT),
        tf.compat.as_bytes(tf.io.gfile.GFile(expected_vocab_file_name).read()))

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        assets = [
            x.eval() for x in graph.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
        ]
        self.assertItemsEqual([vocab_file_name], assets)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('input_example_tensor' in graph_ops)
        self.assertTrue('ParseExample/ParseExampleV2' in graph_ops)
        self.assertTrue('asset_filepath' in graph_ops)
        self.assertTrue('weight' in graph_ops)

    # cleanup
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_saved_model_extra_assets(self):
    """assets_extra entries are copied under assets.extra/."""
    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn_for_export_tests)
    est.train(input_fn=dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    # Create a fake asset.
    extra_file_name = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('my_extra_file'))
    extra_file = tf.io.gfile.GFile(extra_file_name, mode='w')
    extra_file.write(_EXTRA_FILE_CONTENT)
    extra_file.close()

    # Perform the export.
assets_extra = {'some/sub/directory/my_extra_file': extra_file_name} export_dir_base = os.path.join( tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export')) export_dir = est.export_saved_model( export_dir_base, serving_input_receiver_fn, assets_extra=assets_extra) # Check that the asset files are in the right places. expected_extra_path = os.path.join( tf.compat.as_bytes(export_dir), tf.compat.as_bytes('assets.extra/some/sub/directory/my_extra_file')) self.assertTrue( tf.gfile.Exists( os.path.join( tf.compat.as_bytes(export_dir), tf.compat.as_bytes('assets.extra')))) self.assertTrue(tf.gfile.Exists(expected_extra_path)) self.assertEqual( tf.compat.as_bytes(_EXTRA_FILE_CONTENT), tf.compat.as_bytes(tf.io.gfile.GFile(expected_extra_path).read())) # cleanup tf.gfile.DeleteRecursively(tmpdir) def test_export_saved_model_tensor_features(self): """Test that models accepting a single raw Tensor can be exported. See https://github.com/tensorflow/tensorflow/issues/11674 If the model_fn and receiver_fn accept raw tensors rather than dictionaries as input, export_saved_model should be okay with that, too. """ tmpdir = tempfile.mkdtemp() def _input_fn_tensor_features(): t = tf.constant([1, 2, 3], dtype=tf.dtypes.float32, shape=[1, 3]) return (t, None) def _model_fn_tensor_features(features, labels, mode): _ = labels prediction = tf.linalg.matmul(features, features, transpose_b=True) return model_fn_lib.EstimatorSpec( mode, predictions=prediction, loss=tf.constant(1.), train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1), export_outputs={ 'test': export_lib.PredictOutput({'prediction': prediction}) }) def _serving_input_receiver_fn(): feat = tf.placeholder(dtype=tf.dtypes.float32) return export_lib.TensorServingInputReceiver( features=feat, receiver_tensors=feat) est = estimator.EstimatorV2(model_fn=_model_fn_tensor_features) est.train(input_fn=_input_fn_tensor_features, steps=1) # Perform the export. 
    # (Continuation: export the raw-tensor model and verify the graph.)
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        _serving_input_receiver_fn)

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        graph_ops = [x.name.lower() for x in graph.get_operations()]
        self.assertTrue('const' in graph_ops)
        self.assertTrue('matmul' in graph_ops)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_saved_model_int_feature_keys(self):
    """Test that the `features` dict can contain int keys."""
    tmpdir = tempfile.mkdtemp()

    def _input_fn_with_int_keys():
      features = {
          'string_key': tf.constant([1], dtype=tf.dtypes.float32),
          42: tf.constant([43], dtype=tf.dtypes.float32),
      }
      return (features, None)

    def _model_fn_with_int_keys(features, labels, mode):
      _ = labels
      prediction = tf.math.maximum(features['string_key'], features[42])
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=prediction,
          loss=tf.constant(1.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          export_outputs={
              'test': export_lib.PredictOutput({'prediction': prediction})
          })

    def _serving_input_receiver_fn():
      features = {
          'string_key': tf.placeholder(dtype=tf.dtypes.float32),
          42: tf.placeholder(dtype=tf.dtypes.float32, name='42_placeholder'),
      }
      # int is only allowed in the `features` dict, not the `receiver_tensors`.
      receiver_tensors = {
          'string_key': features['string_key'],
          '42_key': features[42],
      }
      return export_lib.ServingInputReceiver(
          features=features, receiver_tensors=receiver_tensors)

    est = estimator.EstimatorV2(model_fn=_model_fn_with_int_keys)
    est.train(input_fn=_input_fn_with_int_keys, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        _serving_input_receiver_fn)

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        meta_graph_def = tf.saved_model.load(sess, [tf.saved_model.SERVING],
                                             export_dir)
        graph_ops = [x.name.lower() for x in graph.get_operations()]
        self.assertTrue('maximum' in graph_ops)
        self.assertTrue('42_placeholder' in graph_ops)
        # The string receiver key (not the int feature key) names the input.
        self.assertTrue(
            '42_key' in meta_graph_def.signature_def['serving_default'].inputs)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_scaffold_is_used_for_saver(self):
    """A Scaffold-provided saver is used during export (restore/save calls)."""
    tmpdir = tempfile.mkdtemp()

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='weight')
      # Stash the mock saver on the test so assertions below can reach it.
      self.mock_saver = get_mock_saver()
      scores = tf.constant([3.])
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=tf.constant([[1.]]),
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=tf.train.Scaffold(saver=self.mock_saver),
          export_outputs={'test': export_lib.ClassificationOutput(scores)})

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    est.export_saved_model(export_dir_base, serving_input_receiver_fn)

    self.assertTrue(self.mock_saver.restore.called)
    self.assertTrue(self.mock_saver.export_meta_graph.called)
    self.assertTrue(self.mock_saver.save.called)

  def test_scaffold_is_used_for_saver_multiple_modes(self):
    """Each mode's Scaffold saver is honored by export-all-saved-models."""
    tmpdir = tempfile.mkdtemp()
    savers = {'predict_saver': None, 'train_saver': None}

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='weight')
      scores = tf.constant([3.])
      if mode == ModeKeys.PREDICT:
        savers['predict_saver'] = get_mock_saver()
        scaffold = tf.train.Scaffold(saver=savers['predict_saver'])
      elif mode == ModeKeys.TRAIN:
        savers['train_saver'] = get_mock_saver()
        scaffold = tf.train.Scaffold(saver=savers['train_saver'])
      else:
        scaffold = tf.train.Scaffold()
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=tf.constant([[1.]]),
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=scaffold,
          export_outputs={'test': export_lib.ClassificationOutput(scores)})

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
        ModeKeys.PREDICT: _get_serving_input_receiver_fn()
    }

    # Perform the export.
    # (Continuation: export all modes and assert per-mode saver usage.)
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    est.experimental_export_all_saved_models(export_dir_base,
                                             input_receiver_fn_map)

    self.assertTrue(savers['train_saver'].restore.called)
    self.assertEqual(savers['train_saver'].export_meta_graph.call_count, 1)
    self.assertEqual(savers['train_saver'].save.call_count, 1)

    self.assertTrue(savers['predict_saver'].restore.called)
    self.assertEqual(savers['predict_saver'].export_meta_graph.call_count, 1)
    self.assertEqual(savers['predict_saver'].save.call_count, 0)

  def test_scaffold_is_used_for_local_init(self):
    """A Scaffold-provided local_init_op runs when the export is reloaded."""
    tmpdir = tempfile.mkdtemp()

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      my_int = tf.Variable(
          1, name='my_int', collections=[tf.GraphKeys.LOCAL_VARIABLES])
      _ = training.get_or_create_steps_per_run_variable()
      scores = tf.constant([3.])
      with tf.control_dependencies([
          tf.initializers.local_variables(),
          tf.initializers.tables_initializer()
      ]):
        assign_op = tf.assign(my_int, 12345)

      # local_init_op must be an Operation, not a Tensor.
      custom_local_init_op = tf.group(assign_op)
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=tf.constant([[1.]]),
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=tf.train.Scaffold(local_init_op=custom_local_init_op),
          export_outputs={'test': export_lib.ClassificationOutput(scores)})

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        serving_input_receiver_fn)

    # Restore, to validate that the custom local_init_op runs.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        my_int = graph.get_tensor_by_name('my_int:0')
        my_int_value = sess.run(my_int)
        # 12345 proves the custom local_init_op ran on load.
        self.assertEqual(12345, my_int_value)

  def test_scaffold_is_used_for_local_init_multiple_modes(self):
    """The PREDICT-only local_init_op affects only the SERVING meta graph."""
    tmpdir = tempfile.mkdtemp()

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      my_int = tf.Variable(
          1, name='my_int', collections=[tf.GraphKeys.LOCAL_VARIABLES])
      scores = tf.constant([3.])
      with tf.control_dependencies([
          tf.initializers.local_variables(),
          tf.initializers.tables_initializer()
      ]):
        assign_op = tf.assign(my_int, 12345)

      custom_local_init_op = None
      if mode == ModeKeys.PREDICT:
        # local_init_op must be an Operation, not a Tensor.
        custom_local_init_op = tf.group(assign_op)

      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=tf.constant([[1.]]),
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          scaffold=tf.train.Scaffold(local_init_op=custom_local_init_op),
          export_outputs={'test': export_lib.ClassificationOutput(scores)})

    est = estimator.EstimatorV2(model_fn=_model_fn_scaffold)
    est.train(dummy_input_fn, steps=1)
    input_receiver_fn_map = {
        ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
        ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
        ModeKeys.PREDICT: _get_serving_input_receiver_fn()
    }

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.experimental_export_all_saved_models(
        export_dir_base, input_receiver_fn_map)

    # Restore, to validate that the custom local_init_op runs.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.SERVING], export_dir)
        my_int = graph.get_tensor_by_name('my_int:0')
        my_int_value = sess.run(my_int)
        # PREDICT meta graph carries the custom local_init_op.
        self.assertEqual(12345, my_int_value)

    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        tf.saved_model.load(sess, [tf.saved_model.TRAINING], export_dir)
        my_int = graph.get_tensor_by_name('my_int:0')
        my_int_value = sess.run(my_int)
        # TRAIN meta graph keeps the default initial value.
        self.assertEqual(1, my_int_value)

  def test_features_labels_mode(self):
    """During export, model_fn sees features, labels=None, and PREDICT mode."""
    given_features = {'test-features': tf.constant([[1], [1]])}

    def serving_input_receiver_fn():
      return export_lib.ServingInputReceiver(
          given_features, tf.placeholder(dtype=tf.dtypes.string))

    def _model_fn(features, labels, mode):
      # Record what the estimator passed in so the test can assert on it.
      self.features, self.labels, self.mode = features, labels, mode
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]),
          export_outputs={
              'test': export_lib.ClassificationOutput(tf.constant([[0.]]))
          })

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn)
    self.assertEqual(given_features, self.features)
    self.assertIsNone(self.labels)
    self.assertEqual(ModeKeys.PREDICT, self.mode)

  def test_graph_initialization_global_step_and_random_seed(self):
    """Export graph has a global step and the RunConfig random seed set."""
    expected_random_seed = run_config.RunConfig().tf_random_seed

    def _model_fn(features, labels, mode):
      _, _, _ = features, labels, mode
      self.assertIsNotNone(tf.train.get_global_step())
      self.assertEqual(expected_random_seed, tf.get_default_graph().seed)
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          loss=tf.constant(0.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1),
          predictions=tf.constant([[0.]]),
          export_outputs={
              'test': export_lib.ClassificationOutput(tf.constant([[0.]]))
          })

    def serving_input_receiver_fn():
      return export_lib.ServingInputReceiver(
          {'test-features': tf.constant([[1], [1]])},
          tf.placeholder(dtype=tf.dtypes.string))

    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(dummy_input_fn, steps=1)
    est.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn)

  def test_export_saved_model_respects_soft_placement(self):
    """Export succeeds even when an op is pinned to an unavailable GPU."""

    def model_fn_with_a_gpu_op_but_no_kernel(features, labels, mode):
      _, _ = features, labels
      table = saver_test_utils.CheckpointedOp(name='v2')
      update_global_step = tf.assign_add(tf.train.get_global_step(), 1)
      with tf.control_dependencies([update_global_step]):
        train_op = table.insert('k1', 30.0)
      # In this test, there are no GPUs available. The goal is to verify that
      # export_saved_model executes nevertheless.
      with tf.device('/gpu:0'):
        string_op = tf.strings.as_string(update_global_step)
      with tf.control_dependencies([string_op]):
        prediction = table.lookup('k1', 0.0)
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=prediction,
          loss=tf.constant(1.),
          train_op=train_op,
          export_outputs={
              'test': export_lib.PredictOutput({'prediction': prediction})
          })

    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=model_fn_with_a_gpu_op_but_no_kernel)
    est.train(input_fn=dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        serving_input_receiver_fn)

    # At this point, if export_saved_model executed with
    # allow_soft_placement=True, then the GPU-assigned operation was silently
    # placed on the CPU. Otherwise, an exception would have been raised
    # related to the fact that the requested GPU device isn't available.
    # Expectations below assume that export_saved_model has completed normally.
    self.assertTrue(tf.gfile.Exists(export_dir_base))
    self.assertTrue(tf.gfile.Exists(export_dir))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('saved_model.pb'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables/variables.index'))))
    self.assertTrue(
        tf.gfile.Exists(
            os.path.join(
                tf.compat.as_bytes(export_dir),
                tf.compat.as_bytes('variables/variables.data-00000-of-00001'))))

    tf.gfile.DeleteRecursively(tmpdir)

  def _validate_strip_default_attrs(self, estimator_cls, export_fn,
                                    attributes_stripped):
    """Validate estimator export correctly strips/leaves default attributes.

    Args:
      estimator_cls: `Estimator` or `EstimatorV2`
      export_fn: a function that takes in an estimator and export arguments,
        and exports the estimator.
      attributes_stripped: whether attributes are expected to be stripped in
        the MetaGraphDef.
    """
    est = estimator_cls(model_fn=_model_fn_for_export_tests)
    est.train(input_fn=dummy_input_fn, steps=1)
    feature_spec = {
        'x': tf.io.VarLenFeature(dtype=tf.dtypes.int64),
        'y': tf.io.VarLenFeature(dtype=tf.dtypes.int64)
    }
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

    # Perform the export, and obtain the MetaGraphDefs
    tmpdir = tempfile.mkdtemp()
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('export'))
    export_dir = export_fn(est, export_dir_base, serving_input_receiver_fn)
    saved_model_pb = loader_impl.parse_saved_model(export_dir)
    self.assertIsNotNone(saved_model_pb)
    meta_graph_def = [
        x for x in saved_model_pb.meta_graphs
        if x.meta_info_def.tags == [tf.saved_model.SERVING]
    ][0]

    # "weight" node in graph is a "Variable" Op with 2 default valued attrs.
    #   o "container"   : "".
    #   o "shared_name" : "".
    # When default attributes are not stripped, the "weight" node should have
    # attributes "container" and "shared_name". When default attributes are
    # stripped, the node should not have these attributes.
    node_def = test_util.get_node_def_from_graph('weight',
                                                 meta_graph_def.graph_def)
    self.assertEqual(attributes_stripped, 'container' not in node_def.attr)
    self.assertEqual(attributes_stripped, 'shared_name' not in node_def.attr)

    # Clean up.
    tf.gfile.DeleteRecursively(tmpdir)

  def test_export_saved_model_proto_strip_default_attrs(self):
    """export_savedmodel honors the flag; export_saved_model always strips."""
    # Test deprecated export_savedmodel to ensure that V1 behavior is
    # consistent.
    self._validate_strip_default_attrs(
        estimator.Estimator,
        lambda e, *args: e.export_savedmodel(*args, strip_default_attrs=True),
        True)
    self._validate_strip_default_attrs(
        estimator.Estimator,
        lambda e, *args: e.export_savedmodel(*args, strip_default_attrs=False),
        False)

    # Make sure that export_saved_model strips the default attributes.
    self._validate_strip_default_attrs(
        estimator.Estimator, lambda e, *args: e.export_saved_model(*args),
        True)
    self._validate_strip_default_attrs(
        estimator.EstimatorV2, lambda e, *args: e.export_saved_model(*args),
        True)

  def test_export_saved_model_no_export_outputs(self):
    """Ensure that an EstimatorSpec without outputs defined can be exported."""

    def _model_fn(features, labels, mode):
      _, _ = features, labels
      tf.Variable(1., name='weight')
      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=tf.constant(10.),
          loss=tf.constant(1.),
          train_op=tf.assign_add(tf.compat.v1.train.get_global_step(), 1))

    tmpdir = tempfile.mkdtemp()
    est = estimator.EstimatorV2(model_fn=_model_fn)
    est.train(input_fn=dummy_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        tf.compat.as_bytes(tmpdir), tf.compat.as_bytes('no_export_outputs'))
    export_dir = est.export_saved_model(export_dir_base,
                                        _get_serving_input_receiver_fn())

    # Check that all the files are in the right places.
    # (Continuation: verify exported files and default signature.)
    self.assertTrue(tf.gfile.Exists(export_dir_base))
    self._validate_exported_files(export_dir)

    # Restore, to validate that the export was well-formed.
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        meta_graph = tf.saved_model.load(sess, [tf.saved_model.SERVING],
                                         export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertTrue('weight' in graph_ops)

        sig_def = meta_graph.signature_def
        self.assertEqual(len(sig_def), 1)
        sig_outputs = sig_def[
            tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs
        self.assertEqual(sig_outputs['output'].name, 'Const:0')

  def test_export_from_warm_start(self):
    """A warm-started estimator exports the checkpoint values it loaded."""

    def _make_model_fn(x):

      def _variable_creating_model_fn(features, labels, mode):
        _, _ = features, labels
        tf.get_variable('x', initializer=x)
        global_step = tf.train.get_global_step()
        return model_fn_lib.EstimatorSpec(
            mode,
            predictions=tf.constant(1.),
            loss=tf.constant(1.),
            train_op=tf.assign_add(global_step, 1))

      return _variable_creating_model_fn

    est = estimator.EstimatorV2(model_fn=_make_model_fn(42.))
    est.train(dummy_input_fn, steps=10)

    warm_started_est = estimator.EstimatorV2(
        model_fn=_make_model_fn(36.), warm_start_from=est.model_dir)
    saved_model_dir = warm_started_est.export_saved_model(
        tempfile.mkdtemp(), _get_serving_input_receiver_fn())
    variable_dir = path_helpers.get_variables_path(saved_model_dir)
    # The warm-started value (42.) wins over the new initializer (36.).
    self.assertEqual(42., tf.train.load_variable(variable_dir, 'x'))

  def test_export_saved_model_symbol_deprecated(self):
    """EstimatorV2 rejects the removed `export_savedmodel` attribute."""
    est = estimator.EstimatorV2(model_fn=_model_fn_for_export_tests)
    with self.assertRaisesRegexp(AttributeError,
                                 'Please use `export_saved_model`'):
      est.export_savedmodel


class EstimatorHookOrderingTest(tf.test.TestCase):
  """Checks that user hooks run before the built-in NaN-loss hook."""

  def testCustomHooksAreCalledBeforeNanTensorHook(self):

    def nan_making_model_fn(mode, features, labels):
      """A graph that generates NaN's for testing."""
      del features, labels
      global_step = tf.Variable(0, dtype=tf.dtypes.int64, name='global_step')
      inc_global_step = tf.assign_add(global_step, 1)
      nan_const = tf.constant(np.nan, dtype=tf.dtypes.float32)
      # Loss becomes NaN on the second step.
      loss = tf.cond(inc_global_step > 1, lambda: nan_const, lambda: 1.0)
      return model_fn_lib.EstimatorSpec(
          mode=mode,
          predictions=global_step.read_value(),
          loss=loss,
          train_op=inc_global_step)

    def empty_input_fn():
      return dict(), None

    class AfterRunCountingHook(tf.train.SessionRunHook):
      """Hooks that counts the number of times after_run() is called."""

      def __init__(self):
        # Number of completed after_run() invocations.
        self.after_run_count = 0

      def after_run(self, run_context, run_values):
        del run_context, run_values
        self.after_run_count += 1

    test_hook = AfterRunCountingHook()
    est = estimator.EstimatorV2(model_fn=nan_making_model_fn)
    with self.assertRaises(tf.train.NanLossDuringTrainingError):
      est.train(input_fn=empty_input_fn, steps=2, hooks=[test_hook])
    # The custom hook observed both steps before NanTensorHook aborted.
    self.assertEqual(2, test_hook.after_run_count)


class EstimatorIntegrationTest(tf.test.TestCase):
  """End-to-end train/evaluate/predict/export flow on a linear model."""

  def test_complete_flow_with_a_simple_linear_model(self):

    def _model_fn(features, labels, mode):
      predictions = tf_keras_v1.__internal__.legacy.layers.dense(
          features['x'], 1, kernel_initializer=tf.initializers.zeros())
      export_outputs = {
          'predictions': export_lib.RegressionOutput(predictions)
      }

      if mode == ModeKeys.PREDICT:
        return model_fn_lib.EstimatorSpec(
            mode, predictions=predictions, export_outputs=export_outputs)

      loss = tf_keras_v1.losses.MeanSquaredError()(labels, predictions)
      train_op = tf.train.GradientDescentOptimizer(learning_rate=0.5).minimize(
          loss, tf.train.get_global_step())
      mean = tf_keras_v1.metrics.Mean()
      mean.update_state(loss)
      eval_metric_ops = {
          'absolute_error':
              tf.metrics.mean_absolute_error(labels, predictions),
          'mean':
              mean,
      }

      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)

    est = estimator.EstimatorV2(model_fn=_model_fn)
    data = np.linspace(0., 1., 100, dtype=np.float32).reshape(-1, 1)

    # TRAIN
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=50,
        num_epochs=None,
        shuffle=True)
    est.train(train_input_fn, steps=200)

    # EVALUATE
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=50, num_epochs=1, shuffle=True)
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores['global_step'])
    self.assertGreater(0.1, scores['absolute_error'])
    self.assertAlmostEqual(4.4e-14, scores['mean'], places=2)

    # PREDICT
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=None, batch_size=10, num_epochs=1, shuffle=False)
    predictions = list(est.predict(predict_input_fn))
    self.assertAllClose(data, predictions, atol=0.01)

    # EXPORT
    feature_spec = {'x': tf.io.FixedLenFeature([1], tf.dtypes.float32)}
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
    export_dir = est.export_saved_model(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
    self.assertTrue(tf.gfile.Exists(export_dir))


class EstimatorInputContextTest(tf.test.TestCase):
  """Checks that input_fns may optionally accept an InputContext."""

  def test_with_input_fn(self):
    total_batch_size = 10
    num_shards = 2

    def _input_with_context(input_context):
      batch_size = total_batch_size // num_shards
      self.assertEqual('DummyInputContext', input_context.name)
      self.assertEqual(batch_size, input_context.batch_size)
      return tf.data.Dataset.from_tensors(([1.], [2.]))

    def _input_without_context():
      return tf.data.Dataset.from_tensors(([1.], [2.]))

    class DummyInputContext(object):
      """Minimal stand-in exposing the name/batch_size the test asserts on."""

      def __init__(self, n_shards, total_bs):
        self._name = 'DummyInputContext'
        self._num_shards = n_shards
        self._total_batch_size = total_bs

      @property
      def name(self):
        return self._name

      @property
      def batch_size(self):
        # Per-shard batch size.
        return self._total_batch_size // self._num_shards

    # This class is the mock for DistributionStrategy. It only overrides
    # the make_input_fn_iterator method.
class DummyDistributionStrategy(object): def __init__(self, n_shards): self._num_shards = n_shards def make_input_fn_iterator(self, input_fn): input_context = DummyInputContext(num_shards, total_batch_size) return input_fn(input_context) distribution = DummyDistributionStrategy(num_shards) est = estimator.EstimatorV2(model_fn=dummy_model_fn) # We only test the `input_fn` instead of calling `Estimator.train` est._get_iterator_from_input_fn(_input_with_context, None, distribution) # pylint: disable=protected-access est._get_iterator_from_input_fn(_input_without_context, None, distribution) # pylint: disable=protected-access if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/export/__init__.py ================================================ ================================================ FILE: tensorflow_estimator/python/estimator/export/export.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Configuration and utilities for receiving inputs at serving time. Extends the export utils defined in core TensorFlow. Please avoid importing this file directly, all of the public functions have been exported to export_lib.py. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import six import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.saved_model.model_utils import export_utils from tensorflow.python.saved_model.model_utils.export_utils import SINGLE_FEATURE_DEFAULT_NAME from tensorflow.python.saved_model.model_utils.export_utils import SINGLE_LABEL_DEFAULT_NAME from tensorflow.python.saved_model.model_utils.export_utils import SINGLE_RECEIVER_DEFAULT_NAME from tensorflow_estimator.python.estimator import util from tensorflow_estimator.python.estimator.estimator_export import estimator_export _SINGLE_TENSOR_DEFAULT_NAMES = { 'feature': SINGLE_FEATURE_DEFAULT_NAME, 'label': SINGLE_LABEL_DEFAULT_NAME, 'receiver_tensor': SINGLE_RECEIVER_DEFAULT_NAME, 'receiver_tensors_alternative': SINGLE_RECEIVER_DEFAULT_NAME } def wrap_and_check_input_tensors(tensors, field_name, allow_int_keys=False): """Ensure that tensors is a dict of str to Tensor mappings. Args: tensors: dict of `str` (or `int`s if `allow_int_keys=True`) to `Tensors`, or a single `Tensor`. field_name: name of the member field of `ServingInputReceiver` whose value is being passed to `tensors`. allow_int_keys: If set to true, the `tensor` dict keys may also be `int`s. Returns: dict of str to Tensors; this is the original dict if one was passed, or the original tensor wrapped in a dictionary. 
Raises: ValueError: if tensors is None, or has non-string keys, or non-Tensor values """ if tensors is None: raise ValueError('{}s must be defined.'.format(field_name)) if not isinstance(tensors, dict): tensors = {_SINGLE_TENSOR_DEFAULT_NAMES[field_name]: tensors} for name, tensor in tensors.items(): _check_tensor_key(name, error_label=field_name, allow_ints=allow_int_keys) _check_tensor(tensor, name, error_label=field_name) return tensors def _check_tensor(tensor, name, error_label='feature'): """Check that passed `tensor` is a Tensor or SparseTensor or RaggedTensor.""" if not (isinstance(tensor, tf.Tensor) or isinstance(tensor, tf.sparse.SparseTensor) or isinstance(tensor, tf.RaggedTensor)): fmt_name = ' {}'.format(name) if name else '' value_error = ValueError('{}{} must be a Tensor, SparseTensor, or ' 'RaggedTensor.'.format(error_label, fmt_name)) # NOTE(ericmc): This if-else block is a specific carve-out for # LabeledTensor, which has a `.tensor` attribute and which is # convertible to tf.Tensor via ops.convert_to_tensor. # Allowing all types convertible to tf.Tensor is considered by soergel@ # to be too permissive. # TODO(soergel): accept any type convertible to Tensor, # as in cl/193238295 snapshot #6. if hasattr(tensor, 'tensor'): try: ops.convert_to_tensor(tensor) except TypeError: raise value_error else: raise value_error def _check_tensor_key(name, error_label='feature', allow_ints=False): if not isinstance(name, six.string_types): if not allow_ints: raise ValueError('{} keys must be strings: {}.'.format(error_label, name)) elif not isinstance(name, six.integer_types): raise ValueError('{} keys must be strings or ints: {}.'.format( error_label, name)) @estimator_export('estimator.export.ServingInputReceiver') class ServingInputReceiver( collections.namedtuple( 'ServingInputReceiver', ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])): """A return type for a serving_input_receiver_fn. 
Attributes: features: A `Tensor`, `SparseTensor`, or dict of string or int to `Tensor` or `SparseTensor`, specifying the features to be passed to the model. Note: if `features` passed is not a dict, it will be wrapped in a dict with a single entry, using 'feature' as the key. Consequently, the model must accept a feature dict of the form {'feature': tensor}. You may use `TensorServingInputReceiver` if you want the tensor to be passed as is. receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or `SparseTensor`, specifying input nodes where this receiver expects to be fed by default. Typically, this is a single placeholder expecting serialized `tf.Example` protos. receiver_tensors_alternatives: a dict of string to additional groups of receiver tensors, each of which may be a `Tensor`, `SparseTensor`, or dict of string to `Tensor` or`SparseTensor`. These named receiver tensor alternatives generate additional serving signatures, which may be used to feed inputs at different points within the input receiver subgraph. A typical usage is to allow feeding raw feature `Tensor`s *downstream* of the tf.parse_example() op. Defaults to None. """ def __new__(cls, features, receiver_tensors, receiver_tensors_alternatives=None): features = wrap_and_check_input_tensors( features, 'feature', allow_int_keys=True) receiver_tensors = wrap_and_check_input_tensors(receiver_tensors, 'receiver_tensor') if receiver_tensors_alternatives is not None: if not isinstance(receiver_tensors_alternatives, dict): raise ValueError( 'receiver_tensors_alternatives must be a dict: {}.'.format( receiver_tensors_alternatives)) for alternative_name, receiver_tensors_alt in ( six.iteritems(receiver_tensors_alternatives)): # Updating dict during iteration is OK in this case. 
receiver_tensors_alternatives[alternative_name] = ( wrap_and_check_input_tensors(receiver_tensors_alt, 'receiver_tensors_alternative')) return super(ServingInputReceiver, cls).__new__( cls, features=features, receiver_tensors=receiver_tensors, receiver_tensors_alternatives=receiver_tensors_alternatives) @estimator_export('estimator.export.TensorServingInputReceiver') class TensorServingInputReceiver( collections.namedtuple( 'TensorServingInputReceiver', ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])): """A return type for a serving_input_receiver_fn. This is for use with models that expect a single `Tensor` or `SparseTensor` as an input feature, as opposed to a dict of features. The normal `ServingInputReceiver` always returns a feature dict, even if it contains only one entry, and so can be used only with models that accept such a dict. For models that accept only a single raw feature, the `serving_input_receiver_fn` provided to `Estimator.export_saved_model()` should return this `TensorServingInputReceiver` instead. See: https://github.com/tensorflow/tensorflow/issues/11674 Note that the receiver_tensors and receiver_tensor_alternatives arguments will be automatically converted to the dict representation in either case, because the SavedModel format requires each input `Tensor` to have a name (provided by the dict key). Attributes: features: A single `Tensor` or `SparseTensor`, representing the feature to be passed to the model. receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or `SparseTensor`, specifying input nodes where this receiver expects to be fed by default. Typically, this is a single placeholder expecting serialized `tf.Example` protos. receiver_tensors_alternatives: a dict of string to additional groups of receiver tensors, each of which may be a `Tensor`, `SparseTensor`, or dict of string to `Tensor` or`SparseTensor`. 
      These named receiver tensor alternatives generate additional serving
      signatures, which may be used to feed inputs at different points within
      the input receiver subgraph. A typical usage is to allow feeding raw
      feature `Tensor`s *downstream* of the tf.parse_example() op. Defaults to
      None.
  """

  def __new__(cls,
              features,
              receiver_tensors,
              receiver_tensors_alternatives=None):
    # A single raw tensor feature is required here; fail fast on None or
    # non-tensor values before delegating to ServingInputReceiver.
    if features is None:
      raise ValueError('features must be defined.')
    _check_tensor(features, None)

    # Reuse ServingInputReceiver for all wrapping/validation, then pull the
    # single auto-named feature back out of the dict it produced.
    receiver = ServingInputReceiver(
        features=features,
        receiver_tensors=receiver_tensors,
        receiver_tensors_alternatives=receiver_tensors_alternatives)

    return super(TensorServingInputReceiver, cls).__new__(
        cls,
        features=receiver.features[SINGLE_FEATURE_DEFAULT_NAME],
        receiver_tensors=receiver.receiver_tensors,
        receiver_tensors_alternatives=receiver.receiver_tensors_alternatives)


class UnsupervisedInputReceiver(ServingInputReceiver):
  """A return type for a training_input_receiver_fn or eval_input_receiver_fn.

  This differs from SupervisedInputReceiver in that it does not require a set
  of labels.

  Attributes:
    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
      `SparseTensor`, specifying the features to be passed to the model.
    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
      or `SparseTensor`, specifying input nodes where this receiver expects to
      be fed by default. Typically, this is a single placeholder expecting
      serialized `tf.Example` protos.
  """

  def __new__(cls, features, receiver_tensors):
    # Delegate to ServingInputReceiver with no receiver-tensor alternatives.
    return super(UnsupervisedInputReceiver, cls).__new__(
        cls,
        features=features,
        receiver_tensors=receiver_tensors,
        receiver_tensors_alternatives=None)


class SupervisedInputReceiver(
    collections.namedtuple('SupervisedInputReceiver',
                           ['features', 'labels', 'receiver_tensors'])):
  """A return type for a training_input_receiver_fn or eval_input_receiver_fn.

  This differs from a ServingInputReceiver in that (1) this receiver expects
  a set of labels to be passed in with features, and (2) this receiver does
  not support receiver_tensors_alternatives, which are primarily used for
  serving.

  The expected return values are:
    features: A `Tensor`, `SparseTensor`, or dict of string or int to `Tensor`
      or `SparseTensor`, specifying the features to be passed to the model.
    labels: A `Tensor`, `SparseTensor`, or dict of string or int to `Tensor`
      or `SparseTensor`, specifying the labels to be passed to the model.
    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
      or `SparseTensor`, specifying input nodes where this receiver expects to
      be fed by default. Typically, this is a single placeholder expecting
      serialized `tf.Example` protos.
  """

  def __new__(cls, features, labels, receiver_tensors):
    # Both features and labels can be dicts or raw tensors.
    # wrap_and_check_input_tensors is called here only to validate the tensors.
    # The wrapped dict that is returned is deliberately discarded.
    wrap_and_check_input_tensors(features, 'feature', allow_int_keys=True)
    wrap_and_check_input_tensors(labels, 'label', allow_int_keys=True)

    receiver_tensors = wrap_and_check_input_tensors(receiver_tensors,
                                                    'receiver_tensor')

    return super(SupervisedInputReceiver, cls).__new__(
        cls,
        features=features,
        labels=labels,
        receiver_tensors=receiver_tensors)


@estimator_export('estimator.export.build_parsing_serving_input_receiver_fn')
def build_parsing_serving_input_receiver_fn(feature_spec,
                                            default_batch_size=None):
  """Build a serving_input_receiver_fn expecting fed tf.Examples.

  Creates a serving_input_receiver_fn that expects a serialized tf.Example fed
  into a string placeholder. The function parses the tf.Example according to
  the provided feature_spec, and returns all parsed Tensors as features.

  Args:
    feature_spec: a dict of string to `VarLenFeature`/`FixedLenFeature`.
    default_batch_size: the number of query examples expected per batch.
      Leave unset for variable batch size (recommended).

  Returns:
    A serving_input_receiver_fn suitable for use in serving.
  """

  def serving_input_receiver_fn():
    """An input_fn that expects a serialized tf.Example."""
    serialized_tf_example = tf.compat.v1.placeholder(
        dtype=tf.dtypes.string,
        shape=[default_batch_size],
        name='input_example_tensor')
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.compat.v1.io.parse_example(serialized_tf_example,
                                             feature_spec)
    return ServingInputReceiver(features, receiver_tensors)

  return serving_input_receiver_fn


def _placeholder_from_tensor(t, default_batch_size=None):
  """Creates a placeholder that matches the dtype and shape of passed tensor.

  The leading (batch) dimension is replaced by `default_batch_size`; all
  remaining dimensions are taken from `t` unchanged.

  Args:
    t: Tensor or EagerTensor
    default_batch_size: the number of query examples expected per batch. Leave
      unset for variable batch size (recommended).

  Returns:
    Placeholder that matches the passed tensor.
  """
  batch_shape = tf.TensorShape([default_batch_size])
  shape = batch_shape.concatenate(t.get_shape()[1:])

  # Reuse the feature tensor's op name (t.op.name) for the placeholder,
  # excluding the index from the tensor's name (t.name):
  # t.name = "%s:%d" % (t.op.name, t._value_index)
  try:
    name = t.op.name
  except AttributeError:
    # In Eager mode, tensors don't have ops or names, and while they do have
    # IDs, those are not maintained across runs. The name here is used
    # primarily for debugging, and is not critical to the placeholder.
    # So, in order to make this Eager-compatible, continue with an empty
    # name if none is available.
    name = None

  return tf.compat.v1.placeholder(dtype=t.dtype, shape=shape, name=name)


def _placeholders_from_receiver_tensors_dict(input_vals,
                                             default_batch_size=None):
  """Returns a dict of placeholders matching each tensor in `input_vals`."""
  return {
      name: _placeholder_from_tensor(t, default_batch_size)
      for name, t in input_vals.items()
  }


@estimator_export('estimator.export.build_raw_serving_input_receiver_fn')
def build_raw_serving_input_receiver_fn(features, default_batch_size=None):
  """Build a serving_input_receiver_fn expecting feature Tensors.

  Creates a serving_input_receiver_fn that expects all features to be fed
  directly.

  Args:
    features: a dict of string to `Tensor`.
    default_batch_size: the number of query examples expected per batch. Leave
      unset for variable batch size (recommended).

  Returns:
    A serving_input_receiver_fn.
  """

  def serving_input_receiver_fn():
    """A serving_input_receiver_fn that expects features to be fed directly."""
    # The placeholders double as both the receiver tensors and the features
    # handed to the model: inputs are passed through unmodified.
    receiver_tensors = _placeholders_from_receiver_tensors_dict(
        features, default_batch_size)
    return ServingInputReceiver(receiver_tensors, receiver_tensors)

  return serving_input_receiver_fn


@estimator_export(
    'estimator.experimental.build_raw_supervised_input_receiver_fn')
def build_raw_supervised_input_receiver_fn(features,
                                           labels,
                                           default_batch_size=None):
  """Build a supervised_input_receiver_fn for raw features and labels.

  This function wraps tensor placeholders in a supervised_receiver_fn with the
  expectation that the features and labels appear precisely as the model_fn
  expects them. Features and labels can therefore be dicts of tensors, or raw
  tensors.

  Args:
    features: a dict of string to `Tensor` or `Tensor`.
    labels: a dict of string to `Tensor` or `Tensor`.
    default_batch_size: the number of query examples expected per batch. Leave
      unset for variable batch size (recommended).

  Returns:
    A supervised_input_receiver_fn.

  Raises:
    ValueError: if features and labels have overlapping keys.
  """
  # Check for overlapping keys before beginning.
  # Raw (non-dict) features/labels are keyed under the single default names,
  # so the overlap check below covers that case too.
  try:
    feat_keys = features.keys()
  except AttributeError:
    feat_keys = [SINGLE_RECEIVER_DEFAULT_NAME]
  try:
    label_keys = labels.keys()
  except AttributeError:
    label_keys = [SINGLE_LABEL_DEFAULT_NAME]

  overlap_keys = set(feat_keys) & set(label_keys)
  if overlap_keys:
    raise ValueError('Features and labels must have distinct keys. '
                     'Found overlapping keys: {}'.format(overlap_keys))

  def supervised_input_receiver_fn():
    """A receiver_fn that expects pass-through features and labels."""
    if not isinstance(features, dict):
      features_cp = _placeholder_from_tensor(features, default_batch_size)
      receiver_features = {SINGLE_RECEIVER_DEFAULT_NAME: features_cp}
    else:
      receiver_features = _placeholders_from_receiver_tensors_dict(
          features, default_batch_size)
      features_cp = receiver_features

    if not isinstance(labels, dict):
      labels_cp = _placeholder_from_tensor(labels, default_batch_size)
      receiver_labels = {SINGLE_LABEL_DEFAULT_NAME: labels_cp}
    else:
      receiver_labels = _placeholders_from_receiver_tensors_dict(
          labels, default_batch_size)
      labels_cp = receiver_labels

    # The receiver tensor dict is the union of feature and label
    # placeholders; key distinctness was verified above.
    receiver_tensors = dict(receiver_features)
    receiver_tensors.update(receiver_labels)

    return SupervisedInputReceiver(features_cp, labels_cp, receiver_tensors)

  return supervised_input_receiver_fn


def build_supervised_input_receiver_fn_from_input_fn(input_fn,
                                                     **input_fn_args):
  """Get a function that returns a SupervisedInputReceiver matching an input_fn.

  Note that this function calls the input_fn in a local graph in order to
  extract features and labels. Placeholders are then created from those
  features and labels in the default graph.

  Args:
    input_fn: An Estimator input_fn, which is a function that returns one of:
      * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
        tuple (features, labels) with same constraints as below.
      * A tuple (features, labels): Where `features` is a `Tensor` or a
        dictionary of string feature name to `Tensor` and `labels` is a
        `Tensor` or a dictionary of string label name to `Tensor`. Both
        `features` and `labels` are consumed by `model_fn`. They should
        satisfy the expectation of `model_fn` from inputs.
    **input_fn_args: set of kwargs to be passed to the input_fn. Note that
      these will not be checked or validated here, and any errors raised by
      the input_fn will be thrown to the top.

  Returns:
    A function taking no arguments that, when called, returns a
    SupervisedInputReceiver. This function can be passed in as part of the
    input_receiver_map when exporting SavedModels from Estimator with multiple
    modes.
  """
  # Wrap the input_fn call in a graph to prevent sullying the default namespace
  with tf.Graph().as_default():
    result = input_fn(**input_fn_args)
    features, labels, _ = util.parse_input_fn_result(result)

  # Placeholders are created back in the default graph.
  return build_raw_supervised_input_receiver_fn(features, labels)


### Below utilities are specific to SavedModel exports.
# TODO(kathywu): Rename all references to use the original definition in
# model_utils, or estimator/export/export_lib.py if other estimator export
# functions are used.
build_all_signature_defs = export_utils.build_all_signature_defs
get_temp_export_dir = export_utils.get_temp_export_dir
get_timestamped_export_dir = export_utils.get_timestamped_export_dir


================================================
FILE: tensorflow_estimator/python/estimator/export/export_lib.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""All public utility methods for exporting Estimator to SavedModel.

This file includes functions and constants from core (model_utils) and
export.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# This module is a pure re-export surface: it aggregates the SavedModel
# export helpers shared with core TensorFlow (model_utils) and the
# estimator-specific input receivers into one import location.
# pylint: disable=unused-import,line-too-long, wildcard-import
from tensorflow.python.saved_model.model_utils import build_all_signature_defs
from tensorflow.python.saved_model.model_utils import export_outputs_for_mode
from tensorflow.python.saved_model.model_utils import EXPORT_TAG_MAP
from tensorflow.python.saved_model.model_utils import get_export_outputs
from tensorflow.python.saved_model.model_utils import get_temp_export_dir
from tensorflow.python.saved_model.model_utils import get_timestamped_export_dir
from tensorflow.python.saved_model.model_utils import SIGNATURE_KEY_MAP
from tensorflow.python.saved_model.model_utils.export_output import _SupervisedOutput
from tensorflow.python.saved_model.model_utils.export_output import ClassificationOutput
from tensorflow.python.saved_model.model_utils.export_output import EvalOutput
from tensorflow.python.saved_model.model_utils.export_output import ExportOutput
from tensorflow.python.saved_model.model_utils.export_output import PredictOutput
from tensorflow.python.saved_model.model_utils.export_output import RegressionOutput
from tensorflow.python.saved_model.model_utils.export_output import TrainOutput
from tensorflow_estimator.python.estimator.export.export import build_parsing_serving_input_receiver_fn
from tensorflow_estimator.python.estimator.export.export import build_raw_serving_input_receiver_fn
from tensorflow_estimator.python.estimator.export.export import build_raw_supervised_input_receiver_fn
from tensorflow_estimator.python.estimator.export.export import build_supervised_input_receiver_fn_from_input_fn
from tensorflow_estimator.python.estimator.export.export import ServingInputReceiver
from tensorflow_estimator.python.estimator.export.export import SupervisedInputReceiver
from tensorflow_estimator.python.estimator.export.export import TensorServingInputReceiver
from tensorflow_estimator.python.estimator.export.export import UnsupervisedInputReceiver
from tensorflow_estimator.python.estimator.export.export import wrap_and_check_input_tensors
# pylint: enable=unused-import,line-too-long, wildcard-import


================================================
FILE: tensorflow_estimator/python/estimator/export/export_output.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Classes for different types of export output.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function # pylint: disable=unused-import from tensorflow.python.saved_model.model_utils.export_output import _SupervisedOutput from tensorflow.python.saved_model.model_utils.export_output import ClassificationOutput from tensorflow.python.saved_model.model_utils.export_output import EvalOutput from tensorflow.python.saved_model.model_utils.export_output import ExportOutput from tensorflow.python.saved_model.model_utils.export_output import PredictOutput from tensorflow.python.saved_model.model_utils.export_output import RegressionOutput from tensorflow.python.saved_model.model_utils.export_output import TrainOutput # pylint: enable=unused-import from tensorflow_estimator.python.estimator.estimator_export import estimator_export estimator_export('estimator.export.ExportOutput')(ExportOutput) estimator_export('estimator.export.ClassificationOutput')(ClassificationOutput) estimator_export('estimator.export.RegressionOutput')(RegressionOutput) estimator_export('estimator.export.PredictOutput')(PredictOutput) estimator_export('estimator.export.EvalOutput')(EvalOutput) ================================================ FILE: tensorflow_estimator/python/estimator/export/export_test.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for export."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from google.protobuf import text_format
from tensorflow.core.example import example_pb2
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.export import export


class LabeledTensorMock(object):
  """Mock class emulating LabeledTensor."""

  def __init__(self):
    self.tensor = tf.constant([1])


def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs):
  # Exposes the wrapped tensor so the mock participates in TF's implicit
  # tensor-conversion protocol (registered below).
  return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs)


tf.register_tensor_conversion_function(LabeledTensorMock,
                                       _convert_labeled_tensor_mock_to_tensor)


class ServingInputReceiverTest(tf.test.TestCase):
  """Validation and auto-naming behavior of ServingInputReceiver."""

  def test_serving_input_receiver_constructor(self):
    """Tests that no errors are raised when input is expected."""
    features = {
        "feature0": tf.constant([0]),
        u"feature1": tf.constant([1]),
        "feature2": tf.sparse.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
        # ints are allowed only in the `features` dict
        42: tf.constant([3]),
    }
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }
    export.ServingInputReceiver(features, receiver_tensors)

  def test_serving_input_receiver_features_invalid(self):
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }

    with self.assertRaisesRegexp(ValueError, "features must be defined"):
      export.ServingInputReceiver(
          features=None, receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(ValueError,
                                 "feature keys must be strings or ints"):
      export.ServingInputReceiver(
          features={42.2: tf.constant([1])},
          receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(
        ValueError, "feature feature1 must be a Tensor, SparseTensor, or "
        "RaggedTensor."):
      export.ServingInputReceiver(
          features={"feature1": [1]}, receiver_tensors=receiver_tensors)

  def test_serving_input_receiver_receiver_tensors_invalid(self):
    features = {
        "feature0": tf.constant([0]),
        u"feature1": tf.constant([1]),
        "feature2": tf.sparse.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
    }

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensors must be defined"):
      export.ServingInputReceiver(features=features, receiver_tensors=None)

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensor keys must be strings"):
      export.ServingInputReceiver(
          features=features,
          receiver_tensors={1: tf.constant(["test"], name="example0")})

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensor example1 must be a Tensor"):
      export.ServingInputReceiver(
          features=features, receiver_tensors={"example1": [1]})

  def test_single_feature_single_receiver(self):
    feature = tf.constant(5)
    receiver_tensor = tf.constant(["test"])
    input_receiver = export.ServingInputReceiver(feature, receiver_tensor)
    # single feature is automatically named
    feature_key, = input_receiver.features.keys()
    self.assertEqual("feature", feature_key)
    # single receiver is automatically named
    receiver_key, = input_receiver.receiver_tensors.keys()
    self.assertEqual("input", receiver_key)

  def test_multi_feature_single_receiver(self):
    features = {"foo": tf.constant(5), "bar": tf.constant(6)}
    receiver_tensor = tf.constant(["test"])
    _ = export.ServingInputReceiver(features, receiver_tensor)

  def test_multi_feature_multi_receiver(self):
    features = {"foo": tf.constant(5), "bar": tf.constant(6)}
    receiver_tensors = {"baz": tf.constant(5), "qux": tf.constant(6)}
    _ = export.ServingInputReceiver(features, receiver_tensors)

  def test_feature_wrong_type(self):
    feature = "not a tensor"
    receiver_tensor = tf.constant(["test"])
    with self.assertRaises(ValueError):
      _ = export.ServingInputReceiver(feature, receiver_tensor)

  def test_feature_labeled_tensor(self):
    # Anything convertible to a Tensor (via the conversion function
    # registered above) is accepted as a feature.
    feature = LabeledTensorMock()
    receiver_tensor = tf.constant(["test"])
    _ = export.ServingInputReceiver(feature, receiver_tensor)

  def test_receiver_wrong_type(self):
    feature = tf.constant(5)
    receiver_tensor = "not a tensor"
    with self.assertRaises(ValueError):
      _ = export.ServingInputReceiver(feature, receiver_tensor)


class UnsupervisedInputReceiverTest(tf.test.TestCase):

  # Since this is basically a wrapper around ServingInputReceiver, we only
  # have a simple sanity check to ensure that it works.

  def test_unsupervised_input_receiver_constructor(self):
    """Tests that no errors are raised when input is expected."""
    features = {
        "feature0": tf.constant([0]),
        u"feature1": tf.constant([1]),
        "feature2": tf.sparse.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
        42:  # ints are allowed only in the `features` dict
            tf.constant([3]),
    }
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }
    export.UnsupervisedInputReceiver(features, receiver_tensors)


class SupervisedInputReceiverTest(tf.test.TestCase):
  """Validation behavior of SupervisedInputReceiver (features + labels)."""

  def test_input_receiver_constructor(self):
    """Tests that no errors are raised when input is expected."""
    features = {
        "feature0": tf.constant([0]),
        u"feature1": tf.constant([1]),
        "feature2": tf.sparse.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
        42:  # ints are allowed in the `features` dict
            tf.constant([3]),
    }
    labels = {
        "classes": tf.constant([0] * 100),
        43:  # ints are allowed in the `labels` dict
            tf.constant([3]),
    }
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }
    export.SupervisedInputReceiver(features, labels, receiver_tensors)

  def test_input_receiver_raw_values(self):
    """Tests that no errors are raised when input is expected."""
    features = {
        "feature0": tf.constant([0]),
        u"feature1": tf.constant([1]),
        "feature2": tf.sparse.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
        42:  # ints are allowed in the `features` dict
            tf.constant([3]),
    }
    labels = {
        "classes": tf.constant([0] * 100),
        43:  # ints are allowed in the `labels` dict
            tf.constant([3]),
    }
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }
    # Raw (non-dict) features/labels are passed through unwrapped.
    rec = export.SupervisedInputReceiver(features["feature2"], labels,
                                         receiver_tensors)
    self.assertIsInstance(rec.features, tf.sparse.SparseTensor)

    rec = export.SupervisedInputReceiver(features, labels["classes"],
                                         receiver_tensors)
    self.assertIsInstance(rec.labels, tf.Tensor)

  def test_input_receiver_features_invalid(self):
    features = tf.constant([0] * 100)
    labels = tf.constant([0])
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }

    with self.assertRaisesRegexp(ValueError, "features must be defined"):
      export.SupervisedInputReceiver(
          features=None, labels=labels, receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(ValueError,
                                 "feature keys must be strings or ints"):
      export.SupervisedInputReceiver(
          features={1.11: tf.constant([1])},
          labels=labels,
          receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(ValueError,
                                 "label keys must be strings or ints"):
      export.SupervisedInputReceiver(
          features=features,
          labels={1.11: tf.constant([1])},
          receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(
        ValueError, "feature feature1 must be a Tensor, SparseTensor, or "
        "RaggedTensor."):
      export.SupervisedInputReceiver(
          features={"feature1": [1]},
          labels=labels,
          receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(ValueError,
                                 "feature must be a Tensor, SparseTensor, "
                                 "or RaggedTensor."):
      export.SupervisedInputReceiver(
          features=[1], labels=labels, receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(ValueError,
                                 "label must be a Tensor, SparseTensor, "
                                 "or RaggedTensor."):
      export.SupervisedInputReceiver(
          features=features, labels=100, receiver_tensors=receiver_tensors)

  def test_input_receiver_receiver_tensors_invalid(self):
    features = {
        "feature0": tf.constant([0]),
        u"feature1": tf.constant([1]),
        "feature2": tf.sparse.SparseTensor(
            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
    }
    labels = tf.constant([0])

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensors must be defined"):
      export.SupervisedInputReceiver(
          features=features, labels=labels, receiver_tensors=None)

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensor keys must be strings"):
      export.SupervisedInputReceiver(
          features=features,
          labels=labels,
          receiver_tensors={1: tf.constant(["test"], name="example0")})

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensor example1 must be a Tensor"):
      export.SupervisedInputReceiver(
          features=features, labels=labels,
          receiver_tensors={"example1": [1]})

  def test_single_feature_single_receiver(self):
    feature = tf.constant(5)
    label = tf.constant(5)
    receiver_tensor = tf.constant(["test"])
    input_receiver = export.SupervisedInputReceiver(feature, label,
                                                    receiver_tensor)
    # single receiver is automatically named
    receiver_key, = input_receiver.receiver_tensors.keys()
    self.assertEqual("input", receiver_key)

  def test_multi_feature_single_receiver(self):
    features = {"foo": tf.constant(5), "bar": tf.constant(6)}
    labels = {"value": tf.constant(5)}
    receiver_tensor = tf.constant(["test"])
    _ = export.SupervisedInputReceiver(features, labels, receiver_tensor)

  def test_multi_feature_multi_receiver(self):
    features = {"foo": tf.constant(5), "bar": tf.constant(6)}
    labels = {"value": tf.constant(5)}
    receiver_tensors = {"baz": tf.constant(5), "qux": tf.constant(6)}
    _ = export.SupervisedInputReceiver(features, labels, receiver_tensors)

  def test_feature_labeled_tensor(self):
    feature = LabeledTensorMock()
    label = tf.constant(5)
    receiver_tensor = tf.constant(["test"])
    _ = export.SupervisedInputReceiver(feature, label, receiver_tensor)


class ExportTest(tf.test.TestCase):
  """End-to-end tests of the serving_input_receiver_fn builders."""

  # Calling serving_input_receiver_fn requires graph mode.
  @test_util.deprecated_graph_mode_only
  def test_build_parsing_serving_input_receiver_fn(self):
    feature_spec = {
        "int_feature": tf.io.VarLenFeature(tf.dtypes.int64),
        "float_feature": tf.io.VarLenFeature(tf.dtypes.float32)
    }
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    with tf.Graph().as_default():
      serving_input_receiver = serving_input_receiver_fn()
      self.assertEqual(
          set(["int_feature", "float_feature"]),
          set(serving_input_receiver.features.keys()))
      self.assertEqual(
          set(["examples"]),
          set(serving_input_receiver.receiver_tensors.keys()))

      example = example_pb2.Example()
      text_format.Parse(
          "features: { "
          " feature: { "
          " key: 'int_feature' "
          " value: { "
          " int64_list: { "
          " value: [ 21, 2, 5 ] "
          " } "
          " } "
          " } "
          " feature: { "
          " key: 'float_feature' "
          " value: { "
          " float_list: { "
          " value: [ 525.25 ] "
          " } "
          " } "
          " } "
          "} ", example)

      with self.cached_session() as sess:
        sparse_result = sess.run(
            serving_input_receiver.features,
            feed_dict={
                serving_input_receiver.receiver_tensors["examples"].name: [
                    example.SerializeToString()
                ]
            })
        self.assertAllEqual([[0, 0], [0, 1], [0, 2]],
                            sparse_result["int_feature"].indices)
        self.assertAllEqual([21, 2, 5], sparse_result["int_feature"].values)
        self.assertAllEqual([[0, 0]],
                            sparse_result["float_feature"].indices)
        self.assertAllEqual([525.25],
                            sparse_result["float_feature"].values)

  # Calling serving_input_receiver_fn requires graph mode.
  @test_util.deprecated_graph_mode_only
  def test_build_raw_serving_input_receiver_fn_name(self):
    """Test case for issue #12755."""
    f = {
        "feature":
            tf.compat.v1.placeholder(
                name="feature", shape=[32], dtype=tf.dtypes.float32)
    }
    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(f)
    v = serving_input_receiver_fn()
    self.assertIsInstance(v, export.ServingInputReceiver)

  # Calling serving_input_receiver_fn requires graph mode.
  @test_util.deprecated_graph_mode_only
  def test_build_raw_serving_input_receiver_fn_without_shape(self):
    """Test case for issue #21178."""
    f = {
        "feature_1": tf.compat.v1.placeholder(tf.dtypes.float32),
        "feature_2": tf.compat.v1.placeholder(tf.dtypes.int32)
    }
    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(f)
    v = serving_input_receiver_fn()
    self.assertIsInstance(v, export.ServingInputReceiver)
    # Placeholders created without an explicit shape should surface fully
    # unknown shapes on the receiver tensors.
    self.assertEqual(tensor_shape.unknown_shape(),
                     v.receiver_tensors["feature_1"].shape)
    self.assertEqual(tensor_shape.unknown_shape(),
                     v.receiver_tensors["feature_2"].shape)

  def test_build_raw_serving_input_receiver_fn(self):
    """Checks keys and dtypes of the features and receiver tensors."""
    features = {
        "feature_1": tf.constant(["hello"]),
        "feature_2": tf.constant([42])
    }
    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(
        features)
    with tf.Graph().as_default():
      serving_input_receiver = serving_input_receiver_fn()
      self.assertEqual(
          set(["feature_1", "feature_2"]),
          set(serving_input_receiver.features.keys()))
      self.assertEqual(
          set(["feature_1", "feature_2"]),
          set(serving_input_receiver.receiver_tensors.keys()))
      self.assertEqual(
          tf.dtypes.string,
          serving_input_receiver.receiver_tensors["feature_1"].dtype)
      self.assertEqual(
          tf.dtypes.int32,
          serving_input_receiver.receiver_tensors["feature_2"].dtype)

  def test_build_raw_supervised_input_receiver_fn(self):
    """Supervised receiver: feature and label keys merge into receivers."""
    features = {
        "feature_1": tf.constant(["hello"]),
        "feature_2": tf.constant([42])
    }
    labels = {"foo": tf.constant([5]), "bar": tf.constant([6])}
    input_receiver_fn = export.build_raw_supervised_input_receiver_fn(
        features, labels)
    with tf.Graph().as_default():
      input_receiver = input_receiver_fn()
      self.assertEqual(
          set(["feature_1", "feature_2"]),
          set(input_receiver.features.keys()))
      self.assertEqual(set(["foo", "bar"]), set(input_receiver.labels.keys()))
      # receiver_tensors is the union of the feature and label keys.
      self.assertEqual(
          set(["feature_1", "feature_2", "foo", "bar"]),
          set(input_receiver.receiver_tensors.keys()))
      self.assertEqual(tf.dtypes.string,
                       input_receiver.receiver_tensors["feature_1"].dtype)
      self.assertEqual(tf.dtypes.int32,
                       input_receiver.receiver_tensors["feature_2"].dtype)

  def test_build_raw_supervised_input_receiver_fn_raw_tensors(self):
    """Bare (non-dict) features/labels get default receiver key names."""
    features = {
        "feature_1": tf.constant(["hello"]),
        "feature_2": tf.constant([42])
    }
    labels = {"foo": tf.constant([5]), "bar": tf.constant([6])}
    input_receiver_fn1 = export.build_raw_supervised_input_receiver_fn(
        features["feature_1"], labels)
    input_receiver_fn2 = export.build_raw_supervised_input_receiver_fn(
        features["feature_1"], labels["foo"])
    with tf.Graph().as_default():
      input_receiver = input_receiver_fn1()
      self.assertIsInstance(input_receiver.features, tf.Tensor)
      self.assertEqual(set(["foo", "bar"]), set(input_receiver.labels.keys()))
      # A bare feature tensor is registered under the default name "input".
      self.assertEqual(
          set(["input", "foo", "bar"]),
          set(input_receiver.receiver_tensors.keys()))

      input_receiver = input_receiver_fn2()
      self.assertIsInstance(input_receiver.features, tf.Tensor)
      self.assertIsInstance(input_receiver.labels, tf.Tensor)
      # A bare label tensor is registered under the default name "label".
      self.assertEqual(
          set(["input", "label"]),
          set(input_receiver.receiver_tensors.keys()))

  def test_build_raw_supervised_input_receiver_fn_batch_size(self):
    """default_batch_size propagates to both receivers and features."""
    features = {
        "feature_1": tf.constant(["hello"]),
        "feature_2": tf.constant([42])
    }
    labels = {"foo": tf.constant([5]), "bar": tf.constant([6])}
    input_receiver_fn = export.build_raw_supervised_input_receiver_fn(
        features, labels, default_batch_size=10)
    with tf.Graph().as_default():
      input_receiver = input_receiver_fn()
      self.assertEqual([10], input_receiver.receiver_tensors["feature_1"].shape)
      self.assertEqual([10], input_receiver.features["feature_1"].shape)

  def test_build_raw_supervised_input_receiver_fn_overlapping_keys(self):
    """A key present in both features and labels must be rejected."""
    features = {
        "feature_1": tf.constant(["hello"]),
        "feature_2": tf.constant([42])
    }
    labels = {"feature_1": tf.constant([5]), "bar": tf.constant([6])}
    with self.assertRaises(ValueError):
      export.build_raw_supervised_input_receiver_fn(features, labels)

  def test_build_supervised_input_receiver_fn_from_input_fn(self):

    def dummy_input_fn():
      return ({
          "x": tf.constant([[1], [1]]),
          "y": tf.constant(["hello", "goodbye"])
      }, tf.constant([[1], [1]]))

    input_receiver_fn = export.build_supervised_input_receiver_fn_from_input_fn(
        dummy_input_fn)

    with tf.Graph().as_default():
      input_receiver = input_receiver_fn()
      self.assertEqual(set(["x", "y"]), set(input_receiver.features.keys()))
      self.assertIsInstance(input_receiver.labels, tf.Tensor)
      self.assertEqual(
          set(["x", "y", "label"]),
          set(input_receiver.receiver_tensors.keys()))

  def test_build_supervised_input_receiver_fn_from_input_fn_args(self):
    """Keyword args given to the builder are forwarded to the input_fn."""

    def dummy_input_fn(feature_key="x"):
      return ({
          feature_key: tf.constant([[1], [1]]),
          "y": tf.constant(["hello", "goodbye"])
      }, {
          "my_label": tf.constant([[1], [1]])
      })

    input_receiver_fn = export.build_supervised_input_receiver_fn_from_input_fn(
        dummy_input_fn, feature_key="z")

    with tf.Graph().as_default():
      input_receiver = input_receiver_fn()
      self.assertEqual(set(["z", "y"]), set(input_receiver.features.keys()))
      self.assertEqual(set(["my_label"]), set(input_receiver.labels.keys()))
      self.assertEqual(
          set(["z", "y", "my_label"]),
          set(input_receiver.receiver_tensors.keys()))


class TensorServingReceiverTest(tf.test.TestCase):
  """Tests for TensorServingInputReceiver (single-tensor features)."""

  def test_tensor_serving_input_receiver_constructor(self):
    features = tf.constant([0])
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }
    r = export.TensorServingInputReceiver(features, receiver_tensors)
    # Unlike ServingInputReceiver, features stay a bare Tensor (no dict wrap).
    self.assertIsInstance(r.features, tf.Tensor)
    self.assertIsInstance(r.receiver_tensors, dict)

  def test_tensor_serving_input_receiver_sparse(self):
    features = tf.sparse.SparseTensor(
        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }
    r = export.TensorServingInputReceiver(features, receiver_tensors)
    self.assertIsInstance(r.features, tf.sparse.SparseTensor)
    self.assertIsInstance(r.receiver_tensors, dict)

  def test_serving_input_receiver_features_invalid(self):
    receiver_tensors = {
        "example0": tf.constant(["test0"], name="example0"),
        u"example1": tf.constant(["test1"], name="example1"),
    }

    with self.assertRaisesRegexp(ValueError, "features must be defined"):
      export.TensorServingInputReceiver(
          features=None, receiver_tensors=receiver_tensors)

    with self.assertRaisesRegexp(ValueError, "feature must be a Tensor"):
      export.TensorServingInputReceiver(
          features={"1": tf.constant([1])}, receiver_tensors=receiver_tensors)

  def test_serving_input_receiver_receiver_tensors_invalid(self):
    features = tf.constant([0])

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensors must be defined"):
      export.TensorServingInputReceiver(
          features=features, receiver_tensors=None)

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensor keys must be strings"):
      export.TensorServingInputReceiver(
          features=features,
          receiver_tensors={1: tf.constant(["test"], name="example0")})

    with self.assertRaisesRegexp(ValueError,
                                 "receiver_tensor example1 must be a Tensor"):
      export.TensorServingInputReceiver(
          features=features, receiver_tensors={"example1": [1]})


if __name__ == "__main__":
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/export/function.py
================================================
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Defines class for wrapping an Estimator model function.""" # TODO(kathywu): support remaining outputs from the EstimatorSpec. from __future__ import absolute_import from __future__ import division from __future__ import print_function import six import tensorflow as tf from tensorflow.python.eager import function from tensorflow.python.eager import wrap_function from tensorflow.python.framework import func_graph from tensorflow.python.saved_model.model_utils import export_utils from tensorflow.python.util import function_utils from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.mode_keys import ModeKeys class ModelFunction(tf.compat.v2.__internal__.tracking.AutoTrackable): """A checkpointable ModelFunction object. This object stores a global mapping of variables and functions for each mode. """ def __init__(self, config=None, params=None): self._config = config self._params = params self._functions = {} self._variable_holder = wrap_function.VariableHolder(share_variables=True) # Add reference to the variable holder's mapping of variables, which is a # trackable object. 
self._variables_by_name = self._variable_holder.variables @staticmethod def from_function(model_fn, all_modes=None, config=None, params=None): """Creates a new ModelFunction object from a model function.""" if all_modes is None: all_modes = [ModeKeys.TRAIN, ModeKeys.EVAL, ModeKeys.PREDICT] else: all_modes = list(all_modes) obj = ModelFunction(config=config, params=params) for mode in all_modes: obj.add_mode(model_fn, mode) return obj @property def variables(self): return self._variables_by_name def add_mode(self, fn, mode, input_signature=None): if mode in self._functions: raise ValueError('ModelFunction object has multiple functions with name' ' {}.'.format(mode)) spec_fn = EstimatorSpecFunction( fn, mode, config=self._config, params=self._params, variable_holder=self._variable_holder, input_signature=input_signature) self._functions[mode] = spec_fn def train(self, features, labels): return self.call(ModeKeys.TRAIN, features, labels) def evaluate(self, features, labels): return self.call(ModeKeys.EVAL, features, labels) def predict(self, features): return self.call(ModeKeys.PREDICT, features) def call(self, mode, features, labels=None): if mode not in self._functions: raise ValueError( 'Mode {} is not defined the ModelFunction. To add modes,' ' use the `add_mode()` function. Available modes: {}'.format( mode, self._functions.keys())) fn = self._functions[mode] if fn.expects_labels: return fn(features, labels) else: return fn(features) def _wrap_and_verify_model_fn(model_fn, mode=None, config=None, params=None, input_signature=None): """Returns a function that only has only tensor arguments (features, labels). Args: model_fn: Model function. Must follow the signature defined in `tf.estimator.Estimator`. mode: Optional string `tf.estimstor.ModeKey`. config: Optional `estimator.RunConfig` object. params: Optional `dict` of hyperparameters. input_signature: Possibly nested TensorSpec of the tensor arguments. 
Returns: tuple of ( function that only accepts tensor arguments (features and/or labels), whether the returned function expects a labels argument) """ model_fn_lib.verify_model_fn_args(model_fn, params) args = function_utils.fn_args(model_fn) kwargs = {} if 'mode' in args: kwargs['mode'] = mode if 'params' in args: kwargs['params'] = params if 'config' in args: kwargs['config'] = config if 'labels' in args: if input_signature is None or len(input_signature) == 2: def wrapped_model_fn(features, labels=None): return model_fn(features=features, labels=labels, **kwargs) else: def wrapped_model_fn(features): return model_fn(features=features, labels=None, **kwargs) else: def wrapped_model_fn(features): return model_fn(features=features, **kwargs) return wrapped_model_fn, 'labels' in args class EstimatorSpecFunction(tf.compat.v2.__internal__.function.Function): """Wraps graph functions defined for a function returning an EstimatorSpec. Instances of this class are revivable when attached to a checkpointable object. """ def __init__(self, fn, mode, config=None, params=None, variable_holder=None, **kwargs): """Initializes an EstimatorSpecFunction. Args: fn: Python model function. mode: String mode to run the function. config: RunConfig that is passed to the `config` arg in the function. params: object that is passed to the `params` argument in the function. variable_holder: Optional `wrap_function.VariableHolder` object. **kwargs: Optional keyword arguments to pass to tf.function (e.g. input_signature). 
""" python_function, self.expects_labels = _wrap_and_verify_model_fn( fn, mode=mode, config=config, params=params, input_signature=kwargs.get('input_signature', None)) super(EstimatorSpecFunction, self).__init__(python_function, mode, **kwargs) self._variable_holder = variable_holder def _defun(self, fn): return _EstimatorSpecFunction( fn, name=self._name, variable_holder=self._variable_holder, input_signature=self.input_signature, autograph=self._autograph, autograph_options=self._experimental_autograph_options) class _EstimatorSpecFunction(tf.compat.v2.__internal__.function.Function): """Wraps graph functions defined for a function returning an EstimatorSpec. This object handles creation of the graph functions. """ def __init__(self, python_function, name, variable_holder=None, **kwargs): super(_EstimatorSpecFunction, self).__init__(python_function, name, **kwargs) self._variable_holder = variable_holder def _create_graph_function(self, args, kwargs, **other_kwargs): _ = other_kwargs wrapped_graph = _EstimatorWrappedGraph(self._variable_holder) return wrapped_graph.wrap_model_fn( self._python_function, self._name, signature=self.input_signature, args=args, kwargs=kwargs) class _EstimatorWrappedGraph(wrap_function.WrappedGraph): """WrappedGraph that handles global step creation and wraps estimator fns.""" def __init__(self, *args, **kwargs): super(_EstimatorWrappedGraph, self).__init__(*args, **kwargs) # Create global step variable, which may be used by the input and model fns. self._global_step_read_fn = self.wrap_function( self._global_step, signature=[]) self._concrete_model_fn = None # Original EstimatorSpec object returned by the model function. Only tensors # and ops are returned by the concrete model function. 
self._estimator_spec = None def _global_step(self): return tf.compat.v1.train.get_or_create_global_step() @property def global_step(self): return self._global_step_read_fn() @property def model_fn(self): return self._concrete_model_fn @property def estimator_spec(self): if self._concrete_model_fn is None: raise ValueError('Please wrap a model function first.') return self._estimator_spec def wrap_model_fn(self, model_fn, mode, args=None, kwargs=None, signature=None): """Wraps a model function, and stores the returned estimator spec.""" if self._concrete_model_fn is not None: raise ValueError('`wrap_model_fn` should be only called once per graph.') def fn(*args, **kwargs): """Returns tensor and op outputs from the returned spec.""" ret = model_fn(*args, **kwargs) if isinstance(ret, model_fn_lib.EstimatorSpec): self._estimator_spec = ret return _filter_estimator_spec_outputs(ret) return ret name = 'model_fn_{}'.format(mode) self._concrete_model_fn = self._wrap_function(fn, args, kwargs, signature, name) return self._concrete_model_fn def wrap_input_receiver_fn(self, input_receiver_fn): """Converts an input receiver function to one or more concrete functions. Input receiver functions are python functions with no arguments. Placeholders are created within the function and used to receive inputs to the model. The function (or multiple functions) generated depends on the InputReceiver object returned by `input_receiver_fn`. Generally, the returned function will have inputs and outputs: input_receiver(**receiver_tensors) --> features or (if the InputReceiver returns labels): input_receiver(**receiver_tensors) --> features, labels __Alternate Receiver Tensors__ The InputReceiver may have alternate receiver tensors, in which case additional concrete functions are generated. 
Example: InputReceiver.receiver_tensors_alternatives = { 'alt_input_1': Tensor, 'alt_input_2': { 'tensor_1': Tensor, 'tensor_2': Tensor } } This will generate concrete functions: input_receiver_alt_input_1(input) --> features input_receiver_alt_input_2(tensor_1, tensor_2) --> features Args: input_receiver_fn: a no-argument function that returns an `InputReceiver` object. Returns: A list of tuples of (concrete function, receiver name). The name of the default input receiver is `None`. """ ret = [None] def fn(): ret[0] = input_receiver = input_receiver_fn() features = input_receiver.features labels = getattr(input_receiver, 'labels', None) if labels is None: return features return features, labels func_graph.func_graph_from_py_func( None, # Name is unused. self._variable_holder.call_with_variable_creator_scope(fn), args=None, kwargs=None, signature=[], add_control_dependencies=False, func_graph=self.graph) functions = [] input_receiver = ret[0] wrapped_input_receiver_fn = _prune_receiver_tensors( self._wrapped_function, receiver_tensors=input_receiver.receiver_tensors, outputs=self.graph.structured_outputs, name=_input_receiver_fn_name(None)) functions.append((wrapped_input_receiver_fn, None)) receiver_tensors_alternatives = getattr(input_receiver, 'receiver_tensors_alternatives', None) if receiver_tensors_alternatives: for receiver_name, receiver_tensors_alt in ( six.iteritems(receiver_tensors_alternatives)): receiver_tensors_alt = _canonicalize_receiver_tensors( receiver_tensors_alt) wrapped_input_receiver_fn = _prune_receiver_tensors( self._wrapped_function, receiver_tensors=receiver_tensors_alt, outputs=self.graph.structured_outputs, name=_input_receiver_fn_name(receiver_name)) functions.append((wrapped_input_receiver_fn, receiver_name)) return functions def _filter_estimator_spec_outputs(spec): """Filters tensors and ops from an EstimatorSpec and returns a dictionary.""" # TODO(kathywu): Add loss, export outputs, eval metrics depending on the mode. 
if spec.mode == ModeKeys.TRAIN: return dict(predictions=spec.predictions, train_op=spec.train_op) return dict(predictions=spec.predictions) _RECEIVER_FN_NAME = '_input_receiver' def _canonicalize_receiver_tensors(receiver_tensors): """Converts receiver tensors to the expected format of `as_signature_def`.""" # TODO(b/129646028): Wrap function doesn't support composite tensors. for tensor in tf.nest.flatten(receiver_tensors): if not isinstance(tensor, tf.Tensor): raise ValueError('All receiver tensors must be tensors (composite ' 'tensors are not yet supported).') if isinstance(receiver_tensors, dict): return receiver_tensors return {export_utils.SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors} def _input_receiver_fn_name(name): if name is None: return _RECEIVER_FN_NAME else: return '{}_{}'.format(_RECEIVER_FN_NAME, name) def _prune_receiver_tensors(wrapped_function, receiver_tensors, outputs, name): inputs = _canonicalize_receiver_tensors(receiver_tensors) return wrapped_function.prune( inputs, outputs, name=name, input_signature=(None, func_graph.convert_structure_to_signature(inputs))) ================================================ FILE: tensorflow_estimator/python/estimator/export/function_test.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for Estimator function objects."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import six as six
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.export import export_lib
from tensorflow_estimator.python.estimator.export import function
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys


def _string_fix(obj):
  """Normalizes all strings in a nested structure to bytes.

  Makes dict comparisons insensitive to str/bytes differences between Python
  versions and TF string tensor outputs.
  """
  return tf.nest.map_structure(
      lambda x: tf.compat.as_bytes(x) if isinstance(x, six.string_types) else x,
      obj)


def _model_fn(features, labels, mode):
  """Simple three-mode model_fn used by ModelFunction tests."""
  v = tf.Variable(tf.constant(23), name='v')
  if mode == ModeKeys.PREDICT:
    return model_fn_lib.EstimatorSpec(
        ModeKeys.PREDICT, predictions=features + 1)
  elif mode == ModeKeys.EVAL:
    return model_fn_lib.EstimatorSpec(
        ModeKeys.EVAL, loss=tf.constant(5) + v, predictions=features + labels)
  elif mode == ModeKeys.TRAIN:
    return model_fn_lib.EstimatorSpec(
        ModeKeys.TRAIN,
        predictions=features * labels,
        loss=tf.constant(5) + v,
        train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                         1))


def _model_fn_train_only(features, labels):
  """Model_fn with no `mode` argument (always TRAIN)."""
  v = tf.Variable(tf.constant(23), name='v')
  return model_fn_lib.EstimatorSpec(
      ModeKeys.TRAIN,
      predictions=features * labels,
      loss=tf.constant(5) + v,
      train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                       1))


def _model_fn_predict_only(features):
  """Model_fn with no `labels` or `mode` arguments (always PREDICT)."""
  return model_fn_lib.EstimatorSpec(ModeKeys.PREDICT, predictions=features + 1)


# TODO(kathywu): Re-enable test after def_function changes are built into
# nightlies.
@test_util.run_all_in_graph_and_eager_modes
class ModelFunctionTest(object):
  # NOTE: subclasses `object` rather than tf.test.TestCase, so these tests are
  # currently not collected/run (see TODO above).

  def test_from_function(self):
    mfn = function.ModelFunction.from_function(_model_fn)
    out = mfn.train(tf.constant(3), tf.constant(5))
    self.evaluate(tf.compat.v1.initializers.variables(mfn.variables.values()))
    self.assertEqual(15, self.evaluate(out['predictions']))
    out = mfn.evaluate(tf.constant(7), tf.constant(9))
    self.assertEqual(16, self.evaluate(out['predictions']))
    out = mfn.predict(tf.constant(10))
    self.assertEqual(11, self.evaluate(out['predictions']))

  def test_model_fn_train_only(self):
    mfn = function.ModelFunction()
    mfn.add_mode(_model_fn_train_only, ModeKeys.TRAIN)
    out = mfn.train(tf.constant(4), tf.constant(6))
    self.evaluate(tf.compat.v1.initializers.variables(mfn.variables.values()))
    self.assertEqual(24, self.evaluate(out['predictions']))

    # Only TRAIN was registered; other modes must raise.
    with self.assertRaisesRegexp(ValueError, 'not defined'):
      out = mfn.evaluate(tf.constant(7), tf.constant(9))

  def test_model_fn_predict_only(self):
    mfn = function.ModelFunction()
    mfn.add_mode(_model_fn_predict_only, ModeKeys.PREDICT)
    out = mfn.predict(tf.constant(4))
    self.evaluate(tf.compat.v1.initializers.variables(mfn.variables.values()))
    self.assertEqual(5, self.evaluate(out['predictions']))

    with self.assertRaisesRegexp(ValueError, 'not defined'):
      out = mfn.evaluate(tf.constant(7), tf.constant(9))

  def test_save_and_load(self):
    mfn = function.ModelFunction.from_function(_model_fn)
    out = mfn.train(tf.constant(3), tf.constant(5))
    self.evaluate(tf.compat.v1.initializers.variables(mfn.variables.values()))
    self.evaluate(out['predictions'])
    # Run two more train steps so the global step reaches 3 before saving.
    for _ in range(2):
      out = mfn.train(tf.constant(3), tf.constant(5))
      self.evaluate(out['predictions'])
    self.assertEqual(
        3, self.evaluate(mfn._variable_holder.variables['global_step']))

    mfn.evaluate(tf.constant(7), tf.constant(9))
    mfn.predict(tf.constant(10))

    save_dir = os.path.join(self.get_temp_dir(), 'model_function')
    tf.saved_model.save(mfn, save_dir)
    obj = tf.saved_model.load(save_dir)

    variables_by_name = obj._variables_by_name
    self.evaluate(
        tf.compat.v1.initializers.variables(
            variables_by_name._unconditional_dependency_names.values()))
    # Global step value must survive the save/load round trip.
    self.assertEqual(3, self.evaluate(variables_by_name.global_step))

    out = obj._functions['train'](tf.constant(3), tf.constant(5))
    self.assertEqual(15, self.evaluate(out['predictions']))
    self.assertEqual(4, self.evaluate(variables_by_name.global_step))

    out = obj._functions['eval'](tf.constant(7), tf.constant(9))
    self.assertEqual(16, self.evaluate(out['predictions']))

    out = obj._functions['infer'](tf.constant(10))
    self.assertEqual(11, self.evaluate(out['predictions']))


def _model_fn_callable_variable_initializers(features, labels, mode):
  """Model_fn with callable variable initializers (for WrappedGraph tests)."""
  _ = features, labels
  v = tf.Variable(lambda: tf.constant(23), name='v')
  if mode == ModeKeys.PREDICT:
    return model_fn_lib.EstimatorSpec(
        ModeKeys.PREDICT, predictions=features + 1)
  elif mode == ModeKeys.EVAL:
    return model_fn_lib.EstimatorSpec(
        ModeKeys.EVAL, loss=tf.constant(5) + v, predictions=features + labels)
  elif mode == ModeKeys.TRAIN:
    return model_fn_lib.EstimatorSpec(
        ModeKeys.TRAIN,
        predictions=features * labels,
        loss=tf.constant(5) + v,
        train_op=tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                         1))


@test_util.run_all_in_graph_and_eager_modes
class EstimatorWrappedGraphTest(tf.test.TestCase):
  """Tests for function._EstimatorWrappedGraph."""

  def test_wrap_model_fn_train(self):
    graph = function._EstimatorWrappedGraph()
    features = tf.constant(3)
    labels = tf.constant(4)
    mode = ModeKeys.TRAIN
    fn = graph.wrap_model_fn(
        _model_fn_callable_variable_initializers,
        mode=mode,
        args=[features, labels, mode],
        kwargs={})
    self.evaluate(tf.compat.v1.initializers.variables(graph.variables.values()))
    self.assertEqual(0, self.evaluate(graph.global_step))
    self.assertEqual(12, self.evaluate(fn(features, labels)['predictions']))
    # The train_op increments the global step as a side effect.
    self.assertEqual(1, self.evaluate(graph.global_step))

    self.assertEqual('AssignAddVariableOp', graph.estimator_spec.train_op.type)

  def test_wrap_model_fn_eval(self):
    graph = function._EstimatorWrappedGraph()
    features = tf.constant(5)
    labels = tf.constant(6)
    mode = ModeKeys.EVAL
    fn = graph.wrap_model_fn(
        _model_fn_callable_variable_initializers,
        mode=mode,
        args=[features, labels, mode],
        kwargs={})
    self.assertDictEqual({'predictions': 11},
                         self.evaluate(fn(features, labels)))

  def test_wrap_model_fn_predict(self):
    graph = function._EstimatorWrappedGraph()
    features = tf.constant(7)
    mode = ModeKeys.PREDICT
    fn = graph.wrap_model_fn(
        _model_fn_callable_variable_initializers,
        mode=mode,
        args=[features, None, mode],
        kwargs={})
    self.assertDictEqual({'predictions': 8}, self.evaluate(fn(features)))

  def test_wrap_input_receiver_fn(self):
    """Checks the default receiver plus both named alternates."""

    def serving_input_fn():
      receiver_1 = tf.compat.v1.placeholder(tf.dtypes.string)
      receiver_2 = tf.compat.v1.placeholder(tf.dtypes.string)
      receiver_tensors = {
          'rec1': receiver_1,
          u'rec2': receiver_2,
      }

      concat = tf.strings.join([receiver_1, receiver_2])
      concat2 = tf.identity(concat)
      features = {
          'feature0': tf.strings.join([concat, concat2], ':'),
          u'feature1': tf.constant([1])
      }

      alternate_tensors = {
          'alt_name_1': concat,
          'alt_name_2': {
              'tensor1': concat,
              'tensor2': concat2
          }
      }
      return export_lib.ServingInputReceiver(features, receiver_tensors,
                                             alternate_tensors)

    graph = function._EstimatorWrappedGraph()
    fns = graph.wrap_input_receiver_fn(serving_input_fn)

    for fn, name in fns:
      if name is None:
        out = fn(tf.constant('1'), tf.constant('2'))
        self.assertDictEqual(
            _string_fix({
                'feature0': '12:12',
                'feature1': [1]
            }), _string_fix(self.evaluate(out)))
      elif name == 'alt_name_1':
        out = fn(tf.constant('3'))
        self.assertDictEqual(
            _string_fix({
                'feature0': '3:3',
                'feature1': [1]
            }), _string_fix(self.evaluate(out)))
      elif name == 'alt_name_2':
        out = fn(tf.constant('4'), tf.constant('5'))
        self.assertDictEqual(
            _string_fix({
                'feature0': '4:5',
                'feature1': [1]
            }), _string_fix(self.evaluate(out)))


if __name__ == '__main__':
  tf.test.main()



================================================
FILE:
tensorflow_estimator/python/estimator/exporter.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """`Exporter` class represents different flavors of model export.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc import os import tensorflow as tf from tensorflow_estimator.python.estimator import gc from tensorflow_estimator.python.estimator import util from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.estimator_export import estimator_export @estimator_export('estimator.Exporter') class Exporter(object): """A class representing a type of model export.""" @abc.abstractproperty def name(self): """Directory name. A directory name under the export base directory where exports of this type are written. Should not be `None` nor empty. """ pass @abc.abstractmethod def export(self, estimator, export_path, checkpoint_path, eval_result, is_the_final_export): """Exports the given `Estimator` to a specific format. Args: estimator: the `Estimator` to export. export_path: A string containing a directory where to write the export. checkpoint_path: The checkpoint path to export. eval_result: The output of `Estimator.evaluate` on this checkpoint. 
is_the_final_export: This boolean is True when this is an export in the end of training. It is False for the intermediate exports during the training. When passing `Exporter` to `tf.estimator.train_and_evaluate` `is_the_final_export` is always False if `TrainSpec.max_steps` is `None`. Returns: The string path to the exported directory or `None` if export is skipped. """ pass class _SavedModelExporter(Exporter): """This class exports the serving graph and checkpoints. This class provides a basic exporting functionality and serves as a foundation for specialized `Exporter`s. """ def __init__(self, name, serving_input_receiver_fn, assets_extra=None, as_text=False): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. Args: name: unique name of this `Exporter` that is going to be used in the export path. serving_input_receiver_fn: a function that takes no arguments and returns a `ServingInputReceiver`. assets_extra: An optional dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. The corresponding value gives the full path of the source file to be copied. For example, the simple case of copying a single file without renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. as_text: whether to write the SavedModel proto in text format. Defaults to `False`. Raises: ValueError: if any arguments is invalid. 
""" self._name = name self._serving_input_receiver_fn = serving_input_receiver_fn self._assets_extra = assets_extra self._as_text = as_text @property def name(self): return self._name def export(self, estimator, export_path, checkpoint_path, eval_result, is_the_final_export): del is_the_final_export export_result = estimator.export_saved_model( export_path, self._serving_input_receiver_fn, assets_extra=self._assets_extra, as_text=self._as_text, checkpoint_path=checkpoint_path) return export_result def _loss_smaller(best_eval_result, current_eval_result): """Compares two evaluation results and returns true if the 2nd one is smaller. Both evaluation results should have the values for MetricKeys.LOSS, which are used for comparison. Args: best_eval_result: best eval metrics. current_eval_result: current eval metrics. Returns: True if the loss of current_eval_result is smaller; otherwise, False. Raises: ValueError: If input eval result is None or no loss is available. """ default_key = metric_keys.MetricKeys.LOSS if not best_eval_result or default_key not in best_eval_result: raise ValueError( 'best_eval_result cannot be empty or no loss is found in it.') if not current_eval_result or default_key not in current_eval_result: raise ValueError( 'current_eval_result cannot be empty or no loss is found in it.') return best_eval_result[default_key] > current_eval_result[default_key] def _verify_compare_fn_args(compare_fn): """Verifies compare_fn arguments.""" args = set(util.fn_args(compare_fn)) if 'best_eval_result' not in args: raise ValueError('compare_fn (%s) must include best_eval_result argument.' % compare_fn) if 'current_eval_result' not in args: raise ValueError( 'compare_fn (%s) must include current_eval_result argument.' 
% compare_fn) non_valid_args = list(args - set(['best_eval_result', 'current_eval_result'])) if non_valid_args: raise ValueError('compare_fn (%s) has following not expected args: %s' % (compare_fn, non_valid_args)) @estimator_export('estimator.BestExporter') class BestExporter(Exporter): """This class exports the serving graph and checkpoints of the best models. This class performs a model export everytime the new model is better than any existing model. """ def __init__(self, name='best_exporter', serving_input_receiver_fn=None, event_file_pattern='eval/*.tfevents.*', compare_fn=_loss_smaller, assets_extra=None, as_text=False, exports_to_keep=5): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. Example of creating a BestExporter for training and evaluation: ```python def make_train_and_eval_fn(): # Set up feature columns. categorical_feature_a = ( tf.feature_column.categorical_column_with_hash_bucket(...)) categorical_feature_a_emb = embedding_column( categorical_column=categorical_feature_a, ...) ... # other feature columns estimator = tf.estimator.DNNClassifier( config=tf.estimator.RunConfig( model_dir='/my_model', save_summary_steps=100), feature_columns=[categorical_feature_a_emb, ...], hidden_units=[1024, 512, 256]) serving_feature_spec = tf.feature_column.make_parse_example_spec( categorical_feature_a_emb) serving_input_receiver_fn = ( tf.estimator.export.build_parsing_serving_input_receiver_fn( serving_feature_spec)) exporter = tf.estimator.BestExporter( name="best_exporter", serving_input_receiver_fn=serving_input_receiver_fn, exports_to_keep=5) train_spec = tf.estimator.TrainSpec(...) eval_spec = [tf.estimator.EvalSpec( input_fn=eval_input_fn, steps=100, exporters=exporter, start_delay_secs=0, throttle_secs=5)] tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) ``` Args: name: unique name of this `Exporter` that is going to be used in the export path. 
serving_input_receiver_fn: a function that takes no arguments and returns a `ServingInputReceiver`. event_file_pattern: event file name pattern relative to model_dir. If None, however, the exporter would not be preemption-safe. To be preemption-safe, event_file_pattern must be specified. compare_fn: a function that compares two evaluation results and returns true if current evaluation result is better. Follows the signature: * Args: * `best_eval_result`: This is the evaluation result of the best model. * `current_eval_result`: This is the evaluation result of current candidate model. * Returns: True if current evaluation result is better; otherwise, False. assets_extra: An optional dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. The corresponding value gives the full path of the source file to be copied. For example, the simple case of copying a single file without renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. as_text: whether to write the SavedModel proto in text format. Defaults to `False`. exports_to_keep: Number of exports to keep. Older exports will be garbage-collected. Defaults to 5. Set to `None` to disable garbage collection. Raises: ValueError: if any argument is invalid. """ self._compare_fn = compare_fn if self._compare_fn is None: raise ValueError('`compare_fn` must not be None.') _verify_compare_fn_args(self._compare_fn) self._saved_model_exporter = _SavedModelExporter(name, serving_input_receiver_fn, assets_extra, as_text) self._event_file_pattern = event_file_pattern self._model_dir = None self._best_eval_result = None self._has_exported = False self._exports_to_keep = exports_to_keep if exports_to_keep is not None and exports_to_keep <= 0: raise ValueError( '`exports_to_keep`, if provided, must be a positive number. 
Got %s' % exports_to_keep) @property def name(self): return self._saved_model_exporter.name def export(self, estimator, export_path, checkpoint_path, eval_result, is_the_final_export): export_result = None if self._model_dir != estimator.model_dir and self._event_file_pattern: # Loads best metric from event files. tf.compat.v1.logging.info('Loading best metric from event files.') self._model_dir = estimator.model_dir full_event_file_pattern = os.path.join(self._model_dir, self._event_file_pattern) self._best_eval_result = self._get_best_eval_result( full_event_file_pattern) if (self._best_eval_result is None or # check if this is the first export. not self._has_exported or self._compare_fn( best_eval_result=self._best_eval_result, current_eval_result=eval_result)): tf.compat.v1.logging.info('Performing best model export.') self._best_eval_result = eval_result export_result = self._saved_model_exporter.export(estimator, export_path, checkpoint_path, eval_result, is_the_final_export) self._garbage_collect_exports(export_path) self._has_exported = True return export_result def _garbage_collect_exports(self, export_dir_base): """Deletes older exports, retaining only a given number of the most recent. Export subdirectories are assumed to be named with monotonically increasing integers; the most recent are taken to be those with the largest values. Args: export_dir_base: the base directory under which each export is in a versioned subdirectory. """ if self._exports_to_keep is None: return def _export_version_parser(path): # create a simple parser that pulls the export_version from the directory. 
filename = os.path.basename(path.path) if not (len(filename) == 10 and filename.isdigit()): return None return path._replace(export_version=int(filename)) # pylint: disable=protected-access keep_filter = gc._largest_export_versions(self._exports_to_keep) delete_filter = gc._negation(keep_filter) for p in delete_filter( gc._get_paths(export_dir_base, parser=_export_version_parser)): try: tf.compat.v1.gfile.DeleteRecursively(p.path) except tf.errors.NotFoundError as e: tf.compat.v1.logging.warn('Can not delete %s recursively: %s', p.path, e) # pylint: enable=protected-access def _get_best_eval_result(self, event_files): """Get the best eval result from event files. Args: event_files: Absolute pattern of event files. Returns: The best eval result. """ if not event_files: return None best_eval_result = None for event_file in tf.compat.v1.gfile.Glob(os.path.join(event_files)): for event in tf.compat.v1.train.summary_iterator(event_file): if event.HasField('summary'): event_eval_result = {} for value in event.summary.value: if value.HasField('simple_value'): event_eval_result[value.tag] = value.simple_value if event_eval_result: if best_eval_result is None or self._compare_fn( best_eval_result, event_eval_result): best_eval_result = event_eval_result return best_eval_result @estimator_export('estimator.FinalExporter') class FinalExporter(Exporter): """This class exports the serving graph and checkpoints at the end. This class performs a single export at the end of training. """ def __init__(self, name, serving_input_receiver_fn, assets_extra=None, as_text=False): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. Args: name: unique name of this `Exporter` that is going to be used in the export path. serving_input_receiver_fn: a function that takes no arguments and returns a `ServingInputReceiver`. assets_extra: An optional dict specifying how to populate the assets.extra directory within the exported SavedModel. 
Each key should give the destination path (including the filename) relative to the assets.extra directory. The corresponding value gives the full path of the source file to be copied. For example, the simple case of copying a single file without renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. as_text: whether to write the SavedModel proto in text format. Defaults to `False`. Raises: ValueError: if any arguments is invalid. """ self._saved_model_exporter = _SavedModelExporter(name, serving_input_receiver_fn, assets_extra, as_text) @property def name(self): return self._saved_model_exporter.name def export(self, estimator, export_path, checkpoint_path, eval_result, is_the_final_export): if not is_the_final_export: return None tf.compat.v1.logging.info( 'Performing the final export in the end of training.') return self._saved_model_exporter.export(estimator, export_path, checkpoint_path, eval_result, is_the_final_export) @estimator_export('estimator.LatestExporter') class LatestExporter(Exporter): """This class regularly exports the serving graph and checkpoints. In addition to exporting, this class also garbage collects stale exports. """ def __init__(self, name, serving_input_receiver_fn, assets_extra=None, as_text=False, exports_to_keep=5): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. Args: name: unique name of this `Exporter` that is going to be used in the export path. serving_input_receiver_fn: a function that takes no arguments and returns a `ServingInputReceiver`. assets_extra: An optional dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. The corresponding value gives the full path of the source file to be copied. For example, the simple case of copying a single file without renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. 
as_text: whether to write the SavedModel proto in text format. Defaults to `False`. exports_to_keep: Number of exports to keep. Older exports will be garbage-collected. Defaults to 5. Set to `None` to disable garbage collection. Raises: ValueError: if any arguments is invalid. """ self._saved_model_exporter = _SavedModelExporter(name, serving_input_receiver_fn, assets_extra, as_text) self._exports_to_keep = exports_to_keep if exports_to_keep is not None and exports_to_keep <= 0: raise ValueError( '`exports_to_keep`, if provided, must be positive number') @property def name(self): return self._saved_model_exporter.name def export(self, estimator, export_path, checkpoint_path, eval_result, is_the_final_export): export_result = self._saved_model_exporter.export(estimator, export_path, checkpoint_path, eval_result, is_the_final_export) self._garbage_collect_exports(export_path) return export_result def _garbage_collect_exports(self, export_dir_base): """Deletes older exports, retaining only a given number of the most recent. Export subdirectories are assumed to be named with monotonically increasing integers; the most recent are taken to be those with the largest values. Args: export_dir_base: the base directory under which each export is in a versioned subdirectory. """ if self._exports_to_keep is None: return def _export_version_parser(path): # create a simple parser that pulls the export_version from the directory. 
filename = os.path.basename(path.path) if not (len(filename) == 10 and filename.isdigit()): return None return path._replace(export_version=int(filename)) # pylint: disable=protected-access keep_filter = gc._largest_export_versions(self._exports_to_keep) delete_filter = gc._negation(keep_filter) for p in delete_filter( gc._get_paths(export_dir_base, parser=_export_version_parser)): try: tf.compat.v1.gfile.DeleteRecursively(p.path) except tf.errors.NotFoundError as e: tf.compat.v1.logging.warn('Can not delete %s recursively: %s', p.path, e) # pylint: enable=protected-access ================================================ FILE: tensorflow_estimator/python/estimator/exporter_test.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for `Exporter`s."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tempfile
import time

import tensorflow as tf
from tensorflow.python.eager import context
from tensorflow.python.framework import test_util
from tensorflow.python.platform import gfile
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator import exporter as exporter_lib


class BestExporterTest(tf.test.TestCase):
  # Exercises BestExporter: export-on-improvement, preemption recovery from
  # event files, first-export semantics and garbage collection.

  def test_error_out_if_exports_to_keep_is_zero(self):

    def _serving_input_receiver_fn():
      pass

    with self.assertRaisesRegexp(ValueError, "positive number"):
      exporter = exporter_lib.BestExporter(
          name="best_exporter",
          serving_input_receiver_fn=_serving_input_receiver_fn,
          exports_to_keep=0)
      self.assertEqual("best_exporter", exporter.name)

  def test_best_exporter(self):

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp()
    tf.compat.v1.gfile.MkDir(export_dir_base)
    tf.compat.v1.gfile.MkDir(export_dir_base + "/export")
    tf.compat.v1.gfile.MkDir(export_dir_base + "/eval")

    exporter = exporter_lib.BestExporter(
        name="best_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False,
        exports_to_keep=5)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.export_saved_model.return_value = "export_result_path"
    estimator.model_dir = export_dir_base

    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {}, False)

    self.assertEqual("export_result_path", export_result)
    estimator.export_saved_model.assert_called_with(
        export_dir_base,
        _serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False,
        checkpoint_path="checkpoint_path")

  def test_best_export_is_saved(self):

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp()
    tf.compat.v1.gfile.MkDir(export_dir_base)
    tf.compat.v1.gfile.MkDir(export_dir_base + "/export")
    tf.compat.v1.gfile.MkDir(export_dir_base + "/eval")

    exporter = exporter_lib.BestExporter(
        name="best_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False,
        exports_to_keep=1)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.export_saved_model.return_value = "export_result_path"
    estimator.model_dir = export_dir_base

    # First export always happens.
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 0.5}, False)

    self.assertTrue(estimator.export_saved_model.called)
    self.assertEqual("export_result_path", export_result)

    # Worse loss: export is skipped.
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 0.6}, False)
    self.assertEqual(None, export_result)

    # Better loss: export happens again.
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 0.4}, False)
    self.assertEqual("export_result_path", export_result)

  def test_best_exporter_with_preemption(self):

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp()
    tf.compat.v1.gfile.MkDir(export_dir_base)
    tf.compat.v1.gfile.MkDir(export_dir_base + "/export")
    tf.compat.v1.gfile.MkDir(export_dir_base + "/eval")

    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
    # _write_dict_to_summary is only called internally within graph mode.
    with context.graph_mode():
      estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 50}, 1)
      estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)

    exporter = exporter_lib.BestExporter(
        name="best_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        event_file_pattern="eval_continuous/*.tfevents.*",
        assets_extra={"from/path": "to/path"},
        as_text=False,
        exports_to_keep=1)

    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.model_dir = export_dir_base
    estimator.export_saved_model.return_value = "export_result_path"

    # First export of this exporter instance happens even though 100 is worse
    # than the best (50) recovered from the event files.
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 100}, False)
    self.assertEqual("export_result_path", export_result)

    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 10}, False)
    self.assertEqual("export_result_path", export_result)

    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 20}, False)
    self.assertEqual(None, export_result)

  @test_util.run_v1_only("Tests v1 only symbols")
  def test_best_exporter_with_empty_event(self):

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp()
    tf.compat.v1.gfile.MkDir(export_dir_base)
    tf.compat.v1.gfile.MkDir(export_dir_base + "/export")
    tf.compat.v1.gfile.MkDir(export_dir_base + "/eval")

    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
    # An empty summary dict must not break best-metric recovery.
    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1)
    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)

    exporter = exporter_lib.BestExporter(
        name="best_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        event_file_pattern="eval_continuous/*.tfevents.*",
        assets_extra={"from/path": "to/path"},
        as_text=False,
        exports_to_keep=1)

    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.model_dir = export_dir_base
    estimator.export_saved_model.return_value = "export_result_path"

    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 100}, False)
    self.assertEqual("export_result_path", export_result)

    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {"loss": 10}, False)
    self.assertEqual("export_result_path", export_result)

  def test_the_first_export(self):

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp()
    tf.compat.v1.gfile.MkDir(export_dir_base)
    tf.compat.v1.gfile.MkDir(export_dir_base + "/export")
    tf.compat.v1.gfile.MkDir(export_dir_base + "/eval")

    exporter = exporter_lib.BestExporter(
        name="best_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        event_file_pattern="eval_continuous/*.tfevents.*",
        assets_extra={"from/path": "to/path"},
        as_text=False,
        exports_to_keep=1)

    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.model_dir = export_dir_base
    estimator.export_saved_model.return_value = "export_result_path"

    # Note that evaluation occurs before export
    with context.graph_mode():
      eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
      first_evaluation_results = {"loss": 60}
      estimator_lib._write_dict_to_summary(eval_dir_base,
                                           first_evaluation_results, 1)

    # export the model with the same results computed in the first evaluation
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path",
                                    first_evaluation_results, False)

    self.assertEqual("export_result_path", export_result)

  def test_garbage_collect_exports(self):
    export_dir_base = tempfile.mkdtemp()
    tf.compat.v1.gfile.MkDir(export_dir_base)
    tf.compat.v1.gfile.MkDir(export_dir_base + "/export")
    tf.compat.v1.gfile.MkDir(export_dir_base + "/eval")

    export_dir_1 = _create_test_export_dir(export_dir_base)
    export_dir_2 = _create_test_export_dir(export_dir_base)
    export_dir_3 = _create_test_export_dir(export_dir_base)
    export_dir_4 = _create_test_export_dir(export_dir_base)

    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_1))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_2))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_3))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_4))

    def _serving_input_receiver_fn():
      return tf.constant([1]), None

    exporter = exporter_lib.BestExporter(
        name="best_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        exports_to_keep=2)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.model_dir = export_dir_base
    # Garbage collect all but the most recent 2 exports,
    # where recency is determined based on the timestamp directory names.
    exporter.export(estimator, export_dir_base, None, None, False)

    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_1))
    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_2))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_3))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_4))


class LatestExporterTest(tf.test.TestCase):
  # Exercises LatestExporter/FinalExporter export and garbage collection.

  def test_error_out_if_exports_to_keep_is_zero(self):

    def _serving_input_receiver_fn():
      pass

    with self.assertRaisesRegexp(ValueError, "positive number"):
      exporter = exporter_lib.LatestExporter(
          name="latest_exporter",
          serving_input_receiver_fn=_serving_input_receiver_fn,
          exports_to_keep=0)
      self.assertEqual("latest_exporter", exporter.name)

  def test_latest_exporter(self):

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp() + "export/"
    tf.compat.v1.gfile.MkDir(export_dir_base)

    exporter = exporter_lib.LatestExporter(
        name="latest_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False,
        exports_to_keep=5)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.export_saved_model.return_value = "export_result_path"

    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {}, False)

    self.assertEqual("export_result_path", export_result)
    estimator.export_saved_model.assert_called_with(
        export_dir_base,
        _serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False,
        checkpoint_path="checkpoint_path")

  def test_only_the_last_export_is_saved(self):
    # NOTE(review): despite living in LatestExporterTest, this test covers
    # FinalExporter — verify placement against the upstream test layout.

    def _serving_input_receiver_fn():
      pass

    export_dir_base = tempfile.mkdtemp() + "export/"
    tf.compat.v1.gfile.MkDir(export_dir_base)

    exporter = exporter_lib.FinalExporter(
        name="latest_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    estimator.export_saved_model.return_value = "export_result_path"

    # Intermediate (non-final) export is skipped.
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {}, False)

    self.assertFalse(estimator.export_saved_model.called)
    self.assertEqual(None, export_result)

    # Final export goes through.
    export_result = exporter.export(estimator, export_dir_base,
                                    "checkpoint_path", {}, True)

    self.assertEqual("export_result_path", export_result)
    estimator.export_saved_model.assert_called_with(
        export_dir_base,
        _serving_input_receiver_fn,
        assets_extra={"from/path": "to/path"},
        as_text=False,
        checkpoint_path="checkpoint_path")

  def test_garbage_collect_exports(self):
    export_dir_base = tempfile.mkdtemp() + "export/"
    tf.compat.v1.gfile.MkDir(export_dir_base)
    export_dir_1 = _create_test_export_dir(export_dir_base)
    export_dir_2 = _create_test_export_dir(export_dir_base)
    export_dir_3 = _create_test_export_dir(export_dir_base)
    export_dir_4 = _create_test_export_dir(export_dir_base)

    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_1))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_2))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_3))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_4))

    def _serving_input_receiver_fn():
      return tf.constant([1]), None

    exporter = exporter_lib.LatestExporter(
        name="latest_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        exports_to_keep=2)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    # Garbage collect all but the most recent 2 exports,
    # where recency is determined based on the timestamp directory names.
    exporter.export(estimator, export_dir_base, None, None, False)

    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_1))
    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_2))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_3))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_4))

  def test_garbage_collect_exports_with_trailing_delimiter(self):
    export_dir_base = tempfile.mkdtemp() + "export/"
    tf.compat.v1.gfile.MkDir(export_dir_base)
    export_dir_1 = _create_test_export_dir(export_dir_base)
    export_dir_2 = _create_test_export_dir(export_dir_base)
    export_dir_3 = _create_test_export_dir(export_dir_base)
    export_dir_4 = _create_test_export_dir(export_dir_base)

    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_1))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_2))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_3))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_4))

    def _serving_input_receiver_fn():
      return tf.constant([1]), None

    exporter = exporter_lib.LatestExporter(
        name="latest_exporter",
        serving_input_receiver_fn=_serving_input_receiver_fn,
        exports_to_keep=1)
    estimator = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    # Garbage collect all but the most recent 2 exports,
    # where recency is determined based on the timestamp directory names.
    # ListDirectory is mocked to return names with a trailing delimiter, which
    # the version parser must still handle.
    with tf.compat.v1.test.mock.patch.object(
        gfile, "ListDirectory") as mock_list_directory:
      mock_list_directory.return_value = [
          os.path.basename(export_dir_1) + b"/",
          os.path.basename(export_dir_2) + b"/",
          os.path.basename(export_dir_3) + b"/",
          os.path.basename(export_dir_4) + b"/",
      ]
      exporter.export(estimator, export_dir_base, None, None, False)

    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_1))
    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_2))
    self.assertFalse(tf.compat.v1.gfile.Exists(export_dir_3))
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir_4))


def _create_test_export_dir(export_dir_base):
  # Creates a timestamped export dir; sleeps 2s so successive calls get
  # strictly increasing (seconds-resolution) timestamps.
  export_dir = _get_timestamped_export_dir(export_dir_base)
  tf.compat.v1.gfile.MkDir(export_dir)
  time.sleep(2)
  return export_dir


def _get_timestamped_export_dir(export_dir_base):
  # When we create a timestamped directory, there is a small chance that the
  # directory already exists because another worker is also writing exports.
  # In this case we just wait one second to get a new timestamp and try again.
  # If this fails several times in a row, then something is seriously wrong.
  max_directory_creation_attempts = 10

  attempts = 0
  while attempts < max_directory_creation_attempts:
    export_timestamp = int(time.time())

    export_dir = os.path.join(
        tf.compat.as_bytes(export_dir_base),
        tf.compat.as_bytes(str(export_timestamp)))
    if not tf.compat.v1.gfile.Exists(export_dir):
      # Collisions are still possible (though extremely unlikely): this
      # directory is not actually created yet, but it will be almost
      # instantly on return from this function.
      return export_dir
    time.sleep(1)
    attempts += 1
    tf.compat.v1.logging.warn(
        "Export directory {} already exists; retrying (attempt {}/{})".format(
            export_dir, attempts, max_directory_creation_attempts))
  raise RuntimeError("Failed to obtain a unique export directory name after "
                     "{} attempts.".format(max_directory_creation_attempts))


if __name__ == "__main__":
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/extenders.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Extenders of tf.estimator.Estimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.util import function_utils
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

# The only argument names a user-supplied metric_fn may declare.
_VALID_METRIC_FN_ARGS = set(['features', 'labels', 'predictions', 'config'])


@estimator_export('estimator.add_metrics')
def add_metrics(estimator, metric_fn):
  """Creates a new `tf.estimator.Estimator` which has given metrics.
Example: ```python def my_auc(labels, predictions): auc_metric = tf_keras.metrics.AUC(name="my_auc") auc_metric.update_state(y_true=labels, y_pred=predictions['logistic']) return {'auc': auc_metric} estimator = tf.estimator.DNNClassifier(...) estimator = tf.estimator.add_metrics(estimator, my_auc) estimator.train(...) estimator.evaluate(...) ``` Example usage of custom metric which uses features: ```python def my_auc(labels, predictions, features): auc_metric = tf_keras.metrics.AUC(name="my_auc") auc_metric.update_state(y_true=labels, y_pred=predictions['logistic'], sample_weight=features['weight']) return {'auc': auc_metric} estimator = tf.estimator.DNNClassifier(...) estimator = tf.estimator.add_metrics(estimator, my_auc) estimator.train(...) estimator.evaluate(...) ``` Args: estimator: A `tf.estimator.Estimator` object. metric_fn: A function which should obey the following signature: - Args: can only have following four arguments in any order: * predictions: Predictions `Tensor` or dict of `Tensor` created by given `estimator`. * features: Input `dict` of `Tensor` objects created by `input_fn` which is given to `estimator.evaluate` as an argument. * labels: Labels `Tensor` or dict of `Tensor` created by `input_fn` which is given to `estimator.evaluate` as an argument. * config: config attribute of the `estimator`. - Returns: Dict of metric results keyed by name. Final metrics are a union of this and `estimator's` existing metrics. If there is a name conflict between this and `estimator`s existing metrics, this will override the existing one. The values of the dict are the results of calling a metric function, namely a `(metric_tensor, update_op)` tuple. Returns: A new `tf.estimator.Estimator` which has a union of original metrics with given ones. 
""" _verify_metric_fn_args(metric_fn) def new_model_fn(features, labels, mode, config): spec = estimator.model_fn(features, labels, mode, config) if mode != ModeKeys.EVAL: return spec new_metrics = _call_metric_fn(metric_fn, features, labels, spec.predictions, config) all_metrics = spec.eval_metric_ops or {} all_metrics.update(new_metrics) return spec._replace(eval_metric_ops=all_metrics) return estimator_lib.Estimator( model_fn=new_model_fn, model_dir=estimator.model_dir, config=estimator.config, # pylint: disable=protected-access warm_start_from=estimator._warm_start_settings) # pylint: enable=protected-access def _verify_metric_fn_args(metric_fn): args = set(function_utils.fn_args(metric_fn)) invalid_args = list(args - _VALID_METRIC_FN_ARGS) if invalid_args: raise ValueError('metric_fn (%s) has following not expected args: %s' % (metric_fn, invalid_args)) def _call_metric_fn(metric_fn, features, labels, predictions, config): """Calls metric fn with proper arguments.""" metric_fn_args = function_utils.fn_args(metric_fn) kwargs = {} if 'features' in metric_fn_args: kwargs['features'] = features if 'labels' in metric_fn_args: kwargs['labels'] = labels if 'predictions' in metric_fn_args: kwargs['predictions'] = predictions if 'config' in metric_fn_args: kwargs['config'] = config return metric_fn(**kwargs) ================================================ FILE: tensorflow_estimator/python/estimator/extenders_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""extenders tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow_estimator.python.estimator import extenders
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator import run_config
from tensorflow_estimator.python.estimator.canned import linear


def get_input_fn(x, y):
  """Returns an input_fn yielding features `{'x': ...}` and labels from `y`."""

  def input_fn():
    dataset = tf.compat.v1.data.Dataset.from_tensor_slices({'x': x, 'y': y})
    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
    features = iterator.get_next()
    labels = features.pop('y')
    return features, labels

  return input_fn


class AddMetricsTest(tf.test.TestCase):
  """Tests for `extenders.add_metrics`."""

  def test_should_add_metrics(self):

    def _test_metric_fn(metric_fn):
      input_fn = get_input_fn(
          x=np.arange(4)[:, None, None], y=np.ones(4)[:, None])
      config = run_config.RunConfig(log_step_count_steps=1)
      estimator = linear.LinearClassifierV2(
          [tf.feature_column.numeric_column('x')], config=config)
      estimator = extenders.add_metrics(estimator, metric_fn)

      estimator.train(input_fn=input_fn)
      metrics = estimator.evaluate(input_fn=input_fn)
      self.assertIn('mean_x', metrics)
      self.assertEqual(1.5, metrics['mean_x'])
      # assert that it keeps original estimators metrics
      self.assertIn('auc', metrics)

    def metric_fn(features):
      metric = tf_keras.metrics.Mean()
      metric.update_state(features['x'])
      return {'mean_x': metric}

    _test_metric_fn(metric_fn)

  def test_should_error_out_for_not_recognized_args(self):
    estimator = linear.LinearClassifierV2(
        [tf.feature_column.numeric_column('x')])

    def metric_fn(features, not_recognized):
      _, _ = features, not_recognized
      return {}

    with self.assertRaisesRegexp(ValueError, 'not_recognized'):
      estimator = extenders.add_metrics(estimator, metric_fn)

  def test_all_supported_args(self):
    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
    estimator = linear.LinearClassifierV2(
        [tf.feature_column.numeric_column('x')])

    def metric_fn(features, predictions, labels, config):
      # Each supported argument must be populated by add_metrics.
      self.assertIn('x', features)
      self.assertIsNotNone(labels)
      self.assertIn('logistic', predictions)
      self.assertTrue(isinstance(config, run_config.RunConfig))
      return {}

    estimator = extenders.add_metrics(estimator, metric_fn)

    estimator.train(input_fn=input_fn)
    estimator.evaluate(input_fn=input_fn)

  def test_all_supported_args_in_different_order(self):
    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
    estimator = linear.LinearClassifierV2(
        [tf.feature_column.numeric_column('x')])

    def metric_fn(labels, config, features, predictions):
      # Arguments are matched by name, so declaration order is irrelevant.
      self.assertIn('x', features)
      self.assertIsNotNone(labels)
      self.assertIn('logistic', predictions)
      self.assertTrue(isinstance(config, run_config.RunConfig))
      return {}

    estimator = extenders.add_metrics(estimator, metric_fn)

    estimator.train(input_fn=input_fn)
    estimator.evaluate(input_fn=input_fn)

  def test_all_args_are_optional(self):

    def _test_metric_fn(metric_fn):
      input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
      estimator = linear.LinearClassifierV2(
          [tf.feature_column.numeric_column('x')])
      estimator = extenders.add_metrics(estimator, metric_fn)

      estimator.train(input_fn=input_fn)
      metrics = estimator.evaluate(input_fn=input_fn)
      self.assertEqual(2., metrics['two'])

    def metric_fn():
      metric = tf_keras.metrics.Mean()
      metric.update_state(tf.constant([2.]))
      return {'two': metric}

    _test_metric_fn(metric_fn)

  def test_overrides_existing_metrics(self):

    def _test_metric_fn(metric_fn):
      input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
      estimator = linear.LinearClassifierV2(
          [tf.feature_column.numeric_column('x')])
      estimator.train(input_fn=input_fn)
      metrics = estimator.evaluate(input_fn=input_fn)
      self.assertNotEqual(2., metrics['auc'])

      estimator = extenders.add_metrics(estimator, metric_fn)
      metrics = estimator.evaluate(input_fn=input_fn)
      # The added metric shadows the estimator's built-in 'auc'.
      self.assertEqual(2., metrics['auc'])

    def metric_fn():
      metric = tf_keras.metrics.Mean()
      metric.update_state(tf.constant([2.]))
      return {'auc': metric}

    _test_metric_fn(metric_fn)


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/gc.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""System for specifying garbage collection (GC) of path based data.

This framework allows for GC of data specified by path names, for example files
on disk.  gc.Path objects each represent a single item stored at a path and may
be a base directory,

  /tmp/exports/0/...
  /tmp/exports/1/...
  ...

or a fully qualified file,

  /tmp/train-1.ckpt
  /tmp/train-2.ckpt
  ...

A gc filter function takes and returns a list of gc.Path items.  Filter
functions are responsible for selecting Path items for preservation or deletion.
Note that functions should always return a sorted list.

For example,

  base_dir = "/tmp"

  # Create the directories.
  for e in xrange(10):
    os.mkdir("%s/%d" % (base_dir, e), 0o755)

  # Create a simple parser that pulls the export_version from the directory.
path_regex = "^" + re.escape(base_dir) + "/(\\d+)$" def parser(path): match = re.match(path_regex, path.path) if not match: return None return path._replace(export_version=int(match.group(1))) path_list = gc._get_paths("/tmp", parser) # contains all ten Paths every_fifth = gc._mod_export_version(5) print(every_fifth(path_list)) # shows ["/tmp/0", "/tmp/5"] largest_three = gc.largest_export_versions(3) print(largest_three(all_paths)) # shows ["/tmp/7", "/tmp/8", "/tmp/9"] both = gc._union(every_fifth, largest_three) print(both(all_paths)) # shows ["/tmp/0", "/tmp/5", # "/tmp/7", "/tmp/8", "/tmp/9"] # Delete everything not in 'both'. to_delete = gc._negation(both) for p in to_delete(all_paths): gfile.DeleteRecursively(p.path) # deletes: "/tmp/1", "/tmp/2", # "/tmp/3", "/tmp/4", "/tmp/6", """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import heapq import math import os import tensorflow as tf from tensorflow.python.platform import gfile Path = collections.namedtuple('Path', 'path export_version') def _largest_export_versions(n): """Creates a filter that keeps the largest n export versions. Args: n: number of versions to keep. Returns: A filter function that keeps the n largest paths. """ def keep(paths): heap = [] for idx, path in enumerate(paths): if path.export_version is not None: heapq.heappush(heap, (path.export_version, idx)) keepers = [paths[i] for _, i in heapq.nlargest(n, heap)] return sorted(keepers) return keep def _one_of_every_n_export_versions(n): """Creates a filter that keeps one of every n export versions. Args: n: interval size. Returns: A filter function that keeps exactly one path from each interval [0, n], (n, 2n], (2n, 3n], etc... If more than one path exists in an interval the largest is kept. 
""" def keep(paths): """A filter function that keeps exactly one out of every n paths.""" keeper_map = {} # map from interval to largest path seen in that interval for p in paths: if p.export_version is None: # Skip missing export_versions. continue # Find the interval (with a special case to map export_version = 0 to # interval 0. interval = math.floor( (p.export_version - 1) / n) if p.export_version else 0 existing = keeper_map.get(interval, None) if (not existing) or (existing.export_version < p.export_version): keeper_map[interval] = p return sorted(keeper_map.values()) return keep def _mod_export_version(n): """Creates a filter that keeps every export that is a multiple of n. Args: n: step size. Returns: A filter function that keeps paths where export_version % n == 0. """ def keep(paths): keepers = [] for p in paths: if p.export_version % n == 0: keepers.append(p) return sorted(keepers) return keep def _union(lf, rf): """Creates a filter that keeps the union of two filters. Args: lf: first filter rf: second filter Returns: A filter function that keeps the n largest paths. """ def keep(paths): l = set(lf(paths)) r = set(rf(paths)) return sorted(list(l | r)) return keep def _negation(f): """Negate a filter. Args: f: filter function to invert Returns: A filter function that returns the negation of f. """ def keep(paths): l = set(paths) r = set(f(paths)) return sorted(list(l - r)) return keep def _get_paths(base_dir, parser): """Gets a list of Paths in a given directory. Args: base_dir: directory. parser: a function which gets the raw Path and can augment it with information such as the export_version, or ignore the path by returning None. An example parser may extract the export version from a path such as "/tmp/exports/100" an another may extract from a full file name such as "/tmp/checkpoint-99.out". Returns: A list of Paths contained in the base directory with the parsing function applied. 
By default the following fields are populated, - Path.path The parsing function is responsible for populating, - Path.export_version """ # We are mocking this in the test, hence we should not use public API raw_paths = gfile.ListDirectory(base_dir) paths = [] for r in raw_paths: # ListDirectory() return paths with "/" at the last if base_dir was GCS URL r = tf.compat.as_str_any(r) if r[-1] == '/': r = r[0:len(r) - 1] p = parser(Path(os.path.join(tf.compat.as_str_any(base_dir), r), None)) if p: paths.append(p) return sorted(paths) ================================================ FILE: tensorflow_estimator/python/estimator/gc_test.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for garbage collection utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import re from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf from tensorflow.python.platform import gfile from tensorflow_estimator.python.estimator import gc def _create_parser(base_dir): # create a simple parser that pulls the export_version from the directory. 
  def parser(path):
    # Modify the path object for RegEx match for Windows Paths
    if os.name == "nt":
      match = re.match(
          "^" + tf.compat.as_str_any(base_dir).replace("\\", "/") + "/(\\d+)$",
          tf.compat.as_str_any(path.path).replace("\\", "/"))
    else:
      match = re.match("^" + tf.compat.as_str_any(base_dir) + "/(\\d+)$",
                       tf.compat.as_str_any(path.path))
    if not match:
      return None
    return path._replace(export_version=int(match.group(1)))

  return parser


class GcTest(tf.test.TestCase):
  """Tests for the gc filter functions and path listing."""

  def testLargestExportVersions(self):
    paths = [gc.Path("/foo", 8), gc.Path("/foo", 9), gc.Path("/foo", 10)]
    newest = gc._largest_export_versions(2)
    n = newest(paths)
    self.assertEqual(n, [gc.Path("/foo", 9), gc.Path("/foo", 10)])

  def testLargestExportVersionsDoesNotDeleteZeroFolder(self):
    paths = [gc.Path("/foo", 0), gc.Path("/foo", 3)]
    newest = gc._largest_export_versions(2)
    n = newest(paths)
    self.assertEqual(n, [gc.Path("/foo", 0), gc.Path("/foo", 3)])

  def testModExportVersion(self):
    paths = [
        gc.Path("/foo", 4),
        gc.Path("/foo", 5),
        gc.Path("/foo", 6),
        gc.Path("/foo", 9)
    ]
    mod = gc._mod_export_version(2)
    self.assertEqual(mod(paths), [gc.Path("/foo", 4), gc.Path("/foo", 6)])
    mod = gc._mod_export_version(3)
    self.assertEqual(mod(paths), [gc.Path("/foo", 6), gc.Path("/foo", 9)])

  def testOneOfEveryNExportVersions(self):
    paths = [
        gc.Path("/foo", 0),
        gc.Path("/foo", 1),
        gc.Path("/foo", 3),
        gc.Path("/foo", 5),
        gc.Path("/foo", 6),
        gc.Path("/foo", 7),
        gc.Path("/foo", 8),
        gc.Path("/foo", 33)
    ]
    one_of = gc._one_of_every_n_export_versions(3)
    self.assertEqual(
        one_of(paths), [
            gc.Path("/foo", 3),
            gc.Path("/foo", 6),
            gc.Path("/foo", 8),
            gc.Path("/foo", 33)
        ])

  def testOneOfEveryNExportVersionsZero(self):
    # Zero is a special case since it gets rolled into the first interval.
    # Test that here.
    paths = [gc.Path("/foo", 0), gc.Path("/foo", 4), gc.Path("/foo", 5)]
    one_of = gc._one_of_every_n_export_versions(3)
    self.assertEqual(one_of(paths), [gc.Path("/foo", 0), gc.Path("/foo", 5)])

  def testUnion(self):
    paths = []
    for i in xrange(10):
      paths.append(gc.Path("/foo", i))
    f = gc._union(gc._largest_export_versions(3), gc._mod_export_version(3))
    self.assertEqual(
        f(paths), [
            gc.Path("/foo", 0),
            gc.Path("/foo", 3),
            gc.Path("/foo", 6),
            gc.Path("/foo", 7),
            gc.Path("/foo", 8),
            gc.Path("/foo", 9)
        ])

  def testNegation(self):
    paths = [
        gc.Path("/foo", 4),
        gc.Path("/foo", 5),
        gc.Path("/foo", 6),
        gc.Path("/foo", 9)
    ]
    mod = gc._negation(gc._mod_export_version(2))
    self.assertEqual(mod(paths), [gc.Path("/foo", 5), gc.Path("/foo", 9)])
    mod = gc._negation(gc._mod_export_version(3))
    self.assertEqual(mod(paths), [gc.Path("/foo", 4), gc.Path("/foo", 5)])

  def testPathsWithParse(self):
    base_dir = os.path.join(tf.compat.v1.test.get_temp_dir(), "paths_parse")
    self.assertFalse(tf.compat.v1.gfile.Exists(base_dir))
    for p in xrange(3):
      tf.compat.v1.gfile.MakeDirs(os.path.join(base_dir, "%d" % p))
    # add a base_directory to ignore
    tf.compat.v1.gfile.MakeDirs(os.path.join(base_dir, "ignore"))

    self.assertEqual(
        gc._get_paths(base_dir, _create_parser(base_dir)), [
            gc.Path(os.path.join(base_dir, "0"), 0),
            gc.Path(os.path.join(base_dir, "1"), 1),
            gc.Path(os.path.join(base_dir, "2"), 2)
        ])

    tf.compat.v1.gfile.DeleteRecursively(base_dir)

  def testMixedStrTypes(self):
    temp_dir = tf.compat.as_bytes(tf.compat.v1.test.get_temp_dir())

    for sub_dir in ["str", b"bytes", u"unicode"]:
      base_dir = os.path.join(
          (temp_dir if isinstance(sub_dir, bytes) else temp_dir.decode()),
          sub_dir)
      self.assertFalse(tf.compat.v1.gfile.Exists(base_dir))
      tf.compat.v1.gfile.MakeDirs(
          os.path.join(tf.compat.as_str_any(base_dir), "42"))
      gc._get_paths(base_dir, _create_parser(base_dir))
      tf.compat.v1.gfile.DeleteRecursively(base_dir)

  def testGcsDirWithSeparator(self):
    base_dir = "gs://bucket/foo"
    with tf.compat.v1.test.mock.patch.object(
        gfile, "ListDirectory") as mock_list_directory:
      # gfile.ListDirectory returns directory names with separator '/'
      mock_list_directory.return_value = ["0/", "1/"]
      self.assertEqual(
          gc._get_paths(base_dir, _create_parser(base_dir)), [
              gc.Path(os.path.join(base_dir, "0"), 0),
              gc.Path(os.path.join(base_dir, "1"), 1)
          ])


if __name__ == "__main__":
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/head/__init__.py
================================================


================================================
FILE: tensorflow_estimator/python/estimator/head/base_head.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstractions for the base head class."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc

import six
import tensorflow as tf
from tensorflow.python.feature_column import feature_column_lib
from tensorflow.python.feature_column.feature_column import _LazyBuilder
from tensorflow.python.feature_column.feature_column import _NumericColumn
from tensorflow.python.framework import ops
from tensorflow.python.util import function_utils
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.export import export_output

DEFAULT_SERVING_KEY = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY

# The above default is defined by TF Serving, but these next three are just
# a local convention without any special meaning.
CLASSIFY_SERVING_KEY = 'classification'
REGRESS_SERVING_KEY = 'regression'
PREDICT_SERVING_KEY = 'predict'


@estimator_export('estimator.Head')
@six.add_metaclass(abc.ABCMeta)
class Head(object):
  """Interface for the head/top of a model.

  Head sits on top of the model network and handles computing the outputs of
  the network. Given logits (or output of a hidden layer), a Head knows how to
  compute predictions, loss, train_op, metrics and export outputs. It is meant
  to:

  1. Simplify writing model_fn and to make model_fn more configurable for
     Estimator.
  2. Simplify creating loss and metrics for the train and test loop in Eager
     execution.
  3. Support wide range of machine learning models. Since most heads can work
     with logits, they can support DNN, RNN, Wide, Wide&Deep, Global objectives,
     Gradient boosted trees and many other types of machine learning models.

  Common usage:
  Here is simplified model_fn to build a DNN regression model.
  ```python
  def _my_dnn_model_fn(features, labels, mode, params, config=None):
    # Optionally your callers can pass head to model_fn as a param.
    head = tf.estimator.RegressionHead(...)

    feature_columns = tf.feature_column.numeric_column(...)
    feature_layer = tf_keras.layers.DenseFeatures(feature_columns)
    inputs = feature_layer(features)

    # Compute logits with tf_keras.layers API
    hidden_layer0 = tf_keras.layers.Dense(
        units=1000, activation="relu")(inputs)
    hidden_layer1 = tf_keras.layers.Dense(
        units=500, activation="relu")(hidden_layer0)
    logits = tf_keras.layers.Dense(
        units=head.logits_dimension, activation=None)(hidden_layer1)

    # Or use Keras model for logits computation
    model = tf_keras.Sequential()
    model.add(tf_keras.layers.Dense(units=1000, activation="relu"))
    model.add(tf_keras.layers.Dense(units=500, activation="relu"))
    model.add(tf_keras.layers.Dense(
        units=head.logits_dimension, activation=None))
    logits = model(inputs)

    return head.create_estimator_spec(
        features=features,
        labels=labels,
        mode=mode,
        logits=logits,
        optimizer=optimizer)
  ```
  """

  @abc.abstractproperty
  def name(self):
    """The name of this head.

    Returns:
      A string.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractproperty
  def logits_dimension(self):
    """Size of the last dimension of the logits `Tensor`.

    Often is the number of classes, labels, or real values to be predicted.
    Typically, logits is of shape `[batch_size, logits_dimension]`.

    Returns:
      The expected size of the `logits` tensor.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractproperty
  def loss_reduction(self):
    """One of `tf.losses.Reduction`.

    Describes how to reduce training loss over batch, such as mean or sum.

    Returns:
      The type of loss reduction used in the head.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractmethod
  def loss(self,
           labels,
           logits,
           features=None,
           mode=None,
           regularization_losses=None):
    """Returns a loss `Tensor` from provided arguments.

    Note that, the args of `features` and `mode` are most likely not used, but
    some Head implementations may require them.

    Args:
      labels: Labels `Tensor`, or `dict` mapping string label names to `Tensor`
        objects of the label values.
      logits: Logits `Tensor` to be used for loss construction.
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Often to be used to fetch example-weight tensor.
      mode: Estimator's `ModeKeys`. To be used in case loss calculation is
        different in Train and Eval mode.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      A scalar `Tensor` representing regularized training loss used in train
      and eval.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractmethod
  def predictions(self, logits, keys=None):
    """Returns a `dict` of predictions from provided logits.

    Args:
      logits: Logits `Tensor` to be used for prediction construction.
      keys: A list of `string` for prediction keys. Defaults to `None`, meaning
        if not specified, predictions will be created for all the pre-defined
        valid keys in the head.

    Returns:
      A `dict` of predicted `Tensor` keyed by prediction name.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractmethod
  def metrics(self, regularization_losses=None):
    """Returns a `dict` of metric objects.

    Args:
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      A `dict` of metrics keyed by string name. The value is an instance of
      `Metric` class.
    """
    raise NotImplementedError('Calling an abstract method.')

  @abc.abstractmethod
  def update_metrics(self,
                     eval_metrics,
                     features,
                     logits,
                     labels,
                     mode=None,
                     regularization_losses=None):
    """Updates metric objects and returns a `dict` of the updated metrics.

    Args:
      eval_metrics: A `dict` of metrics to be updated.
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Often to be used to fetch example-weight tensor.
      logits: logits `Tensor` to be used for metrics update.
      labels: Labels `Tensor`, or `dict` mapping string label names to `Tensor`
        objects of the label values.
      mode: Estimator's `ModeKeys`. In most cases, this arg is not used and can
        be removed in the method implementation.
      regularization_losses: A list of additional scalar losses to be added to
        the training and evaluation loss, such as regularization losses.  Note
        that, the `mode` arg is not used in the `tf.estimator.*Head`. If the
        update of the metrics doesn't rely on `mode`, it can be safely ignored
        in the method signature.

    Returns:
      A `dict` of updated metrics keyed by name. The value is an instance of
      `Metric` class.
    """
    raise NotImplementedError('Calling an abstract method.')

  def _summary_key(self, key):
    # Namespace summaries by head name so multi-head models don't collide.
    return '{}/{}'.format(key, self.name) if self.name else key

  def create_estimator_spec(self,
                            features,
                            mode,
                            logits,
                            labels=None,
                            optimizer=None,
                            trainable_variables=None,
                            train_op_fn=None,
                            update_ops=None,
                            regularization_losses=None):
    """Returns `EstimatorSpec` that a model_fn can return.

    It is recommended to pass all args via name.

    Args:
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Often to be used to fetch example-weight tensor.
      mode: Estimator's `ModeKeys`.
      logits: Logits `Tensor` to be used by the head.
      labels: Labels `Tensor`, or `dict` mapping string label names to `Tensor`
        objects of the label values.
      optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets `train_op = optimizer.get_updates(loss,
        trainable_variables)`, which updates variables to minimize `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
        to optimize the model with the loss in TRAIN mode. Used if `optimizer`
        is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in
        TRAIN mode. By default, it is `None` in other modes. If you want to
        optimize loss yourself, you can pass `lambda _: tf.no_op()` and then use
        `EstimatorSpec.loss` to compute and apply gradients.
      update_ops: A list or tuple of update ops to be run at training time. For
        example, layers such as BatchNormalization create mean and variance
        update ops that need to be run at training time. In Tensorflow 1.x,
        these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x
        doesn't have collections, update_ops need to be passed explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      `EstimatorSpec`.
    """
    # Not all subclasses of Head will have implemented
    # _create_tpu_estimator_spec. If it is implemented, we can convert it to
    # the normal `EstimatorSpec` by calling the method of
    # `_TPUEstimatorSpec.as_estimator_spec()`.
    try:
      tpu_estimator_spec = (
          self._create_tpu_estimator_spec(
              features=features,
              mode=mode,
              logits=logits,
              labels=labels,
              optimizer=optimizer,
              trainable_variables=trainable_variables,
              train_op_fn=train_op_fn,
              update_ops=update_ops,
              regularization_losses=regularization_losses))
      return tpu_estimator_spec.as_estimator_spec()
    except NotImplementedError:
      raise NotImplementedError(
          'Subclasses of Head must implement `create_estimator_spec()` or '
          '_create_tpu_estimator_spec().')

  def _create_tpu_estimator_spec(
      self,
      features,
      mode,
      logits,
      labels=None,
      optimizer=None,
      trainable_variables=None,
      train_op_fn=None,
      update_ops=None,
      regularization_losses=None,
  ):
    """Returns `model_fn._TPUEstimatorSpec` that a model_fn can return.

    Args:
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Often to be used to fetch example-weight tensor.
      mode: Estimator's `ModeKeys`.
      logits: Logits `Tensor` to be used by the head.
      labels: Labels `Tensor`, or `dict` mapping string label names to `Tensor`
        objects of the label values.
      optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets `train_op = optimizer.get_updates(loss,
        trainable_variables)`, which updates variables to minimize `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
        to optimize the model with the loss in TRAIN mode. Used if `optimizer`
        is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in
        TRAIN mode. By default, it is `None` in other modes. If you want to
        optimize loss yourself, you can pass `lambda _: tf.no_op()` and then use
        `EstimatorSpec.loss` to compute and apply gradients.
      update_ops: A list or tuple of update ops to be run at training time. For
        example, layers such as BatchNormalization create mean and variance
        update ops that need to be run at training time. In Tensorflow 1.x,
        these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x
        doesn't have collections, update_ops need to be passed explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      A `model_fn._TPUEstimatorSpec' instance.
    """
    raise NotImplementedError(
        'TPUEstimatorSpec not available for this model head.')


# TODO(b/119617064): unify eager and graph implementations
# Note that, tensor shape checking is slow in Eager mode. To amend it, the
# tensor static shape is used for checking. The duplication of shape checking
# for eager mode in the following helper functions can be safely removed
# if there's some way to get around it in the future.

# Label shape error messages.
_LABEL_NONE_ERR_MSG = (
    'You must provide a labels Tensor. Given: None. '
    'Suggested troubleshooting steps: Check that your data contains your label '
    'feature. Check that your input_fn properly parses and returns labels.')

_SPARSE_LABEL_ERR_MSG = (
    'SparseTensor labels are not supported. Labels must be a Tensor of shape '
    '[D0, D1, ..., DN, {}], e.g. [batch_size, {}].Suggested Fix (1): Check the'
    ' label feature in your data. Each example must contain {} value(s). If '
    'not, your choice of label was probably incorrect. Suggested Fix (2): In '
    'your input_fn, use tf.sparse_tensor_to_dense() to turn labels into a '
    'Tensor.')

_MISMATCHED_LABEL_DIM_ERR_MSG = (
    'Mismatched label shape. Expected labels dimension={}. Received {}. '
    'Suggested Fix: If your classifier expects one-hot encoding label, check '
    'your n_classes argument to the estimator and/or the shape of your label. '
    'Otherwise, check the shape of your label.')

_LABEL_SHAPE_ERR_MSG = (
    'labels shape must be [D0, D1, ... DN, {}]. Suggested Fix: check your '
    'n_classes argument to the head and/or the shape of your label.')

_VALIDATION_ERROR_MSG = '{} should be a list or a tuple. Given type: {}.'


def check_dense_labels_match_logits_and_reshape(labels, logits,
                                                expected_labels_dimension):
  """Checks labels shape matches logits, and reshapes if needed.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Then labels
  shape must be [D0, D1, ... DN, expected_labels_dimension].
  If expected_labels_dimension=1, labels could be [D0, D1, ... DN] and this
  method reshapes them to [D0, D1, ... DN, 1].

  Args:
    labels: labels Tensor.
    logits: logits Tensor.
    expected_labels_dimension: Integer.

  Returns:
    Validated and reshaped labels Tensor.

  Raises:
    ValueError: If labels is a SparseTensor.
    ValueError: If labels shape is statically defined and fails validation.
    OpError: If labels shape is not statically defined and fails validation.
  """
  if labels is None:
    raise ValueError(_LABEL_NONE_ERR_MSG)
  with ops.name_scope('labels', values=(labels, logits)) as scope:
    labels = tf.compat.v1.convert_to_tensor_or_sparse_tensor(labels)
    if isinstance(labels, tf.sparse.SparseTensor):
      raise ValueError(
          _SPARSE_LABEL_ERR_MSG.format(expected_labels_dimension,
                                       expected_labels_dimension,
                                       expected_labels_dimension))
    # Eager mode.
if tf.executing_eagerly(): labels_rank = labels._rank() # pylint: disable=protected-access logits_rank = logits._rank() # pylint: disable=protected-access if (labels_rank is not None and logits_rank is not None and labels_rank == logits_rank - 1): labels = tf.compat.v1.expand_dims(labels, -1) labels_rank += 1 labels_shape = labels._shape_tuple() # pylint: disable=protected-access if labels_rank < 2: raise ValueError('labels must have rank at least 2. Received rank {}, ' 'shape {}'.format(labels_rank, labels_shape)) if labels_shape[-1] != expected_labels_dimension: raise ValueError( _MISMATCHED_LABEL_DIM_ERR_MSG.format(expected_labels_dimension, labels_shape[-1])) logits_shape = logits._shape_tuple() # pylint: disable=protected-access expected_labels_shape = logits_shape[:-1] + (expected_labels_dimension,) if expected_labels_shape != labels_shape: raise ValueError( '{}, expected_labels_shape: {}. labels_shape: {}.'.format( _LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension), expected_labels_shape, labels_shape)) return labels # Graph mode. 
if (labels.shape.ndims is not None and logits.shape.ndims is not None and labels.shape.ndims == logits.shape.ndims - 1): labels = tf.compat.v1.expand_dims(labels, -1) assert_rank = tf.compat.v1.debugging.assert_rank_at_least( labels, 2, message=_LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension)) with tf.control_dependencies([assert_rank]): static_shape = labels.shape if static_shape.ndims is not None: final_dim = static_shape[-1] if (final_dim is not None) and (final_dim != expected_labels_dimension): raise ValueError( _MISMATCHED_LABEL_DIM_ERR_MSG.format(expected_labels_dimension, final_dim)) logits_shape = tf.compat.v1.shape(logits) expected_labels_shape = tf.concat( [logits_shape[:-1], [expected_labels_dimension]], axis=0) labels_shape = tf.compat.v1.shape(labels) assert_dimension = tf.compat.v1.debugging.assert_equal( expected_labels_shape, labels_shape, message=_LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension), data=[ 'expected_labels_shape: ', expected_labels_shape, 'labels_shape: ', labels_shape ]) with tf.control_dependencies([assert_dimension]): return tf.identity(labels, name=scope) def get_weights_and_check_match_logits(features, weight_column, logits, allow_per_logit_weights=False): """Fetches weights from features and checks that the shape matches logits. Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape can be either: * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`. * [D0, D1, ... DN, 1] * [D0, D1, ... DN]: In this case, weights is reshaped into [D0, D1, ... DN, 1] to work with weight broadcasting rules. Args: features: The features dict that contains weights. weight_column: The weight column. If not given, this method returns 1. logits: logits Tensor. allow_per_logit_weights: Boolean. Whether we allow weights along the logits dimension, namely shape `[D0, D1, ... DN, logits_dimension]`. Returns: Validated and reshaped weights Tensor. 
  Raises:
    ValueError: If the weights `Tensor` cannot be cast into float.
  """
  if allow_per_logit_weights:
    err_msg = ('weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
               '[D0, D1, ... DN, logits_dimension]')
  else:
    err_msg = ('weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
  with ops.name_scope(
      'weights', values=tuple(six.itervalues(features)) + (logits,)) as scope:
    # Fetch the weights.
    if weight_column is None:
      return 1.
    # TODO(b/117839674): update feature_column
    if isinstance(weight_column, six.string_types):
      weight_column = tf.feature_column.numeric_column(
          key=weight_column, shape=(1,))
    if not isinstance(weight_column,
                      (feature_column_lib.NumericColumn, _NumericColumn)):
      raise TypeError('Weight column must be either a string or NumericColumn.'
                      ' Given type: {}.'.format(type(weight_column)))
    weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
        _LazyBuilder(features))
    if not (weights.dtype.is_floating or weights.dtype.is_integer):
      raise ValueError('Weight column should be castable to float. '
                       'Given dtype: {}'.format(weights.dtype))
    weights = tf.cast(weights, name='weights', dtype=tf.dtypes.float32)
    # Validate the weights shape.
    # Eager mode.
    if tf.executing_eagerly():
      weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
      logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
      weights_rank = weights._rank()  # pylint: disable=protected-access
      logits_rank = logits._rank()  # pylint: disable=protected-access
      # Weights of shape [D0, D1, ... DN] (one rank below logits): verify
      # against the logits prefix, then append a trailing 1-dim.
      if (weights_rank is not None and logits_rank is not None and
          weights_rank == logits_rank - 1):
        if logits_shape[:-1] != weights_shape:
          raise ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
              err_msg, logits_shape, weights_shape))
        return tf.compat.v1.expand_dims(weights, -1, name=scope)
      supported_weights_shape = logits_shape[:-1] + (1,)
      if allow_per_logit_weights:
        # Either the full logits shape or the [..., 1] shape is acceptable.
        if (logits_shape != weights_shape and
            supported_weights_shape != weights_shape):
          raise ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
              err_msg, logits_shape, weights_shape))
      else:
        if supported_weights_shape != weights_shape:
          raise ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
              err_msg, logits_shape, weights_shape))
      return weights
    # Graph mode.
    weights_shape = tf.compat.v1.shape(weights, name='weights_shape')
    logits_shape = tf.compat.v1.shape(logits, name='logits_shape')
    if (weights.shape.ndims is not None and logits.shape.ndims is not None and
        weights.shape.ndims == logits.shape.ndims - 1):
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          logits_shape[:-1],
          weights_shape,
          message=err_msg,
          data=[
              'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape
          ])
      with tf.control_dependencies([assert_dimension]):
        return tf.compat.v1.expand_dims(weights, -1, name=scope)
    supported_weights_shape = tf.concat([logits_shape[:-1], [1]], axis=0)
    if allow_per_logit_weights:
      # Runtime check: weights match either logits shape or [..., 1].
      condition = tf.math.reduce_any([
          tf.reduce_all(tf.math.equal(logits_shape, weights_shape)),
          tf.reduce_all(tf.math.equal(supported_weights_shape, weights_shape))
      ])
      assert_dimension = tf.debugging.Assert(
          condition=condition,
          data=[
              err_msg, 'logits_shape: ', logits_shape, 'weights_shape: ',
              weights_shape
          ])
    else:
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          supported_weights_shape,
          weights_shape,
          message=err_msg,
          data=[
              'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape
          ])
    with tf.control_dependencies([assert_dimension]):
      return tf.identity(weights, name=scope)


def check_logits_final_dim(logits, expected_logits_dimension):
  """Checks that logits shape is [D0, D1, ...
DN, logits_dimension]."""
  with ops.name_scope('logits', values=(logits,)) as scope:
    logits = tf.cast(logits, tf.dtypes.float32)
    # Eager mode
    if tf.executing_eagerly():
      logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
      logits_rank = logits._rank()  # pylint: disable=protected-access
      if logits_rank < 2:
        raise ValueError('logits must have rank at least 2. Received rank {}, '
                         'shape {}'.format(logits_rank, logits_shape))
      # Only check the last dim when a concrete int dimension is expected.
      if (isinstance(expected_logits_dimension, int) and
          logits_shape[-1] != expected_logits_dimension):
        raise ValueError(
            'logits shape must be [D0, D1, ... DN, logits_dimension], '
            'got {}.'.format(logits_shape))
      return logits
    # Graph mode
    logits_shape = tf.compat.v1.shape(logits)
    assert_rank = tf.compat.v1.debugging.assert_rank_at_least(
        logits,
        2,
        data=[logits_shape],
        message='logits shape must be [D0, D1, ... DN, logits_dimension]')
    with tf.control_dependencies([assert_rank]):
      static_shape = logits.shape
      # Prefer static validation; fall back to a runtime assert when the last
      # dimension is unknown at graph-construction time.
      if static_shape.ndims is not None and static_shape[-1] is not None:
        if (isinstance(expected_logits_dimension, int) and
            static_shape[-1] != expected_logits_dimension):
          raise ValueError(
              'logits shape must be [D0, D1, ... DN, logits_dimension], '
              'got {}.'.format(static_shape))
        return logits
      assert_dimension = tf.compat.v1.debugging.assert_equal(
          expected_logits_dimension,
          logits_shape[-1],
          data=[logits_shape],
          message='logits shape must be [D0, D1, ... DN, logits_dimension]')
      with tf.control_dependencies([assert_dimension]):
        return tf.identity(logits, name=scope)


def validate_loss_fn_args(loss_fn):
  """Validates loss_fn arguments.

  Required arguments: labels, logits.
  Optional arguments: features, loss_reduction.

  Args:
    loss_fn: The loss function.

  Raises:
    ValueError: If the signature is unexpected.
  """
  loss_fn_args = function_utils.fn_args(loss_fn)
  for required_arg in ['labels', 'logits']:
    if required_arg not in loss_fn_args:
      raise ValueError('loss_fn must contain argument: {}. '
                       'Given arguments: {}'.format(required_arg, loss_fn_args))
  # Anything outside the four known argument names is rejected.
  invalid_args = list(
      set(loss_fn_args) -
      set(['labels', 'logits', 'features', 'loss_reduction']))
  if invalid_args:
    raise ValueError('loss_fn has unexpected args: {}'.format(invalid_args))


def validate_loss_reduction(loss_reduction):
  """Raises ValueError unless loss_reduction is a valid, non-NONE Reduction."""
  if (loss_reduction not in tf.losses.Reduction.all() or
      loss_reduction == tf.losses.Reduction.NONE):
    raise ValueError(
        'Invalid loss_reduction: {}. See `tf.losses.Reduction` for valid '
        'options.'.format(loss_reduction))


def validate_update_ops(update_ops=None):
  """Raises ValueError if update_ops is given but not a list/tuple."""
  if update_ops is not None and not isinstance(update_ops, (list, tuple)):
    raise ValueError(
        _VALIDATION_ERROR_MSG.format('update_ops', type(update_ops)))


def validate_v2_optimizer(optimizer):
  """Raises ValueError unless optimizer is a Keras (v2 or legacy) optimizer."""
  if not isinstance(
      optimizer,
      (tf_keras.optimizers.Optimizer, tf_keras.optimizers.legacy.Optimizer)):
    raise ValueError(
        'The given optimizer is not a tf_keras.optimizers.Optimizer '
        f'instance. Received optimizer of type {type(optimizer)}')


def validate_trainable_variables(trainable_variables=None):
  """Raises ValueError unless trainable_variables is a non-None list/tuple."""
  if trainable_variables is None:
    raise ValueError('trainable_variables cannot be None. Given {}'.format(
        trainable_variables))
  if not isinstance(trainable_variables, (list, tuple)):
    raise ValueError(
        _VALIDATION_ERROR_MSG.format('trainable_variables',
                                     type(trainable_variables)))


def validate_n_classes(n_classes):
  """Validates n_classes argument.

  Required arguments: n_classes.

  Args:
    n_classes: The number of classes.

  Raises:
    ValueError: If n_classes is <= 2 and n_classes is a Python integer.

  Returns:
    n_classes in its original type.
  """
  if isinstance(n_classes, int) and (n_classes <= 2):
    raise ValueError('n_classes must be > 2: %s.' % n_classes)
  # For non-int (tensor) inputs, validation happens at graph execution time.
  n_classes_as_tensor = ops.convert_to_tensor(n_classes)
  assert_n_classes = tf.compat.v1.debugging.assert_greater(
      n_classes_as_tensor, 2, message='n_classes must be greater than 2')
  with tf.control_dependencies([assert_n_classes]):
    tf.no_op()
  # Return n_classes in its original type, so that any code
  # using the accessor logits_dimension() has the original type.
  return n_classes


def call_loss_fn(loss_fn, labels, logits, features, expected_loss_dim=1):
  """Calls loss_fn and checks the returned shape.

  For shape checking, eager uses the static dimension to improve performance.

  Args:
    loss_fn: The loss function.
    labels: Processed labels Tensor.
    logits: Logits Tensor of shape [D0, D1, ... DN, logits_dimension].
    features: Features dict.
    expected_loss_dim: The expected last dimension of loss Tensor.

  Returns:
    Loss Tensor with shape [D0, D1, ... DN, expected_loss_dim].

  Raises:
    ValueError: If the loss tensor shape is unexpected.
  """
  loss_fn_args = function_utils.fn_args(loss_fn)
  kwargs = {}
  # `features` is only forwarded when the user's loss_fn declares it.
  if 'features' in loss_fn_args:
    kwargs['features'] = features
  with ops.name_scope(
      'call_loss_fn',
      values=[labels, logits] + list(six.itervalues(features))):
    unweighted_loss = loss_fn(labels=labels, logits=logits, **kwargs)
    # Eager mode.
    if tf.executing_eagerly():
      loss_shape = unweighted_loss._shape_tuple()  # pylint: disable=protected-access
      logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
      expected_loss_shape = logits_shape[:-1] + (expected_loss_dim,)
      if loss_shape != expected_loss_shape:
        raise ValueError(
            'loss_fn must return Tensor of shape '
            '[D0, D1, ... DN, {}]. '.format(expected_loss_dim),
            'logits_shape: ', logits_shape, 'loss_shape: ', loss_shape)
      return unweighted_loss
    # Graph mode.
    logits_shape = tf.compat.v1.shape(logits, name='logits_shape')
    expected_loss_shape = tf.concat([logits_shape[:-1], [expected_loss_dim]],
                                    axis=0,
                                    name='expected_loss_shape')
    loss_shape = tf.compat.v1.shape(unweighted_loss, name='loss_shape')
    # Shape is dynamic in graph mode, so the check runs as an assert op.
    check_loss_shape_op = tf.debugging.Assert(
        tf.reduce_all(tf.math.equal(loss_shape, expected_loss_shape)),
        data=[
            'loss_fn must return Tensor of shape '
            '[D0, D1, ... DN, {}]. '.format(expected_loss_dim),
            'logits_shape: ', logits_shape, 'loss_shape: ', loss_shape
        ],
        name='check_loss_shape')
    with tf.control_dependencies([check_loss_shape_op]):
      return tf.identity(unweighted_loss)


def check_prediction_keys(pred_keys, valid_keys):
  """Raises ValueError if any key in pred_keys is not in valid_keys."""
  for key in pred_keys:
    if key not in valid_keys:
      raise ValueError('Prediction key must be in PredictionKeys, given: {}.'
                       'Valid prediction keys include {}.'.format(
                           key, valid_keys))


def all_class_ids(logits, n_classes):
  """Returns a [batch_size, n_classes] int Tensor of class ids 0..n_classes-1."""
  batch_size = tf.compat.v1.shape(logits)[0]
  class_id_list = tf.range(n_classes)
  return tf.tile(
      input=tf.compat.v1.expand_dims(input=class_id_list, axis=0),
      multiples=[batch_size, 1])


def all_classes(logits, n_classes, label_vocabulary=None):
  """Returns a [batch_size, n_classes] string Tensor of all class labels."""
  batch_size = tf.compat.v1.shape(logits)[0]
  if label_vocabulary:
    # Vocabulary entries are already strings.
    classes_list = tf.convert_to_tensor([label_vocabulary])
  else:
    # Without a vocabulary, stringify the integer class ids.
    classes_list = tf.expand_dims(tf.range(n_classes), axis=0)
    classes_list = tf.strings.as_string(classes_list)
  return tf.tile(input=classes_list, multiples=[batch_size, 1])


def classification_output(scores, n_classes, label_vocabulary=None):
  """Wraps scores and class labels into a serving ClassificationOutput."""
  return export_output.ClassificationOutput(
      scores=scores,
      # `ClassificationOutput` requires string classes.
      classes=all_classes(scores, n_classes, label_vocabulary))


def check_label_range(labels, n_classes, message=None):
  """Check if labels are in the range of [0, n_classes)."""
  with ops.name_scope('check_label_range', values=(labels,)):
    # Eager mode
    if tf.executing_eagerly():
      assert_less = tf.reduce_all(tf.math.less_equal(labels, n_classes - 1))
      if not assert_less:
        raise ValueError(message or
                         'Labels must be <= {} - 1'.format(n_classes))
      assert_greater = tf.reduce_all(tf.math.greater_equal(labels, 0))
      if not assert_greater:
        raise ValueError(message or 'Labels must be >= 0')
      return labels
    # Graph mode
    assert_less = tf.compat.v1.debugging.assert_less_equal(
        labels,
        ops.convert_to_tensor(n_classes - 1, dtype=labels.dtype),
        message=message or 'Labels must be <= n_classes - 1')
    assert_greater = tf.compat.v1.debugging.assert_non_negative(
        labels, message=message or 'Labels must be >= 0')
    with tf.control_dependencies((assert_less, assert_greater)):
      return tf.identity(labels)


def update_metric_with_broadcast_weights(eval_metric, values, weights):
  """Updates eval_metric with values, broadcasting weights to match values."""
  values = tf.cast(values, dtype=tf.dtypes.float32)
  if weights is not None:
    weights = tf.compat.v2.__internal__.ops.broadcast_weights(weights, values)
  eval_metric.update_state(values=values, sample_weight=weights)


def create_eval_metrics_tuple(fn, kwargs):
  """Creates TPU eval metrics tuple.

  Helper function to make eval_metric tuple (eval_metric_fn, fn_kwargs) used
  by `TPUEstimator`. TPUEstimator requires that `eval_metric_fn` take
  exclusively Tensor arguments. This helper can help create such a function
  from a more generic function that can take both Tensor and non-Tensor
  arguments.

  Args:
    fn: A eval_metric_fn that takes both Tensor and non-Tensor arguments. This
      function must return a dict of form
        {'metric name': (metric_tensor, eval_op)}
    kwargs: Dict of arguments for `fn`.

  Returns:
    `eval_metric` tuple that can be passed to a `model_fn._TPUEstimatorSpec`.
  """
  tensor_kwargs = {}
  nontensor_kwargs = {}
  # Split kwargs: only Tensor args may be passed through TPUEstimator; the
  # rest are captured by the closure below.
  for k, v in six.iteritems(kwargs):
    if tf.is_tensor(v):
      tensor_kwargs[k] = v
    else:
      nontensor_kwargs[k] = v

  def _fn(**tensors):
    # Re-merge the captured non-Tensor kwargs with the Tensors supplied by
    # TPUEstimator at call time.
    return fn(**dict(nontensor_kwargs, **tensors))

  return (_fn, tensor_kwargs)


def create_estimator_spec_train_op(
    head_name,
    optimizer=None,
    trainable_variables=None,
    train_op_fn=None,
    update_ops=None,
    regularized_training_loss=None,
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE):
  """Create train_op for estimator_spec.

  Args:
    head_name: The name of the head.
    optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
      loss in TRAIN mode. Namely, sets
      `train_op = optimizer.get_updates(loss, trainable_variables)`, which
      updates variables to minimize `loss`.
    trainable_variables: A list or tuple of `Variable` objects to update to
      minimize `loss`. In Tensorflow 1.x, by default these are the list of
      variables collected in the graph under the key
      `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
      collections and GraphKeys, trainable_variables need to be passed
      explicitly here.
    train_op_fn: Function that takes a scalar loss `Tensor` and returns
      `train_op`. Used if `optimizer` is `None`.
    update_ops: A list or tuple of update ops to be run at training time. For
      example, layers such as BatchNormalization create mean and variance
      update ops that need to be run at training time. In Tensorflow 1.x,
      these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x
      doesn't have collections, update_ops need to be passed explicitly here.
    regularized_training_loss: A scalar for total training loss that includes
      all regularization losses. If you're not using optimizer to generate
      train op, make sure to scale the loss correctly before passing it in.
      The loss typically needs to be scaled down by the number of workers.
    loss_reduction: One of `tf_keras.losses.Reduction` except `NONE`.
      Describes how to reduce training loss over batch. Defaults to
      `SUM_OVER_BATCH_SIZE`.

  Returns:
    A train op for EstimatorSpec.
  """
  del head_name
  validate_update_ops(update_ops)
  with ops.name_scope(''):  # Reset all previous name_scope.
    # Add training as the name_scope to be compatible with Keras.
    with ops.name_scope('training'):
      if optimizer is not None:
        # `optimizer` and `train_op_fn` are mutually exclusive.
        if train_op_fn is not None:
          raise ValueError('train_op_fn and optimizer cannot both be set.')
        validate_v2_optimizer(optimizer)
        validate_trainable_variables(trainable_variables)
        # Scale loss by number of replicas.
        if loss_reduction == tf.losses.Reduction.SUM_OVER_BATCH_SIZE:
          num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
          if num_replicas > 1:
            regularized_training_loss *= (1. / num_replicas)
        train_op = optimizer.get_updates(regularized_training_loss,
                                         trainable_variables)[0]
      elif train_op_fn is not None:
        train_op = train_op_fn(regularized_training_loss)
      else:
        raise ValueError('train_op_fn and optimizer cannot both be None.')
      # Chain user-provided update ops (e.g. batch-norm stats) to train_op.
      if update_ops is not None:
        train_op = tf.group(train_op, *update_ops)
      return train_op


def create_estimator_spec_summary(regularized_training_loss,
                                  regularization_losses=None,
                                  summary_key_fn=None):
  """Create summary for estimator_spec."""
  with ops.name_scope(''):
    keys = metric_keys.MetricKeys
    # summary_key_fn lets heads namespace their summary tags.
    loss_key = summary_key_fn(keys.LOSS) if summary_key_fn else keys.LOSS
    tf.compat.v1.summary.scalar(loss_key, regularized_training_loss)
    if regularization_losses is not None:
      regularization_loss = tf.math.add_n(regularization_losses)
      regularization_loss_key = (
          summary_key_fn(keys.LOSS_REGULARIZATION)
          if summary_key_fn else keys.LOSS_REGULARIZATION)
      tf.compat.v1.summary.scalar(regularization_loss_key,
                                  regularization_loss)


================================================
FILE: tensorflow_estimator/python/estimator/head/base_head_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for base_head.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import model_fn
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.head import base_head
from tensorflow_estimator.python.estimator.head import binary_class_head as head_lib
from tensorflow_estimator.python.estimator.head import head_utils as test_lib
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

_DEFAULT_SERVING_KEY = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY


def _assert_simple_summaries(test_case,
                             expected_summaries,
                             summary_str,
                             tol=1e-6):
  """Assert summary the specified simple values.

  Args:
    test_case: test case.
    expected_summaries: Dict of expected tags and simple values.
    summary_str: Serialized `summary_pb2.Summary`.
    tol: Tolerance for relative and absolute.
  """
  summary = tf.compat.v1.summary.Summary()
  summary.ParseFromString(summary_str)
  test_case.assertAllClose(
      expected_summaries, {v.tag: v.simple_value for v in summary.value},
      rtol=tol,
      atol=tol)


def _assert_no_hooks(test_case, spec):
  # Verifies the spec carries no training hooks of either kind.
  test_case.assertAllEqual([], spec.training_chief_hooks)
  test_case.assertAllEqual([], spec.training_hooks)


@test_util.run_all_in_graph_and_eager_modes
class CreateEstimatorSpecTest(tf.test.TestCase):

  # Minimal Head subclass stubbing all abstract methods with None; only the
  # TPU spec factory is meaningfully implemented.
  class _HeadWithTPUSupport(base_head.Head):
    """Head that overrides _create_tpu_estimator_spec."""

    def name(self):
      return 'HeadWithTPUSupport'

    def logits_dimension(self):
      return None

    def loss_reduction(self):
      return None

    def loss(self, features, mode, logits, labels):
      return None

    def predictions(self, logits):
      return None

    def metrics(self, regularization_losses=None):
      return None

    def update_metrics(self,
                       eval_metrics,
                       features,
                       logits,
                       labels,
                       mode=None,
                       regularization_losses=None):
      return None

    def _create_tpu_estimator_spec(
        self,
        features,
        mode,
        logits,
        labels=None,
        optimizer=None,
        trainable_variables=None,
        train_op_fn=None,
        update_ops=None,
        regularization_losses=None,
    ):
      return model_fn._TPUEstimatorSpec(
          mode=ModeKeys.EVAL, loss=tf.constant(0.0, dtype=tf.dtypes.float32))

  # Same stub, but implements only the non-TPU spec factory.
  class _HeadWithOutTPUSupport(base_head.Head):
    """Head that overrides create_estimator_spec."""

    def name(self):
      return 'HeadWithOutTPUSupport'

    def logits_dimension(self):
      return None

    def loss_reduction(self):
      return None

    def loss(self, features, mode, logits, labels):
      return None

    def predictions(self, logits):
      return None

    def metrics(self, regularization_losses=None):
      return None

    def update_metrics(self,
                       eval_metrics,
                       features,
                       logits,
                       labels,
                       mode=None,
                       regularization_losses=None):
      return None

    def create_estimator_spec(
        self,
        features,
        mode,
        logits,
        labels=None,
        optimizer=None,
        trainable_variables=None,
        train_op_fn=None,
        update_ops=None,
        regularization_losses=None,
    ):
      return model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, loss=tf.constant(0.0, dtype=tf.dtypes.float32))

  # Stub that implements neither spec factory, to exercise the error paths.
  class _InvalidHead(base_head.Head):
    """Head that overrides neither estimator_spec functions."""

    def name(self):
      return 'InvalidHead'

    def logits_dimension(self):
      return None

    def loss_reduction(self):
      return None

    def loss(self, features, mode, logits, labels):
      return None

    def predictions(self, logits):
      return None

    def metrics(self, regularization_losses=None):
      return None

    def update_metrics(self,
                       eval_metrics,
                       features,
                       logits,
                       labels,
                       mode=None,
                       regularization_losses=None):
      return None

  def test_head_override_tpu_estimator_spec(self):
    """Test for `_Head` that overrides _create_tpu_estimator_spec."""
    head = self._HeadWithTPUSupport()
    tpu_spec = head._create_tpu_estimator_spec(
        features=None, mode=None, logits=None)
    self.assertTrue(isinstance(tpu_spec, model_fn._TPUEstimatorSpec))
    # The base class derives a CPU spec from the TPU spec.
    est_spec = head.create_estimator_spec(features=None, mode=None, logits=None)
    self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec))

  def test_head_override_estimator_spec(self):
    """Test for `Head` that overrides create_estimator_spec."""
    head = self._HeadWithOutTPUSupport()
    with self.assertRaisesRegexp(
        NotImplementedError,
        'TPUEstimatorSpec not available for this model head.'):
      _ = head._create_tpu_estimator_spec(features=None, mode=None, logits=None)
    est_spec = head.create_estimator_spec(features=None, mode=None, logits=None)
    self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec))

  def test_invalid_head_class(self):
    head = self._InvalidHead()
    with self.assertRaisesRegexp(
        NotImplementedError,
        'TPUEstimatorSpec not available for this model head.'):
      _ = head._create_tpu_estimator_spec(features=None, mode=None, logits=None)
    with self.assertRaisesRegexp(
        NotImplementedError,
        r'Subclasses of Head must implement `create_estimator_spec\(\)` or '
        r'_create_tpu_estimator_spec\(\).'):
      _ = head.create_estimator_spec(features=None, mode=None, logits=None)

  @test_util.deprecated_graph_mode_only
  def test_tensor_shape_checking_in_graph_mode(self):
    """Test for shape checking of tensor with partially defined shape."""
    labels_placeholder = tf.compat.v1.placeholder(
        dtype=tf.dtypes.float32, shape=(None, 1))
    logits_placeholder = tf.compat.v1.placeholder(
        dtype=tf.dtypes.float32, shape=(None, 1))
    labels_input = np.array([[-10.], [10.]], dtype=np.float32)
    logits_input = np.array([[1.], [0.]], dtype=np.float32)
    loss = np.array([[1.], [2.]], dtype=np.float32)

    def _loss_fn(labels, logits):
      # Asserts the fed values reach the loss_fn unchanged.
      check_labels = tf.debugging.Assert(
          tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels])
      check_logits = tf.debugging.Assert(
          tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits])
      with tf.control_dependencies([check_labels, check_logits]):
        return tf.constant(loss)

    unweighted_loss = base_head.call_loss_fn(
        loss_fn=_loss_fn,
        labels=labels_placeholder,
        logits=logits_placeholder,
        features={'x': np.array(((42,),), dtype=np.int32)})
    with self.cached_session():
      self.assertAllClose(
          unweighted_loss.eval({
              labels_placeholder: labels_input,
              logits_placeholder: logits_input
          }), loss)

  @test_util.deprecated_graph_mode_only
  def test_optimizer_v2_variable_name(self):
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}

    class _Optimizer(tf_keras.optimizers.legacy.Optimizer):

      # NOTE(review): `init` (not `__init__`) never runs as a constructor —
      # looks like a typo; verify against upstream before relying on it.
      def init(self, name, **kwargs):
        super(_Optimizer, self).__init__(name, **kwargs)

      def get_updates(self, loss, params):
        del params
        variable = tf.Variable(
            name='my_variable', dtype=tf.dtypes.float32, initial_value=0.)
        self._weights.append(variable)
        return [variable]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    # Create estimator spec.
    optimizer = _Optimizer('my_optimizer')
    old_opt_variable_name_prefix = 'training/' + optimizer.__class__.__name__
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=optimizer,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      optimizer_variables = optimizer.variables()
      var_values = sess.run(optimizer_variables)
      self.assertEqual(0., var_values[0])
      # Optimizer variables must not carry the old 'training/<class>' prefix.
      for var in optimizer_variables:
        self.assertNotIn(old_opt_variable_name_prefix, var.name)

  @test_util.deprecated_graph_mode_only
  def test_head_with_invalid_optimizer(self):
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    with self.assertRaisesRegex(
        ValueError,
        'The given optimizer is not a tf_keras.optimizers.Optimizer'):
      # Create estimator spec.
      head.create_estimator_spec(
          features=features,
          mode=ModeKeys.TRAIN,
          logits=logits,
          labels=labels,
          optimizer=tf.compat.v1.train.AdamOptimizer())


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/head/binary_class_head.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Binary class head.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.ops import lookup_ops from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.export import export_output from tensorflow_estimator.python.estimator.head import base_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys from tensorflow_estimator.python.estimator.util import tf_keras_v2 @estimator_export('estimator.BinaryClassHead') class BinaryClassHead(base_head.Head): """Creates a `Head` for single label binary classification. Uses `sigmoid_cross_entropy_with_logits` loss. The head expects `logits` with shape `[D0, D1, ... DN, 1]`. In many applications, the shape is `[batch_size, 1]`. `labels` must be a dense `Tensor` with shape matching `logits`, namely `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string `Tensor` with values from the vocabulary. If `label_vocabulary` is not given, `labels` must be float `Tensor` with values in the interval `[0, 1]`. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. The loss is the weighted sum over the input dimensions. Namely, if the input labels have shape `[batch_size, 1]`, the loss is the weighted sum over `batch_size`. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features, loss_reduction)` as arguments and returns loss with shape `[D0, D1, ... DN, 1]`. 
  `loss_fn` must support float `labels` with shape `[D0, D1, ... DN, 1]`.
  Namely, the head applies `label_vocabulary` to the input labels before
  passing them to `loss_fn`.

  Usage:

  >>> head = tf.estimator.BinaryClassHead()
  >>> logits = np.array(((45,), (-41,),), dtype=np.float32)
  >>> labels = np.array(((1,), (1,),), dtype=np.int32)
  >>> features = {'x': np.array(((42,),), dtype=np.float32)}
  >>> # expected_loss = sum(cross_entropy(labels, logits)) / batch_size
  >>> #               = sum(0, 41) / 2 = 41 / 2 = 20.50
  >>> loss = head.loss(labels, logits, features=features)
  >>> print('{:.2f}'.format(loss.numpy()))
  20.50
  >>> eval_metrics = head.metrics()
  >>> updated_metrics = head.update_metrics(
  ...   eval_metrics, features, logits, labels)
  >>> for k in sorted(updated_metrics):
  ...   print('{} : {:.2f}'.format(k, updated_metrics[k].result().numpy()))
  accuracy : 0.50
  accuracy_baseline : 1.00
  auc : 0.00
  auc_precision_recall : 1.00
  average_loss : 20.50
  label/mean : 1.00
  precision : 1.00
  prediction/mean : 0.50
  recall : 0.50
  >>> preds = head.predictions(logits)
  >>> print(preds['logits'])
  tf.Tensor(
    [[ 45.]
     [-41.]], shape=(2, 1), dtype=float32)

  Usage with a canned estimator:

  ```python
  my_head = tf.estimator.BinaryClassHead()
  my_estimator = tf.estimator.DNNEstimator(
      head=my_head,
      hidden_units=...,
      feature_columns=...)
  ```

  It can also be used with a custom `model_fn`. Example:

  ```python
  def _my_model_fn(features, labels, mode):
    my_head = tf.estimator.BinaryClassHead()
    logits = tf_keras.Model(...)(features)

    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=tf_keras.optimizers.Adagrad(lr=0.1),
        logits=logits)

  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
  ```

  Args:
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    thresholds: Iterable of floats in the range `(0, 1)`. For binary
      classification metrics such as precision and recall, an eval metric is
      generated for each threshold value. This threshold is applied to the
      logistic values to determine the binary classification (i.e., above the
      threshold is `true`, below is `false`.
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, that means labels are already encoded within
      [0, 1]. If given, labels must be string type and have any value in
      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
      is not provided but labels are strings.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Decides how to
      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`,
      namely weighted sum of losses divided by `batch size * label_dimension`.
    loss_fn: Optional loss function.
    name: Name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
  """

  def __init__(self,
               weight_column=None,
               thresholds=None,
               label_vocabulary=None,
               loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
               loss_fn=None,
               name=None):
    # Validate constructor arguments eagerly so misconfiguration fails at
    # head-construction time rather than deep inside graph building.
    if label_vocabulary is not None and not isinstance(label_vocabulary,
                                                       (list, tuple)):
      raise ValueError(
          'label_vocabulary should be a list or a tuple. Given type: {}'.format(
              type(label_vocabulary)))
    thresholds = tuple(thresholds) if thresholds else tuple()
    # Thresholds must lie strictly inside (0, 1); the endpoints would make
    # the derived precision/recall metrics degenerate.
    for threshold in thresholds:
      if (threshold <= 0.0) or (threshold >= 1.0):
        raise ValueError('thresholds not in (0, 1): {}.'.format((thresholds,)))
    base_head.validate_loss_reduction(loss_reduction)
    if loss_fn:
      base_head.validate_loss_fn_args(loss_fn)
    self._weight_column = weight_column
    self._thresholds = thresholds
    self._label_vocabulary = label_vocabulary
    self._loss_reduction = loss_reduction
    self._loss_fn = loss_fn
    self._name = name
    # Metric keys. Pre-computed once here (possibly suffixed with "/<name>"
    # by `_summary_key`) so `metrics`/`update_metrics` agree on naming.
    keys = metric_keys.MetricKeys
    self._loss_mean_key = self._summary_key(keys.LOSS_MEAN)
    self._accuracy_key = self._summary_key(keys.ACCURACY)
    self._precision_key = self._summary_key(keys.PRECISION)
    self._recall_key = self._summary_key(keys.RECALL)
    self._prediction_mean_key = self._summary_key(keys.PREDICTION_MEAN)
    self._label_mean_key = self._summary_key(keys.LABEL_MEAN)
    self._accuracy_baseline_key = self._summary_key(keys.ACCURACY_BASELINE)
    self._auc_key = self._summary_key(keys.AUC)
    self._auc_pr_key = self._summary_key(keys.AUC_PR)
    self._loss_regularization_key = self._summary_key(keys.LOSS_REGULARIZATION)
    # One accuracy/precision/recall key per user-supplied threshold.
    accuracy_keys = []
    precision_keys = []
    recall_keys = []
    for threshold in self._thresholds:
      accuracy_keys.append(
          self._summary_key(keys.ACCURACY_AT_THRESHOLD % threshold))
      precision_keys.append(
          self._summary_key(keys.PRECISION_AT_THRESHOLD % threshold))
      recall_keys.append(
          self._summary_key(keys.RECALL_AT_THRESHOLD % threshold))
    self._accuracy_keys = tuple(accuracy_keys)
    self._precision_keys = tuple(precision_keys)
    self._recall_keys = tuple(recall_keys)

  @property
  def name(self):
    """See `base_head.Head` for details."""
    return self._name

  @property
  def logits_dimension(self):
    """See `base_head.Head` for details."""
    # Binary classification uses a single logit column.
    return 1

  @property
  def loss_reduction(self):
    """See `base_head.Head` for details."""
    return self._loss_reduction

  # Attributes for lookup tables in Eager execution. Note that for Graph
  # execution, the lookup tables are created on demand to make sure the lookup
  # table is in the same graph as its input tensors for `train` and `eval` of
  # Estimator (as Estimator recreates graphs for `train`, `eval` and
  # `predict`).
  _cached_class_id_table = None
  _cached_class_string_table = None

  @property
  def _class_id_table(self):
    """Creates a lookup table for class_id.

    In eager execution, this lookup table will be lazily created on the first
    call of `self._class_id_table`, and cached for later use; In graph
    execution, it will be created on demand.

    Returns:
      A hash table for lookup.
    """
    # Graph mode intentionally rebuilds the table every access so it lives in
    # the currently-active graph (see the class-attribute comment above).
    if self._cached_class_id_table is None or not tf.executing_eagerly():
      self._cached_class_id_table = lookup_ops.index_table_from_tensor(
          vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup')
    return self._cached_class_id_table

  @property
  def _class_string_table(self):
    """Creates a lookup table for class_string.

    In eager execution, this lookup table will be lazily created on the first
    call of `self._class_string_table` and cached for later use; In graph
    execution, it will be created on demand.

    Returns:
      A hash table for lookup.
    """
    if (self._cached_class_string_table is None or not tf.executing_eagerly()):
      self._cached_class_string_table = (
          lookup_ops.index_to_string_table_from_tensor(
              vocabulary_list=self._label_vocabulary,
              name='class_string_lookup'))
    return self._cached_class_string_table

  def _processed_labels(self, logits, labels):
    """Converts labels to integer id space."""
    labels = base_head.check_dense_labels_match_logits_and_reshape(
        labels=labels, logits=logits, expected_labels_dimension=1)
    if self._label_vocabulary is not None:
      # Map string labels to their integer ids via the vocabulary table.
      labels = self._class_id_table.lookup(labels)
    labels = tf.cast(labels, dtype=tf.dtypes.float32)
    return base_head.check_label_range(labels, n_classes=2)

  def _unweighted_loss_and_weights(self, logits, labels, features):
    """Computes unweighted loss and weights."""
    if self._loss_fn:
      unweighted_loss = base_head.call_loss_fn(
          loss_fn=self._loss_fn,
          labels=labels,
          logits=logits,
          features=features,
          expected_loss_dim=1)
    else:
      # Default loss: per-example sigmoid cross-entropy.
      unweighted_loss = tf.compat.v1.nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits)
    weights = base_head.get_weights_and_check_match_logits(
        features=features, weight_column=self._weight_column, logits=logits)
    return unweighted_loss, weights

  def loss(self,
           labels,
           logits,
           features=None,
           mode=None,
           regularization_losses=None):
    """Returns regularized training loss. See `base_head.Head` for details."""
    del mode  # Unused for this head.
    with ops.name_scope(
        'losses', values=(logits, labels, regularization_losses, features)):
      logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
      labels = self._processed_labels(logits, labels)
      unweighted_loss, weights = self._unweighted_loss_and_weights(
          logits, labels, features)
      training_loss = tf_keras_v2.__internal__.losses.compute_weighted_loss(
          unweighted_loss,
          sample_weight=weights,
          reduction=self._loss_reduction)
      regularization_loss = tf.math.add_n(
          regularization_losses) if regularization_losses is not None else None
      regularized_training_loss = (
          training_loss + regularization_loss
          if regularization_loss is not None else training_loss)
    return regularized_training_loss

  def predictions(self, logits, keys=None):
    """Return predictions based on keys.

    See `base_head.Head` for details.

    Args:
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      keys: a list or tuple of prediction keys. Each key can be either the
        class variable of prediction_keys.PredictionKeys or its string value,
        such as: prediction_keys.PredictionKeys.CLASSES or 'classes'. If not
        specified, it will return the predictions for all valid keys.

    Returns:
      A dict of predictions.
    """
    pred_keys = prediction_keys.PredictionKeys
    valid_keys = [
        pred_keys.LOGITS, pred_keys.LOGISTIC, pred_keys.PROBABILITIES,
        pred_keys.CLASS_IDS, pred_keys.CLASSES, pred_keys.ALL_CLASS_IDS,
        pred_keys.ALL_CLASSES
    ]
    if keys:
      base_head.check_prediction_keys(keys, valid_keys)
    else:
      keys = valid_keys
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    predictions = {}
    with ops.name_scope('predictions', values=(logits,)):
      if pred_keys.LOGITS in keys:
        predictions[pred_keys.LOGITS] = logits
      if pred_keys.LOGISTIC in keys:
        logistic = tf.math.sigmoid(logits, name=pred_keys.LOGISTIC)
        predictions[pred_keys.LOGISTIC] = logistic
      # [0, logit] pair per example; used by both the PROBABILITIES and the
      # CLASS_IDS/CLASSES branches below, hence computed unconditionally.
      two_class_logits = tf.concat((tf.compat.v1.zeros_like(logits), logits),
                                   axis=-1,
                                   name='two_class_logits')
      if pred_keys.PROBABILITIES in keys:
        probabilities = tf.compat.v1.nn.softmax(
            two_class_logits, name=pred_keys.PROBABILITIES)
        predictions[pred_keys.PROBABILITIES] = probabilities
      if pred_keys.CLASS_IDS in keys or pred_keys.CLASSES in keys:
        class_ids = tf.compat.v1.math.argmax(
            two_class_logits, axis=-1, name=pred_keys.CLASS_IDS)
        class_ids = tf.compat.v1.expand_dims(class_ids, axis=-1)
        if pred_keys.CLASS_IDS in keys:
          predictions[pred_keys.CLASS_IDS] = class_ids
        if pred_keys.CLASSES in keys:
          if self._label_vocabulary is not None:
            # Map integer ids back to vocabulary strings.
            classes = self._class_string_table.lookup(class_ids)
          else:
            classes = tf.strings.as_string(class_ids, name='str_classes')
          predictions[pred_keys.CLASSES] = classes
      if pred_keys.ALL_CLASS_IDS in keys:
        predictions[pred_keys.ALL_CLASS_IDS] = base_head.all_class_ids(
            logits, n_classes=2)
      if pred_keys.ALL_CLASSES in keys:
        predictions[pred_keys.ALL_CLASSES] = base_head.all_classes(
            logits, n_classes=2, label_vocabulary=self._label_vocabulary)
      return predictions

  def metrics(self, regularization_losses=None):
    """Creates metrics. See `base_head.Head` for details."""
    keys = metric_keys.MetricKeys
    with ops.name_scope('metrics', values=(regularization_losses,)):
      # Mean metric.
      eval_metrics = {}
      eval_metrics[self._loss_mean_key] = tf_keras.metrics.Mean(
          name=keys.LOSS_MEAN)
      eval_metrics[self._accuracy_key] = tf_keras.metrics.Accuracy(
          name=keys.ACCURACY)
      eval_metrics[self._precision_key] = tf_keras.metrics.Precision(
          name=keys.PRECISION)
      eval_metrics[self._recall_key] = tf_keras.metrics.Recall(
          name=keys.RECALL)
      eval_metrics[self._prediction_mean_key] = tf_keras.metrics.Mean(
          name=keys.PREDICTION_MEAN)
      eval_metrics[self._label_mean_key] = tf_keras.metrics.Mean(
          name=keys.LABEL_MEAN)
      eval_metrics[self._accuracy_baseline_key] = tf_keras.metrics.Mean(
          name=keys.ACCURACY_BASELINE)
      # The default summation_method is "interpolation" in the AUC metric.
      eval_metrics[self._auc_key] = tf_keras.metrics.AUC(name=keys.AUC)
      eval_metrics[self._auc_pr_key] = tf_keras.metrics.AUC(
          curve='PR', name=keys.AUC_PR)
      if regularization_losses is not None:
        eval_metrics[self._loss_regularization_key] = tf_keras.metrics.Mean(
            name=keys.LOSS_REGULARIZATION)
      # Per-threshold accuracy/precision/recall metrics.
      for i, threshold in enumerate(self._thresholds):
        eval_metrics[self._accuracy_keys[i]] = tf_keras.metrics.BinaryAccuracy(
            name=self._accuracy_keys[i], threshold=threshold)
        eval_metrics[self._precision_keys[i]] = tf_keras.metrics.Precision(
            name=self._precision_keys[i], thresholds=threshold)
        eval_metrics[self._recall_keys[i]] = tf_keras.metrics.Recall(
            name=self._recall_keys[i], thresholds=threshold)
    return eval_metrics

  def _update_accuracy_baseline(self, eval_metrics):
    """Update accuracy baseline metric based on labels mean metric.

    This is the best the model could do by always predicting one class.

    For example, suppose the labels = [0, 1, 0, 1, 1].
    So the
      label_mean.total = 3,
      label_mean.count = 5,
      label_mean = label_mean.total / label_mean.count = 3 / 5 = 0.6

    By always predicting one class, there are two cases:
    (1) predicted_labels_0 = [0, 0, 0, 0, 0], accuracy_0 = 2 / 5 = 0.4
    (2) predicted_labels_1 = [1, 1, 1, 1, 1], accuracy_1 = 3 / 5 = 0.6
    So the
      accuracy_baseline = max(accuracy_0, accuracy_1) = 0.6,
                        = max(label_mean, 1 - label_mean)

    To update the total and count of accuracy_baseline,
      accuracy_baseline = max(label_mean, 1 - label_mean)
        = max(label_mean.total / label_mean.count,
              1 - label_mean.total / label_mean.count)
        = max(label_mean.total / label_mean.count,
              (label_mean.count - label_mean.total) / label_mean.count)
    So
      accuracy_baseline.total =
          max(label_mean.total, (label_mean.count - label_mean.total))
      accuracy_baseline.count = label_mean.count

    Args:
      eval_metrics: A `dict` of metrics to be updated.
    """
    label_mean_metric = eval_metrics[self._label_mean_key]
    accuracy_baseline_metric = eval_metrics[self._accuracy_baseline_key]
    # Register a no-op update, then derive total/count from label_mean's
    # internal state per the formula in the docstring.
    accuracy_baseline_metric.add_update(tf.no_op())
    accuracy_baseline_metric.total = tf.math.maximum(
        label_mean_metric.total,
        label_mean_metric.count - label_mean_metric.total)
    accuracy_baseline_metric.count = label_mean_metric.count

  def _update_auc(self, auc_metric, labels, predictions, weights=None):
    """Updates `auc_metric` with float32 predictions and broadcast weights."""
    predictions = tf.cast(predictions, dtype=tf.dtypes.float32)
    if weights is not None:
      # Broadcast weights to the predictions' shape before updating.
      weights = tf.compat.v2.__internal__.ops.broadcast_weights(
          weights, predictions)
    auc_metric.update_state(
        y_true=labels, y_pred=predictions, sample_weight=weights)

  def update_metrics(self,
                     eval_metrics,
                     features,
                     logits,
                     labels,
                     regularization_losses=None):
    """Updates eval metrics. See `base_head.Head` for details."""
    preds = self.predictions(logits)
    class_ids = preds[prediction_keys.PredictionKeys.CLASS_IDS]
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    labels = self._processed_labels(logits, labels)
    unweighted_loss, weights = self._unweighted_loss_and_weights(
        logits, labels, features)
    # Update metrics.
    eval_metrics[self._loss_mean_key].update_state(
        values=unweighted_loss, sample_weight=weights)
    eval_metrics[self._accuracy_key].update_state(
        y_true=labels, y_pred=class_ids, sample_weight=weights)
    eval_metrics[self._precision_key].update_state(
        y_true=labels, y_pred=class_ids, sample_weight=weights)
    eval_metrics[self._recall_key].update_state(
        y_true=labels, y_pred=class_ids, sample_weight=weights)
    # Recompute just the logistic prediction for the probability-based
    # metrics below.
    logistic_key = prediction_keys.PredictionKeys.LOGISTIC
    predictions = self.predictions(logits, [logistic_key])
    logistic = predictions[logistic_key]
    base_head.update_metric_with_broadcast_weights(
        eval_metrics[self._prediction_mean_key], logistic, weights)
    base_head.update_metric_with_broadcast_weights(
        eval_metrics[self._label_mean_key], labels, weights)
    # accuracy_baseline is derived from label_mean, so update it after
    # label_mean has been updated.
    self._update_accuracy_baseline(eval_metrics)
    self._update_auc(
        auc_metric=eval_metrics[self._auc_key],
        labels=labels,
        predictions=logistic,
        weights=weights)
    self._update_auc(
        auc_metric=eval_metrics[self._auc_pr_key],
        labels=labels,
        predictions=logistic,
        weights=weights)
    if regularization_losses is not None:
      regularization_loss = tf.math.add_n(regularization_losses)
      eval_metrics[self._loss_regularization_key].update_state(
          values=regularization_loss)
    for i in range(len(self._thresholds)):
      eval_metrics[self._accuracy_keys[i]].update_state(
          y_true=labels, y_pred=logistic, sample_weight=weights)
      eval_metrics[self._precision_keys[i]].update_state(
          y_true=labels, y_pred=logistic, sample_weight=weights)
      eval_metrics[self._recall_keys[i]].update_state(
          y_true=labels, y_pred=logistic, sample_weight=weights)
    return eval_metrics

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 trainable_variables=None,
                                 train_op_fn=None,
                                 update_ops=None,
                                 regularization_losses=None):
    """Returns an `EstimatorSpec`.

    Args:
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Often to be used to fetch example-weight tensor.
      mode: Estimator's `ModeKeys`.
      logits: Logits `Tensor` with shape `[D0, D1, ... DN, 1]`. For many
        applications, the shape is `[batch_size, 1]`.
      labels: Labels integer or string `Tensor` with shape matching `logits`,
        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is
        required argument when `mode` equals `TRAIN` or `EVAL`.
      optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets
        `train_op = optimizer.get_updates(loss, trainable_variables)`, which
        updates variables to minimize `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
      update_ops: A list or tuple of update ops to be run at training time.
        For example, layers such as BatchNormalization create mean and
        variance update ops that need to be run at training time. In
        Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As
        Tensorflow 2.x doesn't have collections, update_ops need to be passed
        explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses. These losses are
        usually expressed as a batch average, so for best results users need
        to set `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to
        avoid scaling errors.

    Returns:
      `EstimatorSpec`.

    Raises:
      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
        mode, or if both are set.
    """
    with ops.name_scope(self._name, 'head'):
      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      predictions = self.predictions(logits)
      if mode == ModeKeys.PREDICT:
        probabilities = predictions[pred_keys.PROBABILITIES]
        logistic = predictions[pred_keys.LOGISTIC]
        classifier_output = base_head.classification_output(
            scores=probabilities,
            n_classes=2,
            label_vocabulary=self._label_vocabulary)
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                base_head.DEFAULT_SERVING_KEY: classifier_output,
                base_head.CLASSIFY_SERVING_KEY: classifier_output,
                base_head.REGRESS_SERVING_KEY:
                    export_output.RegressionOutput(value=logistic),
                base_head.PREDICT_SERVING_KEY:
                    export_output.PredictOutput(predictions)
            })
      regularized_training_loss = self.loss(
          logits=logits,
          labels=labels,
          features=features,
          mode=mode,
          regularization_losses=regularization_losses)
      # Eval.
      if mode == ModeKeys.EVAL:
        eval_metrics = self.metrics(
            regularization_losses=regularization_losses)
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.EVAL,
            predictions=predictions,
            loss=regularized_training_loss,
            eval_metrics=base_head.create_eval_metrics_tuple(
                self.update_metrics, {
                    'eval_metrics': eval_metrics,
                    'features': features,
                    'logits': logits,
                    'labels': labels,
                    'regularization_losses': regularization_losses
                }))
      # Train.
      train_op = base_head.create_estimator_spec_train_op(
          head_name=self._name,
          optimizer=optimizer,
          train_op_fn=train_op_fn,
          update_ops=update_ops,
          trainable_variables=trainable_variables,
          regularized_training_loss=regularized_training_loss,
          loss_reduction=self._loss_reduction)
      # Create summary.
base_head.create_estimator_spec_summary( regularized_training_loss=regularized_training_loss, regularization_losses=regularization_losses, summary_key_fn=self._summary_key) return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, train_op=train_op) ================================================ FILE: tensorflow_estimator/python/estimator/head/binary_class_head_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests for binary_class_head.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import six import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import dnn from tensorflow_estimator.python.estimator.canned import dnn_testing_utils from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.head import binary_class_head as head_lib from tensorflow_estimator.python.estimator.head import head_utils as test_lib from tensorflow_estimator.python.estimator.mode_keys import ModeKeys @test_util.run_all_in_graph_and_eager_modes class BinaryClassHeadTest(tf.test.TestCase): def test_threshold_too_small(self): with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'): head_lib.BinaryClassHead(thresholds=(0., 0.5)) def test_threshold_too_large(self): with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'): head_lib.BinaryClassHead(thresholds=(0.5, 1.)) def test_invalid_loss_reduction(self): with self.assertRaisesRegexp( ValueError, r'Invalid loss_reduction: invalid_loss_reduction'): head_lib.BinaryClassHead(loss_reduction='invalid_loss_reduction') with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: none'): head_lib.BinaryClassHead(loss_reduction=tf.losses.Reduction.NONE) def test_loss_fn_arg_labels_missing(self): def _loss_fn(logits): del logits # Unused with self.assertRaisesRegexp( ValueError, r'loss_fn must contain argument: labels\. 
' r'Given arguments: \(\'logits\',\)'): head_lib.BinaryClassHead(loss_fn=_loss_fn) def test_loss_fn_arg_logits_missing(self): def _loss_fn(labels): del labels # unused with self.assertRaisesRegexp( ValueError, r'loss_fn must contain argument: logits\. ' r'Given arguments: \(\'labels\',\)'): head_lib.BinaryClassHead(loss_fn=_loss_fn) def test_loss_fn_arg_features_ok(self): def _loss_fn(labels, logits, features): del labels, logits, features # Unused head_lib.BinaryClassHead(loss_fn=_loss_fn) def test_loss_fn_arg_invalid(self): def _loss_fn(labels, logits, name=None): del labels, logits, name # Unused with self.assertRaisesRegexp(ValueError, r'loss_fn has unexpected args: \[\'name\'\]'): head_lib.BinaryClassHead(loss_fn=_loss_fn) def test_invalid_logits_shape(self): head = head_lib.BinaryClassHead() self.assertEqual(1, head.logits_dimension) # Logits should be shape (batch_size, 1). logits_2x2 = np.array(((45., 44.), (41., 42.),)) pred_key = prediction_keys.PredictionKeys.PROBABILITIES # Static shape. with self.assertRaisesRegexp(ValueError, 'logits shape'): preds = head.predictions(logits_2x2, [pred_key]) self.evaluate(preds[pred_key]) if tf.executing_eagerly(): return # Dynamic shape only works in Graph mode. logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) spec = head.create_estimator_spec( features={'x': np.array(((42.,),))}, mode=ModeKeys.PREDICT, logits=logits_placeholder, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session(): with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'): spec.predictions[pred_key].eval({logits_placeholder: logits_2x2}) def test_invalid_labels_shape(self): head = head_lib.BinaryClassHead() self.assertEqual(1, head.logits_dimension) # Labels and logits should be shape (batch_size, 1). labels_2x2 = np.array(((45., 44.), (41., 42.),)) logits_2x1 = np.array(((45.,), (41.,),)) features = {'x': np.array(((42.,),))} # Static shape. 
with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): training_loss = head.loss( logits=logits_2x1, labels=labels_2x2, features=features, mode=ModeKeys.EVAL) self.evaluate(training_loss) if tf.executing_eagerly(): return # Dynamic shape only works in Graph mode. labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.loss( logits=logits_placeholder, labels=labels_placeholder, features=features, mode=ModeKeys.EVAL) with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'): training_loss.eval({ logits_placeholder: logits_2x1, labels_placeholder: labels_2x2 }) def test_incompatible_labels_shape(self): head = head_lib.BinaryClassHead() self.assertEqual(1, head.logits_dimension) # Both logits and labels should be shape (batch_size, 1). values_2x1 = np.array(((0.,), (1.,),)) values_3x1 = np.array(((0.,), (1.,), (0.,),)) features = {'x': values_2x1} # Static shape for eager mode. if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, 'labels shape'): head.loss( logits=values_2x1, labels=values_3x1, features=features, mode=ModeKeys.EVAL) with self.assertRaisesRegexp(ValueError, 'labels shape'): head.loss( logits=values_3x1, labels=values_2x1, features=features, mode=ModeKeys.EVAL) return # Static shape for Graph mode. with self.assertRaisesRegexp(ValueError, 'logits and labels must have the same shape'): head.loss( logits=values_2x1, labels=values_3x1, features=features, mode=ModeKeys.EVAL) with self.assertRaisesRegexp(ValueError, 'logits and labels must have the same shape'): head.loss( logits=values_3x1, labels=values_2x1, features=features, mode=ModeKeys.EVAL) # Dynamic shape only works in Graph mode. 
labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.loss( logits=logits_placeholder, labels=labels_placeholder, features=features, mode=ModeKeys.EVAL) with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[3 1\] \[labels_shape: \] \[2 1\]'): training_loss.eval({ labels_placeholder: values_2x1, logits_placeholder: values_3x1 }) with self.cached_session(): with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'): training_loss.eval({ labels_placeholder: values_3x1, logits_placeholder: values_2x1 }) def test_predict(self): head = head_lib.BinaryClassHead() self.assertEqual(1, head.logits_dimension) logits = [[0.3], [-0.4]] expected_logistics = [[0.574443], [0.401312]] expected_probabilities = [[0.425557, 0.574443], [0.598688, 0.401312]] expected_class_ids = [[1], [0]] expected_classes = [[b'1'], [b'0']] expected_all_class_ids = [[0, 1]] * 2 expected_all_classes = [[b'0', b'1']] * 2 expected_export_classes = [[b'0', b'1']] * 2 keys = prediction_keys.PredictionKeys preds = head.predictions(logits, [ keys.LOGITS, keys.LOGISTIC, keys.PROBABILITIES, keys.CLASS_IDS, keys.CLASSES, keys.ALL_CLASS_IDS, keys.ALL_CLASSES ]) self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS])) self.assertAllClose(expected_logistics, self.evaluate(preds[keys.LOGISTIC])) self.assertAllClose(expected_probabilities, self.evaluate(preds[keys.PROBABILITIES])) self.assertAllClose(expected_class_ids, self.evaluate(preds[keys.CLASS_IDS])) self.assertAllClose(expected_all_class_ids, self.evaluate(preds[keys.ALL_CLASS_IDS])) self.assertAllEqual(expected_classes, self.evaluate(preds[keys.CLASSES])) self.assertAllEqual(expected_all_classes, self.evaluate(preds[keys.ALL_CLASSES])) if tf.executing_eagerly(): return # Create estimator spec. 
spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert spec contains expected tensors. self.assertIsNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNone(spec.train_op) self.assertItemsEqual(('classification', 'regression', 'predict', test_lib._DEFAULT_SERVING_KEY), spec.export_outputs.keys()) test_lib._assert_no_hooks(self, spec) # Assert predictions. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllClose(expected_logistics, predictions[prediction_keys.PredictionKeys.LOGISTIC]) self.assertAllClose( expected_probabilities, predictions[prediction_keys.PredictionKeys.PROBABILITIES]) self.assertAllClose(expected_class_ids, predictions[prediction_keys.PredictionKeys.CLASS_IDS]) self.assertAllEqual(expected_classes, predictions[prediction_keys.PredictionKeys.CLASSES]) self.assertAllClose( expected_all_class_ids, predictions[prediction_keys.PredictionKeys.ALL_CLASS_IDS]) self.assertAllEqual( expected_all_classes, predictions[prediction_keys.PredictionKeys.ALL_CLASSES]) self.assertAllClose( expected_probabilities, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores)) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].classes)) self.assertAllClose(expected_logistics, sess.run(spec.export_outputs['regression'].value)) def test_predict_with_vocabulary_list(self): head = head_lib.BinaryClassHead(label_vocabulary=['aang', 'iroh']) logits = [[1.], [0.]] expected_classes = [[b'iroh'], [b'aang']] pred_key = prediction_keys.PredictionKeys.CLASSES if tf.executing_eagerly(): preds = head.predictions(logits, [pred_key]) 
self.assertAllEqual(expected_classes, preds[pred_key]) return preds = head.predictions(logits, [pred_key]) with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllEqual(expected_classes, preds[pred_key].eval()) def test_eval_create_loss(self): head = head_lib.BinaryClassHead() logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size # = sum([0, 41]) / 2 = 20.5 expected_training_loss = 20.5 # Create loss. training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) self.assertAllClose( expected_training_loss, self.evaluate(training_loss), rtol=1e-2, atol=1e-2) def test_eval_create_loss_loss_fn(self): """Tests head.create_loss for eval mode and custom loss_fn.""" loss = np.array([[1.], [2.]], dtype=np.float32) logits_input = np.array([[-10.], [10.]], dtype=np.float32) labels_input = np.array([[1], [0]], dtype=np.int64) def _loss_fn(labels, logits): check_labels = tf.debugging.Assert( tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels]) check_logits = tf.debugging.Assert( tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits]) with tf.control_dependencies([check_labels, check_logits]): return tf.constant(loss) head = head_lib.BinaryClassHead(loss_fn=_loss_fn) actual_training_loss = head.loss( logits=logits_input, labels=labels_input, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL) self.assertAllClose(np.sum(loss) / 2., self.evaluate(actual_training_loss)) def test_eval_create_loss_loss_fn_wrong_shape(self): """Tests custom loss_fn that returns Tensor of unexpected shape.""" loss = np.array([1., 2.], dtype=np.float32) def _loss_fn(labels, logits): del labels, logits # Unused return tf.constant(loss) head = head_lib.BinaryClassHead(loss_fn=_loss_fn) logits = np.array([[-10.], 
[10.]], dtype=np.float32) labels = np.array([[1], [0]], dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, 'loss_shape'): head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) else: actual_training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] ' r'\[logits_shape: \] \[2 1\] \[loss_shape: \] \[2\]'): with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) actual_training_loss.eval() def test_eval_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib.BinaryClassHead() with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.loss( logits=np.array(((45,), (-41,),), dtype=np.float32), labels=None, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL) def test_eval(self): head = head_lib.BinaryClassHead() logits = np.array(((45,), (-41,),), dtype=np.float32) labels = np.array(((1,), (1,),), dtype=np.int32) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(0, 41) / 2 = 41 / 2 = 20.5 expected_loss = 20.5 keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss, keys.ACCURACY: 1. / 2, keys.PRECISION: 1., keys.RECALL: 1. / 2, keys.PREDICTION_MEAN: 1. / 2, keys.LABEL_MEAN: 2. / 2, keys.ACCURACY_BASELINE: 2. 
        / 2,
        keys.AUC: 0.,
        keys.AUC_PR: 1.,
    }
    if tf.executing_eagerly():
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels)
      self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys())
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics})
      loss = head.loss(labels, logits, features=features, mode=ModeKeys.EVAL)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
    self.assertIsNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)
    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, _ = sess.run((spec.loss, update_ops))
      self.assertAllClose(expected_loss, loss)
      # Check results of value ops (in `metrics`).
      self.assertAllClose(expected_metrics,
                          {k: value_ops[k].eval() for k in value_ops})

  def test_eval_metric_ops_with_head_name(self):
    """Tests that every metric key is suffixed with the head's name."""
    head = head_lib.BinaryClassHead(name='some_binary_head')
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    keys = metric_keys.MetricKeys
    expected_metric_keys = [
        '{}/some_binary_head'.format(keys.LOSS_MEAN),
        '{}/some_binary_head'.format(keys.ACCURACY),
        '{}/some_binary_head'.format(keys.PRECISION),
        '{}/some_binary_head'.format(keys.RECALL),
        '{}/some_binary_head'.format(keys.PREDICTION_MEAN),
        '{}/some_binary_head'.format(keys.LABEL_MEAN),
        '{}/some_binary_head'.format(keys.ACCURACY_BASELINE),
        '{}/some_binary_head'.format(keys.AUC),
        '{}/some_binary_head'.format(keys.AUC_PR),
    ]
    eval_metrics = head.metrics()
    updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                          labels)
    self.assertItemsEqual(expected_metric_keys, updated_metrics.keys())

  def test_eval_with_regularization_losses(self):
    """Tests that regularization losses are folded into eval loss/metrics."""
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
    #                    = sum(0, 41) / 2 = 20.5
    expected_unregularized_loss = 20.5
    expected_regularized_loss = (
        expected_unregularized_loss + expected_regularization_loss)
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN: expected_unregularized_loss,
        keys.LOSS_REGULARIZATION: expected_regularization_loss,
        keys.ACCURACY: 1. / 2,
        keys.PRECISION: 1.,
        keys.RECALL: 1. / 2,
        keys.PREDICTION_MEAN: 1. / 2,
        keys.LABEL_MEAN: 2. / 2,
        keys.ACCURACY_BASELINE: 2.
        / 2,
        keys.AUC: 0.,
        keys.AUC_PR: 1.,
    }
    if tf.executing_eagerly():
      eval_metrics = head.metrics(regularization_losses=regularization_losses)
      updated_metrics = head.update_metrics(
          eval_metrics,
          features,
          logits,
          labels,
          regularization_losses=regularization_losses)
      # Assert metrics.
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics})
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        regularization_losses=regularization_losses,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, _ = sess.run((spec.loss, update_ops))
      self.assertAllClose(expected_regularized_loss, loss)
      # Check results of value ops (in `metrics`).
      self.assertAllClose(expected_metrics,
                          {k: value_ops[k].eval() for k in value_ops})

  def test_eval_with_vocabulary_list_create_loss(self):
    """Tests loss creation with string labels mapped through a vocabulary."""
    head = head_lib.BinaryClassHead(label_vocabulary=['aang', 'iroh'])
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = [[b'iroh'], [b'iroh']]
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size
    #      = sum([0, 41]) / 2 = 20.5
    expected_training_loss = 20.5
    # Create loss.
    if tf.executing_eagerly():
      training_loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
      self.assertAllClose(
          expected_training_loss, training_loss, rtol=1e-2, atol=1e-2)
      return

    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())

  def test_eval_with_vocabulary_list(self):
    """Tests the accuracy metric with string labels and a label vocabulary."""
    head = head_lib.BinaryClassHead(label_vocabulary=['aang', 'iroh'])
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = [[b'iroh'], [b'iroh']]
    features = {'x': np.array(((42,),), dtype=np.int32)}
    accuracy_key = metric_keys.MetricKeys.ACCURACY
    if tf.executing_eagerly():
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels)
      self.assertAllClose(1. / 2, updated_metrics[accuracy_key].result())
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      sess.run(update_ops)
      self.assertAllClose(1.
                          / 2, value_ops[accuracy_key].eval())

  def test_eval_with_thresholds_create_loss(self):
    """Tests loss creation when extra decision thresholds are configured."""
    thresholds = [0.25, 0.5, 0.75]
    head = head_lib.BinaryClassHead(thresholds=thresholds)
    logits = np.array(((-1,), (1,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
    # unreduced_loss = -ln(probabilities[label[i]])) = [-ln(0.269), -ln(0.731)]
    #                = [1.31304389, 0.31334182]
    # weighted sum loss = 1.62638571
    # loss = 0.813192855
    expected_training_loss = 0.813192855
    # Create loss.
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
    self.assertAllClose(
        expected_training_loss,
        self.evaluate(training_loss),
        rtol=1e-2,
        atol=1e-2)

  def test_eval_with_thresholds(self):
    """Tests per-threshold accuracy/precision/recall metrics in EVAL mode."""
    thresholds = [0.25, 0.5, 0.75]
    head = head_lib.BinaryClassHead(thresholds=thresholds)
    logits = np.array(((-1,), (1,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
    # loss = -sum(ln(probabilities[label[i]])) / batch_size
    #      = (-ln(0.269) -ln(0.731)) / 2
    #      = 1.62652338 / 2
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN: 1.62652338 / 2.,
        keys.ACCURACY: 1. / 2,
        keys.PRECISION: 1.,
        keys.RECALL: .5,
        keys.PREDICTION_MEAN: 1. / 2,
        keys.LABEL_MEAN: 2. / 2,
        keys.ACCURACY_BASELINE: 2. / 2,
        keys.AUC: 0.,
        keys.AUC_PR: 1.,
        keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 1.,
        keys.PRECISION_AT_THRESHOLD % thresholds[0]: 1.,
        keys.RECALL_AT_THRESHOLD % thresholds[0]: 1.,
        keys.ACCURACY_AT_THRESHOLD % thresholds[1]: .5,
        keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1.,
        keys.RECALL_AT_THRESHOLD % thresholds[1]: .5,
        keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 0.,
        keys.PRECISION_AT_THRESHOLD % thresholds[2]: 0.,
        keys.RECALL_AT_THRESHOLD % thresholds[2]: 0.,
    }
    tol = 1e-2
    if tf.executing_eagerly():
      # Create loss.
      training_loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
      self.assertAllClose(1.62652338 / 2., self.evaluate(training_loss))
      # Eval metrics.
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels)
      # Assert metrics.
      self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys())
      self.assertAllClose(
          expected_metrics, {
              k: self.evaluate(updated_metrics[k].result())
              for k in updated_metrics
          },
          atol=tol,
          rtol=tol)
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, _ = sess.run((spec.loss, update_ops))
      self.assertAllClose(1.62652338 / 2., loss)
      # Check results of value ops (in `metrics`).
      self.assertAllClose(
          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
          atol=tol,
          rtol=tol)

  def test_train_create_loss(self):
    """Tests loss creation in TRAIN mode with default unit weights."""
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
    # weights default to 1.
    # training loss = (1 * 0 + 1 * 41) / 2 = 20.5
    expected_training_loss = 20.5
    # Create loss.
    if tf.executing_eagerly():
      training_loss = head.loss(labels, logits, features)
      self.assertAllClose(expected_training_loss, training_loss)
      return

    training_loss = head.loss(labels, logits, features)
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())

  def test_train_create_loss_loss_reduction(self):
    """Tests create_loss with loss_reduction."""
    head = head_lib.BinaryClassHead(loss_reduction=tf.losses.Reduction.SUM)
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
    # weights default to 1.
    # training loss = (1 * 0 + 1 * 41)
    expected_training_loss = 41.
    # Create loss.
    if tf.executing_eagerly():
      training_loss = head.loss(labels, logits, features)
      self.assertAllClose(expected_training_loss, training_loss)
      return

    training_loss = head.loss(labels, logits, features)
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_training_loss, training_loss.eval())

  def test_train_labels_none(self):
    """Tests that error is raised when labels is None."""
    head = head_lib.BinaryClassHead()
    with self.assertRaisesRegexp(
        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
      head.loss(
          logits=np.array(((45,), (-41,),), dtype=np.float32),
          labels=None,
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN)

  def test_train(self):
    """Tests loss, train_op and summaries in TRAIN mode."""
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size
    #      = sum(0, 41) / 2 = 41 / 2 = 20.5
    expected_loss = 20.5
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      # The returned train op asserts the loss fed to it equals expected_loss.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)
    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str)

  def test_train_one_dim_create_loss(self):
    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
    head = head_lib.BinaryClassHead(weight_column='label_weights')
    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
    labels_rank_1 = np.array((1., 1., 0.,))
    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
    features = {
        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
        'label_weights': weights_rank_1,
    }
    # unreduced_loss = cross_entropy(labels, logits) = [0, 41, 44]
    # weights are reshaped to [3, 1] to match logits.
    # training loss = (1 * 0 + .1 * 41 + 1.5 * 44) / 3 = 23.366666667
    expected_training_loss = 23.366666667
    # Create loss.
    if tf.executing_eagerly():
      training_loss = head.loss(labels_rank_1, logits, features)
      self.assertAllClose(expected_training_loss, training_loss)
      return

    training_loss = head.loss(labels_rank_1, logits, features)
    self.assertAllClose(expected_training_loss, self.evaluate(training_loss))

  def test_train_one_dim(self):
    """Tests train with 1D labels and weights (shape [batch_size])."""
    head = head_lib.BinaryClassHead(weight_column='label_weights')
    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
    labels_rank_1 = np.array((1., 1., 0.,))
    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
    self.assertEqual((3,), labels_rank_1.shape)
    self.assertEqual((3,), weights_rank_1.shape)
    features = {
        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
        'label_weights': weights_rank_1,
    }
    # losses = label_weights*cross_entropy(labels, logits)
    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
    # loss = sum(losses) / batch_size = (1 + 4.1 + 66) / 3 = 23.366666667
    expected_loss = 23.366666667
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits,
          labels=labels_rank_1,
          features=features,
          mode=ModeKeys.TRAIN)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      # The returned train op asserts the loss fed to it equals expected_loss.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertIsNotNone(spec.train_op)
    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str)

  def test_train_with_regularization_losses(self):
    """Tests that regularization losses are added to the training loss."""
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
    #                    = sum(0, 41) / 2 = 20.5
    # loss = unregularized_loss + regularization_loss = 22.5
    expected_loss = 22.5
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits,
          labels=labels,
          features=features,
          mode=ModeKeys.TRAIN,
          regularization_losses=regularization_losses)
      self.assertAllClose(expected_loss, loss)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      # The returned train op asserts the regularized loss equals
      # expected_loss.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        regularization_losses=regularization_losses,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions, loss, train_op, and summaries.
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss) self.assertEqual(expected_train_result, train_result) test_lib._assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_REGULARIZATION: (expected_regularization_loss), }, summary_str) def test_float_labels_invalid_values(self): head = head_lib.BinaryClassHead() logits = np.array([[0.5], [-0.3]], dtype=np.float32) labels = np.array([[1.2], [0.4]], dtype=np.float32) features = {'x': np.array([[42]], dtype=np.float32)} if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, r'Labels must be <= 2 - 1'): head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) return with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, r'Labels must be <= n_classes - 1'): training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) def test_float_labels_train_create_loss(self): head = head_lib.BinaryClassHead() logits = np.array([[0.5], [-0.3]], dtype=np.float32) labels = np.array([[0.8], [0.4]], dtype=np.float32) features = {'x': np.array([[42]], dtype=np.float32)} # loss = cross_entropy(labels, logits) # = -label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i]) # = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)), # -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))] # = [0.57407698418, 0.67435524446] # weighted_sum_loss = 0.57407698418 + 0.67435524446 # training_loss = weighted_sum_loss / 2 = 0.62421611432 expected_training_loss = 0.62421611432 # Create loss. 
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
    self.assertAllClose(expected_training_loss, self.evaluate(training_loss))

  def test_float_labels_train(self):
    """Tests train loss and train_op with fractional (soft) labels."""
    head = head_lib.BinaryClassHead()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    expected_train_result = b'my_train_op'
    features = {'x': np.array([[42]], dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size
    #      = sum(-label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i])
    #        ) / batch_size
    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)) / 2
    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3)) / 2
    #      = 1.2484322 / 2 = 0.6242161
    expected_loss = 0.6242161
    # Create loss.
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
    self.assertAlmostEqual(
        expected_loss, self.evaluate(training_loss), delta=1.e-5)
    if tf.executing_eagerly():
      return

    def _train_op_fn(loss):
      # The returned train op asserts the loss fed to it is close to
      # expected_loss.
      with tf.control_dependencies((dnn_testing_utils.assert_close(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32)),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
      self.assertEqual(expected_train_result, train_result)

  def test_float_labels_eval_create_loss(self):
    """Tests loss creation with fractional (soft) labels in EVAL mode."""
    head = head_lib.BinaryClassHead()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    features = {'x': np.array([[42]], dtype=np.float32)}
    # unreduced_loss = cross_entropy(labels, logits)
    #      = -label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i])
    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
    #      = [0.57407698418, 0.67435524446]
    # weighted_sum_loss = 0.57407698418 + 0.67435524446
    # loss = weighted_sum_loss / batch_size = 1.24843222864 / 2 = 0.62421611432
    expected_training_loss = 0.62421611432
    # Create loss.
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
    self.assertAllClose(
        expected_training_loss,
        self.evaluate(training_loss),
        rtol=1e-2,
        atol=1e-2)

  def test_float_labels_eval(self):
    """Tests eval loss and the loss_mean metric with fractional labels."""
    head = head_lib.BinaryClassHead()
    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
    labels = np.array([[0.8], [0.4]], dtype=np.float32)
    features = {'x': np.array([[42]], dtype=np.float32)}
    # loss_sum = sum(cross_entropy(labels, logits))
    #      = sum(-label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i]))
    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
    #      = 1.2484322
    # loss = loss_sum / batch_size = 1.2484322 / 2 = 0.6242161
    expected_loss = 0.6242161
    # Create loss.
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
    self.assertAlmostEqual(
        expected_loss, self.evaluate(training_loss), delta=1.e-5)
    # Eval metrics.
loss_mean_key = metric_keys.MetricKeys.LOSS_MEAN if tf.executing_eagerly(): eval_metrics = head.metrics() updated_metrics = head.update_metrics(eval_metrics, features, logits, labels) self.assertAlmostEqual(expected_loss, updated_metrics[loss_mean_key].result().numpy()) return # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAlmostEqual(expected_loss, loss, delta=1.e-5) self.assertAlmostEqual(expected_loss, value_ops[loss_mean_key].eval()) def test_weighted_multi_example_predict(self): """3 examples, 1 batch.""" head = head_lib.BinaryClassHead(weight_column='label_weights') # Create estimator spec. 
    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
    pred_keys = prediction_keys.PredictionKeys
    keys = [
        pred_keys.LOGITS, pred_keys.LOGISTIC, pred_keys.PROBABILITIES,
        pred_keys.CLASS_IDS, pred_keys.CLASSES
    ]
    predictions = head.predictions(logits, keys)
    # int32 logits are cast to float32 in the prediction outputs.
    self.assertAllClose(
        logits.astype(np.float32),
        self.evaluate(predictions[pred_keys.LOGITS]))
    self.assertAllClose(
        tf.math.sigmoid(logits.astype(np.float32)),
        self.evaluate(predictions[pred_keys.LOGISTIC]))
    self.assertAllClose([[0., 1.], [1., 0.], [0., 1.]],
                        self.evaluate(predictions[pred_keys.PROBABILITIES]))
    self.assertAllClose([[1], [0], [1]],
                        self.evaluate(predictions[pred_keys.CLASS_IDS]))
    self.assertAllEqual([[b'1'], [b'0'], [b'1']],
                        self.evaluate(predictions[pred_keys.CLASSES]))

  def test_weighted_multi_example_eval(self):
    """3 examples, 1 batch."""
    head = head_lib.BinaryClassHead(weight_column='label_weights')
    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
    labels = np.array(((1,), (1,), (0,)), dtype=np.int32)
    features = {
        'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
        'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32)
    }
    # label_mean = (1*1 + .1*1 + 1.5*0)/(1 + .1 + 1.5) = 1.1/2.6
    #            = .42307692307
    expected_label_mean = .42307692307
    # losses = label_weights*cross_entropy(labels, logits)
    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
    # loss = sum(losses) / batch_size = (1 + 4.1 + 66) / 3 = 70.1 / 3 = 23.36667
    expected_loss = 23.366666667
    keys = metric_keys.MetricKeys
    expected_metrics = {
        # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
        #           = 70.1/2.6 = 26.9615384615
        keys.LOSS_MEAN: 26.9615384615,
        # accuracy = (1*1 + .1*0 + 1.5*0)/(1 + .1 + 1.5) = 1/2.6 = .38461538461
        keys.ACCURACY: .38461538461,
        keys.PRECISION: 1. / 2.5,
        keys.RECALL: 1.
        / 1.1,
        # prediction_mean = (1*1 + .1*0 + 1.5*1)/(1 + .1 + 1.5) = 2.5/2.6
        #                 = .96153846153
        keys.PREDICTION_MEAN: .96153846153,
        keys.LABEL_MEAN: expected_label_mean,
        keys.ACCURACY_BASELINE: 1 - expected_label_mean,
        keys.AUC: .45454565,
        keys.AUC_PR: .4010627,
    }
    if tf.executing_eagerly():
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels)
      self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys())
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics})
      loss = head.loss(labels, logits, features=features, mode=ModeKeys.EVAL)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, _ = sess.run((spec.loss, update_ops))
      self.assertAllClose(expected_loss, loss)
      # Check results of value ops (in `metrics`).
      self.assertAllClose(expected_metrics,
                          {k: value_ops[k].eval() for k in value_ops})

  def test_weighted_multi_example_train(self):
    """3 examples, 1 batch."""
    head = head_lib.BinaryClassHead(weight_column='label_weights')
    # Create estimator spec.
    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
    features = {
        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
        'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
    }
    labels = np.array(((1.,), (1.,), (0.,)))
    expected_train_result = b'my_train_op'
    # losses = label_weights*cross_entropy(labels, logits)
    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
    # loss = sum(losses) / batch_size = (1 + 4.1 + 66) / 3 = 23.366666667
    expected_loss = 23.366666667
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
      return

    def _train_op_fn(loss):
      # The returned train op asserts the loss fed to it equals expected_loss.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert spec contains expected tensors.
    self.assertIsNotNone(spec.loss)
    self.assertIsNotNone(spec.train_op)
    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(
          self, {metric_keys.MetricKeys.LOSS: expected_loss}, summary_str)

  def test_multi_dim_weighted_train_create_loss(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
    head = head_lib.BinaryClassHead(weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    features = {'weights': weights}
    # unreduced_loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
    # Weights are reshaped to [2, 2, 1] to match logits.
    # training_loss = (1*10 + 1.5*0 + 2*0 + 2.5*12) / 2*2 = 40 / 4 = 10
    expected_training_loss = 10.
    tol = 1e-2
    # Create loss.
    if tf.executing_eagerly():
      training_loss = head.loss(labels, logits, features, mode=ModeKeys.TRAIN)
      self.assertAllClose(
          expected_training_loss, training_loss, rtol=tol, atol=tol)
      return

    training_loss = head.loss(labels, logits, features)
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)

  def test_multi_dim_weighted_train(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
    head = head_lib.BinaryClassHead(weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    features = {'weights': weights}
    # losses = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
    # loss = weighted_sum_loss / batch_size = 40 / (2*2) = 10
    expected_loss = 10.
    tol = 1e-2
    # Create loss.
    if tf.executing_eagerly():
      training_loss = head.loss(labels, logits, features, mode=ModeKeys.TRAIN)
      self.assertAllClose(expected_loss, training_loss, rtol=tol, atol=tol)
      return

    expected_train_result = 'my_train_op'

    def _train_op_fn(loss):
      # Encodes the evaluated loss into the train op's result string so the
      # test can check the loss value that reached the train op.
      return tf.strings.join([
          tf.constant(expected_train_result),
          tf.strings.as_string(loss, precision=2)
      ])

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)

  def test_multi_dim_train_weights_wrong_inner_dim(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 1]."""
    head = head_lib.BinaryClassHead(weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1.], [2.]], dtype=np.float32)
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'weights shape'):
        head.loss(
            logits=logits,
            labels=labels,
            features={'weights': weights},
            mode=ModeKeys.TRAIN)
      return

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \] \[2 2 1\] \[weights_shape: \] \[2 1\]'):
        spec.loss.eval()

  def test_multi_dim_train_weights_wrong_outer_dim(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2, 2]."""
    head = head_lib.BinaryClassHead(weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[[1., 1.1], [1.5, 1.6]], [[2., 2.1], [2.5, 2.6]]])
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'weights shape'):
        head.loss(
            logits=logits,
            labels=labels,
            features={'weights': weights},
            mode=ModeKeys.TRAIN)
      return

    # A placeholder hides the static shape, so the mismatch is only caught
    # at session run time.
    weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features={'weights': weights_placeholder},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \]\s\[2 2 1\]\s\[weights_shape: \]\s\[2 2 2\]'):
        spec.loss.eval({weights_placeholder: weights})

  def test_multi_dim_weighted_eval(self):
    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
    head = head_lib.BinaryClassHead(weight_column='weights')
    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    # losses = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
    # loss = weighted_sum_loss / batch_size = 40 / (2*2) = 10.
    weighted_sum_loss = 40.
    expected_loss = 10.
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN: weighted_sum_loss / np.sum(weights),
        keys.ACCURACY: (1. * 0. + 1.5 * 1. + 2. * 1. + 2.5 * 0.) /
                       np.sum(weights),
        keys.PRECISION: 2.0 / 3.0,
        keys.RECALL: 2.0 / 4.5,
        keys.PREDICTION_MEAN: (1. * 1 + 1.5 * 0 + 2. * 1 + 2.5 * 0) /
                              np.sum(weights),
        keys.LABEL_MEAN: (1. * 0 + 1.5 * 0 + 2. * 1 + 2.5 * 1) /
                         np.sum(weights),
        keys.ACCURACY_BASELINE: (1. * 0 + 1.5 * 0 + 2. * 1 + 2.5 * 1) /
                                np.sum(weights),
        keys.AUC: 0.5222,
        keys.AUC_PR: 0.6582,
    }
    tol = 1e-2
    if tf.executing_eagerly():
      # NOTE(review): this eval test computes the loss with ModeKeys.TRAIN;
      # presumably the reduced loss is the same in both modes here — confirm.
      loss = head.loss(
          logits=logits,
          labels=labels,
          features={'weights': weights},
          mode=ModeKeys.TRAIN)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(
          eval_metrics,
          features={'weights': weights},
          logits=logits,
          labels=labels)
      # Assert metrics.
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics},
          rtol=tol,
          atol=tol)
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, _ = sess.run((spec.loss, update_ops))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      # Check results of value ops (in `metrics`).
      self.assertAllClose(
          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
          rtol=tol,
          atol=tol)


@test_util.deprecated_graph_mode_only
class BinaryClassHeadForEstimator(tf.test.TestCase):
  """Tests for create_estimator_spec running in Graph mode only."""

  def test_invalid_trainable_variables(self):
    """Rejects `trainable_variables` that is None or not a list/tuple."""
    head = head_lib.BinaryClassHead()

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params
        return [
            tf.strings.join([
                tf.constant('my_train_op'),
                tf.strings.as_string(loss, precision=2)
            ])
        ]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    with self.assertRaisesRegexp(ValueError,
                                 r'trainable_variables cannot be None'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (-41,),), dtype=np.float32),
          labels=np.array(((1,), (1,),), dtype=np.float64),
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables=None)
    with self.assertRaisesRegexp(
        ValueError, r'trainable_variables should be a list or a tuple'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (-41,),), dtype=np.float32),
          labels=np.array(((1,), (1,),), dtype=np.float64),
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables={
              'var_list': [tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]
          })

  def test_train_with_optimizer(self):
    """Trains via an optimizer whose get_updates asserts the expected loss."""
    head = head_lib.BinaryClassHead()
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size
    #      = sum(0, 41) / 2 = 41 / 2 = 20.5
    expected_loss = 20.5

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params
        # The control dependency makes the train_op fail at run time if the
        # loss passed in does not match the expected value.
        with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
            tf.cast(expected_loss, dtype=tf.dtypes.float32),
            tf.cast(loss, dtype=tf.dtypes.float32), name='assert_loss'),)):
          return [tf.constant(expected_train_result)]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=_Optimizer('my_optimizer'),
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)

  def test_train_with_update_ops(self):
    """`update_ops` passed to create_estimator_spec run as part of train_op."""
    with tf.Graph().as_default():
      w = tf.Variable(1)
      update_op = w.assign_add(1)
      t = tf.Variable('')
      expected_train_result = b'my_train_op'

      def _train_op_fn(loss):
        del loss
        return t.assign(expected_train_result)

      head = head_lib.BinaryClassHead()
      spec = head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (-41,),), dtype=np.float32),
          labels=np.array(((1,), (1,),), dtype=np.float64),
          train_op_fn=_train_op_fn,
          update_ops=[update_op],
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])
      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        sess.run(spec.train_op)
        w_value, t_value = sess.run([w, t])
        # Both the explicit update op and the train_op fn ran.
        self.assertEqual(2, w_value)
        self.assertEqual(expected_train_result, t_value)

  def test_train_summaries_with_head_name(self):
    """Loss summary key is suffixed with the head name."""
    head = head_lib.BinaryClassHead(name='some_binary_head')
    logits = np.array(((45,), (-41,),), dtype=np.float32)
    labels = np.array(((1,), (1,),), dtype=np.float64)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size
    #      = sum(0, 41) / 2 = 20.5
    expected_loss = 20.5

    def _train_op_fn(loss):
      del loss
      return tf.no_op()

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      summary_str = sess.run(spec.scaffold.summary_op)
      test_lib._assert_simple_summaries(
          self, {
              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS):
                  expected_loss,
          }, summary_str)

  def test_lookup_tables_in_graph(self):
    """Label-vocabulary lookup tables work end-to-end inside an Estimator."""
    head = head_lib.BinaryClassHead(label_vocabulary=['aang', 'iroh'])
    feature_columns = [tf.feature_column.numeric_column('x')]
    est = dnn.DNNEstimatorV2(
        head=head,
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        batch_norm=True)

    def input_fn():
      return ({
          'x': np.array(((42,), (43,),), dtype=np.int32)
      }, [[b'iroh'], [b'iroh']])

    # Train.
    num_steps = 1
    est.train(input_fn, steps=num_steps)
    # Eval.
    eval_results = est.evaluate(input_fn, steps=num_steps)
    self.assertEqual(num_steps,
                     eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, six.iterkeys(eval_results))
    # Predict.
    est.predict(input_fn)


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/head/head_utils.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for heads and unit tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow_estimator.python.estimator.head import binary_class_head
from tensorflow_estimator.python.estimator.head import multi_class_head

_DEFAULT_SERVING_KEY = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY


def binary_or_multi_class_head(n_classes, weight_column, label_vocabulary,
                               loss_reduction):
  """Creates either binary or multi-class head.

  Args:
    n_classes: Number of label classes.
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example. If it is a string, it is
      used as a key to fetch weight tensor from the `features`. If it is a
      `NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
      weight_column.normalizer_fn is applied on it to get weight tensor.
    label_vocabulary: A list of strings represents possible label values. If
      given, labels must be string type and have any value in
      `label_vocabulary`. If it is not given, that means labels are already
      encoded as integer or float within [0, 1] for `n_classes=2` and encoded
      as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also
      there will be errors if vocabulary is not provided and labels are string.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Defines how to
      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.

  Returns:
    A `Head` instance.
  """
  # n_classes == 2 is special-cased: BinaryClassHead does not take n_classes.
  if n_classes == 2:
    head = binary_class_head.BinaryClassHead(
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  else:
    head = multi_class_head.MultiClassHead(
        n_classes,
        weight_column=weight_column,
        label_vocabulary=label_vocabulary,
        loss_reduction=loss_reduction)
  return head


def _initialize_variables(test_case, scaffold):
  """Finalizes `scaffold` and runs its init/ready ops in the default session.

  Test helper: asserts the scaffold has no custom init hooks, then runs the
  init, local-init and ready ops so variables are usable, and checks a saver
  was created.
  """
  scaffold.finalize()
  test_case.assertIsNone(scaffold.init_feed_dict)
  test_case.assertIsNone(scaffold.init_fn)
  scaffold.init_op.run()
  scaffold.ready_for_local_init_op.eval()
  scaffold.local_init_op.run()
  scaffold.ready_op.eval()
  test_case.assertIsNotNone(scaffold.saver)


def _assert_simple_summaries(test_case,
                             expected_summaries,
                             summary_str,
                             tol=1e-6):
  """Assert summary the specified simple values.

  Args:
    test_case: test case.
    expected_summaries: Dict of expected tags and simple values.
    summary_str: Serialized `summary_pb2.Summary`.
    tol: Tolerance for relative and absolute.
  """
  summary = tf.compat.v1.summary.Summary()
  summary.ParseFromString(summary_str)
  test_case.assertAllClose(
      expected_summaries, {v.tag: v.simple_value for v in summary.value},
      rtol=tol,
      atol=tol)


def _assert_no_hooks(test_case, spec):
  """Asserts that `spec` carries no training chief hooks and no training hooks."""
  test_case.assertAllEqual([], spec.training_chief_hooks)
  test_case.assertAllEqual([], spec.training_hooks)


================================================
FILE: tensorflow_estimator/python/estimator/head/multi_class_head.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Multi class head."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import lookup_ops
from tensorflow_estimator.python.estimator import model_fn
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.export import export_output
from tensorflow_estimator.python.estimator.head import base_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys
from tensorflow_estimator.python.estimator.util import tf_keras_v2


@estimator_export('estimator.MultiClassHead')
class MultiClassHead(base_head.Head):
  """Creates a `Head` for multi class classification.

  Uses `sparse_softmax_cross_entropy` loss.

  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many
  applications, the shape is `[batch_size, n_classes]`.

  `labels` must be a dense `Tensor` with shape matching `logits`, namely
  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
  `labels` must be an integer `Tensor` with values specifying the class index.

  If `weight_column` is specified, weights must be of shape
  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.

  The loss is the weighted sum over the input dimensions. Namely, if the input
  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
  `batch_size`.

  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
  `(labels, logits, features, loss_reduction)` as arguments and returns
  unreduced loss with shape `[D0, D1, ... DN, 1]`. `loss_fn` must support
  integer `labels` with shape `[D0, D1, ... DN, 1]`. Namely, the head applies
  `label_vocabulary` to the input labels before passing them to `loss_fn`.

  Usage:

  >>> n_classes = 3
  >>> head = tf.estimator.MultiClassHead(n_classes)
  >>> logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
  >>> labels = np.array(((1,), (1,)), dtype=np.int64)
  >>> features = {'x': np.array(((42,),), dtype=np.int32)}
  >>> # expected_loss = sum(cross_entropy(labels, logits)) / batch_size
  >>> #               = sum(10, 0) / 2 = 5.
  >>> loss = head.loss(labels, logits, features=features)
  >>> print('{:.2f}'.format(loss.numpy()))
  5.00
  >>> eval_metrics = head.metrics()
  >>> updated_metrics = head.update_metrics(
  ...   eval_metrics, features, logits, labels)
  >>> for k in sorted(updated_metrics):
  ...   print('{} : {:.2f}'.format(k, updated_metrics[k].result().numpy()))
  accuracy : 0.50
  average_loss : 5.00
  >>> preds = head.predictions(logits)
  >>> print(preds['logits'])
  tf.Tensor(
    [[10.  0.  0.]
     [ 0. 10.  0.]], shape=(2, 3), dtype=float32)

  Usage with a canned estimator:

  ```python
  my_head = tf.estimator.MultiClassHead(n_classes=3)
  my_estimator = tf.estimator.DNNEstimator(
      head=my_head,
      hidden_units=...,
      feature_columns=...)
  ```

  It can also be used with a custom `model_fn`. Example:

  ```python
  def _my_model_fn(features, labels, mode):
    my_head = tf.estimator.MultiClassHead(n_classes=3)
    logits = tf_keras.Model(...)(features)

    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=tf_keras.optimizers.Adagrad(lr=0.1),
        logits=logits)

  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
  ```

  Args:
    n_classes: Number of classes, must be greater than 2 (for 2 classes, use
      `BinaryClassHead`).
    weight_column: A string or a `NumericColumn` created by
      `tf.feature_column.numeric_column` defining feature column representing
      weights. It is used to down weight or boost examples during training. It
      will be multiplied by the loss of the example.
    label_vocabulary: A list or tuple of strings representing possible label
      values. If it is not given, that means labels are already encoded as an
      integer within [0, n_classes). If given, labels must be of string type
      and have any value in `label_vocabulary`. Note that errors will be raised
      if `label_vocabulary` is not provided but labels are strings. If both
      `n_classes` and `label_vocabulary` are provided, `label_vocabulary`
      should contain exactly `n_classes` items.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Decides how to
      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`,
      namely weighted sum of losses divided by `batch size * label_dimension`.
    loss_fn: Optional loss function.
    name: Name of the head. If provided, summary and metrics keys will be
      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
  """

  def __init__(self,
               n_classes,
               weight_column=None,
               label_vocabulary=None,
               loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
               loss_fn=None,
               name=None):
    # Validate constructor arguments before storing anything.
    if n_classes is None:
      raise ValueError('n_classes cannot be None')
    if label_vocabulary is not None and not isinstance(label_vocabulary,
                                                       (list, tuple)):
      raise ValueError(
          'label_vocabulary should be a list or a tuple. Given type: {}'.format(
              type(label_vocabulary)))
    if label_vocabulary is not None and len(label_vocabulary) != n_classes:
      raise ValueError(
          '"label_vocabulary" does not have "n_classes" items. '
          'len(label_vocabulary)={}, n_classes={}, label_vocabulary={}'.format(
              len(label_vocabulary), n_classes, label_vocabulary))
    base_head.validate_loss_reduction(loss_reduction)
    if loss_fn:
      base_head.validate_loss_fn_args(loss_fn)
    self._n_classes = base_head.validate_n_classes(n_classes)
    self._weight_column = weight_column
    self._label_vocabulary = label_vocabulary
    self._loss_reduction = loss_reduction
    self._loss_fn = loss_fn
    self._name = name
    # Metric keys.
    keys = metric_keys.MetricKeys
    self._loss_mean_key = self._summary_key(keys.LOSS_MEAN)
    self._accuracy_key = self._summary_key(keys.ACCURACY)
    self._loss_regularization_key = self._summary_key(keys.LOSS_REGULARIZATION)

  @property
  def name(self):
    """See `base_head.Head` for details."""
    return self._name

  @property
  def logits_dimension(self):
    """See `base_head.Head` for details."""
    return self._n_classes

  @property
  def loss_reduction(self):
    """See `base_head.Head` for details."""
    return self._loss_reduction

  # Attributes for lookup tables in Eager execution. Note that for Graph
  # execution, the lookup tables are created on demanded to make sure the
  # lookup table is in the same graph as its input tensors for `train` and
  # 'eval' of Estimator (as Estimator recreates graphs for `train`, `eval` and
  # `predict`).
  _cached_class_id_table = None
  _cached_class_string_table = None

  @property
  def _class_id_table(self):
    """Creates a lookup table for class_id.

    In eager execution, this lookup table will be lazily created on the first
    call of `self._class_id_table`, and cached for later use; In graph
    execution, it will be created on demand.

    Returns:
      A hash table for lookup.
    """
    # In graph mode a fresh table is built on every access (never cached), so
    # each of Estimator's per-mode graphs gets its own table.
    if self._cached_class_id_table is None or not tf.executing_eagerly():
      self._cached_class_id_table = lookup_ops.index_table_from_tensor(
          vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup')
    return self._cached_class_id_table

  @property
  def _class_string_table(self):
    """Creates a lookup table for class_string.

    In eager execution, this lookup table will be lazily created on the first
    call of `self._class_string_table` and cached for later use; In graph
    execution, it will be created on demand.

    Returns:
      A hash table for lookup.
    """
    if (self._cached_class_string_table is None or not tf.executing_eagerly()):
      self._cached_class_string_table = (
          lookup_ops.index_to_string_table_from_tensor(
              vocabulary_list=self._label_vocabulary,
              name='class_string_lookup'))
    return self._cached_class_string_table

  def _processed_labels(self, logits, labels):
    """Converts labels to integer id space."""
    labels = base_head.check_dense_labels_match_logits_and_reshape(
        labels=labels, logits=logits, expected_labels_dimension=1)
    if self._label_vocabulary is None:
      if not labels.dtype.is_integer:
        raise ValueError(
            'Labels dtype should be integer. Instead got {}.'.format(
                labels.dtype))
      label_ids = labels
    else:
      if labels.dtype != tf.dtypes.string:
        raise ValueError('Labels dtype should be string if there is a '
                         'vocabulary. Instead got {}'.format(labels.dtype))
      label_ids = self._class_id_table.lookup(labels)
    # Ensure ids fall in [0, n_classes) before they reach the loss.
    return base_head.check_label_range(label_ids, self._n_classes)

  def _unweighted_loss_and_weights(self, logits, label_ids, features):
    """Computes loss spec."""
    if self._loss_fn:
      unweighted_loss = base_head.call_loss_fn(
          loss_fn=self._loss_fn,
          labels=label_ids,
          logits=logits,
          features=features,
          expected_loss_dim=1)
    else:
      unweighted_loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(
          labels=label_ids,
          logits=logits,
          reduction=tf.compat.v1.losses.Reduction.NONE)
      # Restore the squeezed dim, so unweighted_loss matches the weights shape.
      unweighted_loss = tf.compat.v1.expand_dims(unweighted_loss, axis=-1)
    weights = base_head.get_weights_and_check_match_logits(
        features=features, weight_column=self._weight_column, logits=logits)
    return unweighted_loss, weights

  def loss(self,
           labels,
           logits,
           features=None,
           mode=None,
           regularization_losses=None):
    """Returns regularized training loss. See `base_head.Head` for details."""
    del mode  # Unused for this head.
    with ops.name_scope(
        'losses', values=(logits, labels, regularization_losses, features)):
      logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
      label_ids = self._processed_labels(logits, labels)
      unweighted_loss, weights = self._unweighted_loss_and_weights(
          logits, label_ids, features)
      training_loss = tf_keras_v2.__internal__.losses.compute_weighted_loss(
          unweighted_loss,
          sample_weight=weights,
          reduction=self._loss_reduction)
      regularization_loss = tf.math.add_n(
          regularization_losses) if regularization_losses is not None else None
      regularized_training_loss = (
          training_loss + regularization_loss
          if regularization_loss is not None else training_loss)
    return regularized_training_loss

  def predictions(self, logits, keys=None):
    """Return predictions based on keys.

    See `base_head.Head` for details.

    Args:
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      keys: a list or tuple of prediction keys. Each key can be either the
        class variable of prediction_keys.PredictionKeys or its string value,
        such as: prediction_keys.PredictionKeys.CLASSES or 'classes'. If not
        specified, it will return the predictions for all valid keys.

    Returns:
      A dict of predictions.
    """
    pred_keys = prediction_keys.PredictionKeys
    valid_keys = [
        pred_keys.LOGITS, pred_keys.PROBABILITIES, pred_keys.CLASS_IDS,
        pred_keys.CLASSES, pred_keys.ALL_CLASS_IDS, pred_keys.ALL_CLASSES
    ]
    if keys:
      base_head.check_prediction_keys(keys, valid_keys)
    else:
      keys = valid_keys
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    predictions = {}
    with ops.name_scope('predictions', values=(logits,)):
      if pred_keys.LOGITS in keys:
        predictions[pred_keys.LOGITS] = logits
      if pred_keys.PROBABILITIES in keys:
        probabilities = tf.compat.v1.nn.softmax(
            logits, name=pred_keys.PROBABILITIES)
        predictions[pred_keys.PROBABILITIES] = probabilities
      if pred_keys.CLASS_IDS in keys or pred_keys.CLASSES in keys:
        # class_ids's shape is [D0, D1, ... DN].
        class_ids = tf.compat.v1.math.argmax(
            logits, axis=-1, name=pred_keys.CLASS_IDS)
        # Expand to [batch_size, 1].
        class_ids = tf.compat.v1.expand_dims(class_ids, axis=-1)
        if pred_keys.CLASS_IDS in keys:
          predictions[pred_keys.CLASS_IDS] = class_ids
        if pred_keys.CLASSES in keys:
          if self._label_vocabulary:
            # Map integer ids back to vocabulary strings.
            classes = self._class_string_table.lookup(class_ids)
          else:
            classes = tf.strings.as_string(class_ids, name='str_classes')
          predictions[pred_keys.CLASSES] = classes
      if pred_keys.ALL_CLASS_IDS in keys:
        predictions[pred_keys.ALL_CLASS_IDS] = base_head.all_class_ids(
            logits, n_classes=self._n_classes)
      if pred_keys.ALL_CLASSES in keys:
        predictions[pred_keys.ALL_CLASSES] = base_head.all_classes(
            logits,
            n_classes=self._n_classes,
            label_vocabulary=self._label_vocabulary)
      return predictions

  def metrics(self, regularization_losses=None):
    """Creates metrics. See `base_head.Head` for details."""
    keys = metric_keys.MetricKeys
    with ops.name_scope('metrics', values=(regularization_losses,)):
      # Mean metric.
      eval_metrics = {}
      eval_metrics[self._loss_mean_key] = tf_keras.metrics.Mean(
          name=keys.LOSS_MEAN)
      if regularization_losses is not None:
        eval_metrics[self._loss_regularization_key] = tf_keras.metrics.Mean(
            name=keys.LOSS_REGULARIZATION)
      # Accuracy metric.
      eval_metrics[self._accuracy_key] = tf_keras.metrics.Accuracy(
          name=keys.ACCURACY)
    return eval_metrics

  def update_metrics(self,
                     eval_metrics,
                     features,
                     logits,
                     labels,
                     regularization_losses=None):
    """Updates eval metrics. See `base_head.Head` for details."""
    preds = self.predictions(logits)
    class_ids = preds[prediction_keys.PredictionKeys.CLASS_IDS]
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    label_ids = self._processed_labels(logits, labels)
    unweighted_loss, weights = self._unweighted_loss_and_weights(
        logits, label_ids, features)
    # Update metrics.
    eval_metrics[self._loss_mean_key].update_state(
        values=unweighted_loss, sample_weight=weights)
    eval_metrics[self._accuracy_key].update_state(
        y_true=label_ids, y_pred=class_ids, sample_weight=weights)
    if regularization_losses is not None:
      regularization_loss = tf.math.add_n(regularization_losses)
      eval_metrics[self._loss_regularization_key].update_state(
          values=regularization_loss)
    return eval_metrics

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 trainable_variables=None,
                                 train_op_fn=None,
                                 update_ops=None,
                                 regularization_losses=None):
    """Returns a `model_fn._TPUEstimatorSpec`.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      labels: Labels integer or string `Tensor` with shape matching `logits`,
        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is
        required argument when `mode` equals `TRAIN` or `EVAL`.
      optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets `train_op = optimizer.get_updates(
        loss, trainable_variables)`, which updates variables to minimize
        `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
      update_ops: A list or tuple of update ops to be run at training time.
        For example, layers such as BatchNormalization create mean and
        variance update ops that need to be run at training time. In
        Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As
        Tensorflow 2.x doesn't have collections, update_ops need to be passed
        explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses. These losses are
        usually expressed as a batch average, so for best results users need
        to use the default `loss_reduction=SUM_OVER_BATCH_SIZE` when creating
        the head to avoid scaling errors.

    Returns:
      A `model_fn._TPUEstimatorSpec` instance.

    Raises:
      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
        mode, or if both are set.
    """
    with ops.name_scope(self._name, 'head'):
      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      predictions = self.predictions(logits)
      if mode == ModeKeys.PREDICT:
        probabilities = predictions[pred_keys.PROBABILITIES]
        classifier_output = base_head.classification_output(
            scores=probabilities,
            n_classes=self._n_classes,
            label_vocabulary=self._label_vocabulary)
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                base_head.DEFAULT_SERVING_KEY: classifier_output,
                base_head.CLASSIFY_SERVING_KEY: classifier_output,
                base_head.PREDICT_SERVING_KEY:
                    export_output.PredictOutput(predictions)
            })
      regularized_training_loss = self.loss(
          logits=logits,
          labels=labels,
          features=features,
          mode=mode,
          regularization_losses=regularization_losses)
      # Eval.
      if mode == ModeKeys.EVAL:
        eval_metrics = self.metrics(regularization_losses=regularization_losses)
        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
            mode=ModeKeys.EVAL,
            predictions=predictions,
            loss=regularized_training_loss,
            eval_metrics=base_head.create_eval_metrics_tuple(
                self.update_metrics, {
                    'eval_metrics': eval_metrics,
                    'features': features,
                    'logits': logits,
                    'labels': labels,
                    'regularization_losses': regularization_losses
                }))
      # Train.
      train_op = base_head.create_estimator_spec_train_op(
          head_name=self._name,
          optimizer=optimizer,
          train_op_fn=train_op_fn,
          update_ops=update_ops,
          trainable_variables=trainable_variables,
          regularized_training_loss=regularized_training_loss,
          loss_reduction=self._loss_reduction)
    # Create summary.
    base_head.create_estimator_spec_summary(
        regularized_training_loss=regularized_training_loss,
        regularization_losses=regularization_losses,
        summary_key_fn=self._summary_key)
    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
        mode=ModeKeys.TRAIN,
        predictions=predictions,
        loss=regularized_training_loss,
        train_op=train_op)


================================================
FILE: tensorflow_estimator/python/estimator/head/multi_class_head_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for multi_class_head.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import dnn
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.head import head_utils as test_lib
from tensorflow_estimator.python.estimator.head import multi_class_head as head_lib
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys


@test_util.run_all_in_graph_and_eager_modes
class MultiClassHead(tf.test.TestCase):

  def test_n_classes_is_none(self):
    with self.assertRaisesRegexp(ValueError, 'n_classes cannot be None'):
      head_lib.MultiClassHead(n_classes=None)

  def test_n_classes_is_2(self):
    # Two classes must use BinaryClassHead instead.
    with self.assertRaisesRegexp(ValueError, 'n_classes must be > 2'):
      head_lib.MultiClassHead(n_classes=2)

  def test_invalid_loss_reduction(self):
    with self.assertRaisesRegexp(
        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
      head_lib.MultiClassHead(
          n_classes=3, loss_reduction='invalid_loss_reduction')
    with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: none'):
      head_lib.MultiClassHead(
          n_classes=3, loss_reduction=tf.losses.Reduction.NONE)

  def test_loss_fn_arg_labels_missing(self):

    def _loss_fn(logits):
      del logits  # Unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: labels\. '
        r'Given arguments: \(\'logits\',\)'):
      head_lib.MultiClassHead(n_classes=3, loss_fn=_loss_fn)

  def test_loss_fn_arg_logits_missing(self):

    def _loss_fn(labels):
      del labels  # unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: logits\. '
        r'Given arguments: \(\'labels\',\)'):
      head_lib.MultiClassHead(n_classes=3, loss_fn=_loss_fn)

  def test_loss_fn_arg_features_ok(self):

    def _loss_fn(labels, logits, features):
      del labels, logits, features  # Unused

    head_lib.MultiClassHead(n_classes=3, loss_fn=_loss_fn)

  def test_loss_fn_arg_invalid(self):

    def _loss_fn(labels, logits, name=None):
      del labels, logits, name  # Unused

    with self.assertRaisesRegexp(ValueError,
                                 r'loss_fn has unexpected args: \[\'name\'\]'):
      head_lib.MultiClassHead(n_classes=3, loss_fn=_loss_fn)

  def test_invalid_logits_shape(self):
    """Last logits dim must equal n_classes, both statically and at run time."""
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes)
    self.assertEqual(n_classes, head.logits_dimension)

    # Logits should be shape (batch_size, 3).
    logits_2x2 = np.array((
        (45., 44.),
        (41., 42.),
    ))
    pred_key = prediction_keys.PredictionKeys.PROBABILITIES

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      preds = head.predictions(logits_2x2, [pred_key])
      self.evaluate(preds[pred_key])
    if tf.executing_eagerly():
      return

    # Dynamic shape only works in Graph mode.
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': np.array((
            (30.,),
            (42.,),
        ))},
        mode=ModeKeys.PREDICT,
        logits=logits_placeholder,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.predictions[pred_key].eval({logits_placeholder: logits_2x2})

  def test_invalid_labels_shape(self):
    """Labels must be [batch_size, 1]; mismatches fail statically or at run time."""
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes)
    self.assertEqual(n_classes, head.logits_dimension)

    # Logits should be shape (batch_size, 3).
    # Labels should be shape (batch_size, 1).
    labels_2x2 = np.array((
        (1, 2),
        (0, 1),
    ), dtype=int)
    logits_2x3 = np.array((
        (1., 2., 3.),
        (1., 2., 3.),
    ))
    features = {'x': np.array(((42.,),))}

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
      training_loss = head.loss(
          logits=logits_2x3,
          labels=labels_2x2,
          features=features,
          mode=ModeKeys.EVAL)
      self.evaluate(training_loss)
    if tf.executing_eagerly():
      return

    # Dynamic shape only works in Graph mode.
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.int64)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    training_loss = head.loss(
        logits=logits_placeholder,
        labels=labels_placeholder,
        features=features,
        mode=ModeKeys.EVAL)
    with self.cached_session():
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
        training_loss.eval({
            logits_placeholder: logits_2x3,
            labels_placeholder: labels_2x2
        })

  def test_invalid_labels_type(self):
    """Without a vocabulary, float labels are rejected with a dtype error."""
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes)
    self.assertEqual(n_classes, head.logits_dimension)

    # Logits should be shape (batch_size, 3).
    # Labels should be shape (batch_size, 1).
    labels_2x1 = np.array((
        (1.,),
        (1.,),
    ))
    logits_2x3 = np.array((
        (1., 2., 3.),
        (1., 2., 3.),
    ))
    features = {'x': np.array(((42.,),))}

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'Labels dtype'):
      head.loss(
          logits=logits_2x3,
          labels=labels_2x1,
          features=features,
          mode=ModeKeys.EVAL)
    if tf.executing_eagerly():
      return

    # Dynamic shape only works in Graph mode.
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    with self.assertRaisesRegexp(ValueError, 'Labels dtype'):
      head.loss(
          logits=logits_placeholder,
          labels=labels_placeholder,
          features=features,
          mode=ModeKeys.EVAL)

  def test_invalid_labels_values(self):
    """Label ids outside [0, n_classes) are rejected."""
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes)
    self.assertEqual(n_classes, head.logits_dimension)

    labels_2x1_with_large_id = np.array((
        (45,),
        (1,),
    ), dtype=int)
    labels_2x1_with_negative_id = np.array((
        (-5,),
        (1,),
    ), dtype=int)
    logits_2x3 = np.array((
        (1., 2., 4.),
        (1., 2., 3.),
    ))
    features = {'x': np.array(((42.,),))}

    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'Labels must be <= 3 - 1'):
        training_loss = head.loss(
            logits=logits_2x3,
            labels=labels_2x1_with_large_id,
            features=features,
            mode=ModeKeys.EVAL)
      with self.assertRaisesRegexp(ValueError, 'Labels must be >= 0'):
        training_loss = head.loss(
            logits=logits_2x3,
            labels=labels_2x1_with_negative_id,
            features=features,
            mode=ModeKeys.EVAL)
      return

    # Dynamic shape only works in Graph mode.
labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.int64) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.loss( logits=logits_placeholder, labels=labels_placeholder, features=features, mode=ModeKeys.EVAL) with self.cached_session(): with self.assertRaisesOpError('Labels must be <= n_classes - 1'): training_loss.eval({ labels_placeholder: labels_2x1_with_large_id, logits_placeholder: logits_2x3 }) with self.cached_session(): with self.assertRaisesOpError('Labels must be >= 0'): training_loss.eval({ labels_placeholder: labels_2x1_with_negative_id, logits_placeholder: logits_2x3 }) def test_invalid_labels_sparse_tensor(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) self.assertEqual(n_classes, head.logits_dimension) labels_2x1 = tf.sparse.SparseTensor( values=['english', 'italian'], indices=[[0, 0], [1, 0]], dense_shape=[2, 1]) logits_2x3 = np.array(( (1., 2., 4.), (1., 2., 3.), )) with self.assertRaisesRegexp(ValueError, 'SparseTensor labels are not supported.'): loss = head.loss( logits=logits_2x3, labels=labels_2x1, features={'x': np.array(((42.,),))}, mode=ModeKeys.EVAL) self.evaluate(loss) def test_incompatible_labels_shape(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) self.assertEqual(n_classes, head.logits_dimension) # Logits should be shape (batch_size, 3). # Labels should be shape (batch_size, 1). # Here batch sizes are different. values_3x1 = np.array(( (1,), (1,), (1,), )) values_2x3 = np.array(( (1., 2., 3.), (1., 2., 3.), )) features = {'x': values_2x3} # Static shape. # Eager mode. if tf.executing_eagerly(): with self.assertRaisesRegex(ValueError, 'labels shape'): head.loss( logits=values_2x3, labels=values_3x1, features=features, mode=ModeKeys.EVAL) return # Graph mode. with self.assertRaisesRegex(ValueError, r'shape.*\(3,\).*\(2, 3\)'): head.loss( logits=values_2x3, labels=values_3x1, features=features, mode=ModeKeys.EVAL) # Dynamic shape only works in Graph mode. 
labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.int64) logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) training_loss = head.loss( logits=logits_placeholder, labels=labels_placeholder, features=features, mode=ModeKeys.EVAL) with self.cached_session(): with self.assertRaisesRegex( tf.errors.InvalidArgumentError, r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'): training_loss.eval({ labels_placeholder: values_3x1, logits_placeholder: values_2x3 }) def test_predict(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) self.assertEqual(n_classes, head.logits_dimension) logits = [[1., 0., 0.], [0., 0., 1.]] expected_probabilities = [[0.576117, 0.2119416, 0.2119416], [0.2119416, 0.2119416, 0.576117]] expected_class_ids = [[0], [2]] expected_all_class_ids = [[0, 1, 2]] * 2 expected_classes = [[b'0'], [b'2']] expected_all_classes = [[b'0', b'1', b'2']] * 2 expected_export_classes = [[b'0', b'1', b'2']] * 2 keys = prediction_keys.PredictionKeys preds = head.predictions(logits) self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS])) self.assertAllClose(expected_probabilities, self.evaluate(preds[keys.PROBABILITIES])) self.assertAllClose(expected_class_ids, self.evaluate(preds[keys.CLASS_IDS])) self.assertAllEqual(expected_classes, self.evaluate(preds[keys.CLASSES])) self.assertAllClose(expected_all_class_ids, self.evaluate(preds[keys.ALL_CLASS_IDS])) self.assertAllEqual(expected_all_classes, self.evaluate(preds[keys.ALL_CLASSES])) if tf.executing_eagerly(): return spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) self.assertItemsEqual( (test_lib._DEFAULT_SERVING_KEY, 'predict', 'classification'), spec.export_outputs.keys()) # Assert predictions and export_outputs. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[keys.LOGITS]) self.assertAllClose(expected_probabilities, predictions[keys.PROBABILITIES]) self.assertAllClose(expected_class_ids, predictions[keys.CLASS_IDS]) self.assertAllEqual(expected_classes, predictions[keys.CLASSES]) self.assertAllClose(expected_all_class_ids, predictions[keys.ALL_CLASS_IDS]) self.assertAllEqual(expected_all_classes, predictions[keys.ALL_CLASSES]) self.assertAllClose( expected_probabilities, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores)) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].classes)) def test_predict_with_tensor_n_classes(self): n_classes = tf.constant(3, dtype=tf.dtypes.int32) head = head_lib.MultiClassHead(n_classes) self.assertEqual(n_classes, head.logits_dimension) logits = [[1., 0., 0.], [0., 0., 1.]] expected_probabilities = [[0.576117, 0.2119416, 0.2119416], [0.2119416, 0.2119416, 0.576117]] expected_class_ids = [[0], [2]] expected_all_class_ids = [[0, 1, 2]] * 2 expected_classes = [[b'0'], [b'2']] expected_all_classes = [[b'0', b'1', b'2']] * 2 expected_export_classes = [[b'0', b'1', b'2']] * 2 keys = prediction_keys.PredictionKeys preds = head.predictions(logits) self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS])) self.assertAllClose(expected_probabilities, self.evaluate(preds[keys.PROBABILITIES])) self.assertAllClose(expected_class_ids, self.evaluate(preds[keys.CLASS_IDS])) self.assertAllEqual(expected_classes, self.evaluate(preds[keys.CLASSES])) self.assertAllClose(expected_all_class_ids, self.evaluate(preds[keys.ALL_CLASS_IDS])) self.assertAllEqual(expected_all_classes, self.evaluate(preds[keys.ALL_CLASSES])) if tf.executing_eagerly(): return spec = head.create_estimator_spec( features={'x': np.array(((42,),), 
dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) self.assertItemsEqual( (test_lib._DEFAULT_SERVING_KEY, 'predict', 'classification'), spec.export_outputs.keys()) # Assert predictions and export_outputs. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[keys.LOGITS]) self.assertAllClose(expected_probabilities, predictions[keys.PROBABILITIES]) self.assertAllClose(expected_class_ids, predictions[keys.CLASS_IDS]) self.assertAllEqual(expected_classes, predictions[keys.CLASSES]) self.assertAllClose(expected_all_class_ids, predictions[keys.ALL_CLASS_IDS]) self.assertAllEqual(expected_all_classes, predictions[keys.ALL_CLASSES]) self.assertAllClose( expected_probabilities, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores)) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].classes)) def test_predict_with_invalid_keys(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) self.assertEqual(n_classes, head.logits_dimension) logits = [[1., 0., 0.], [0., 0., 1.]] with self.assertRaisesRegexp( ValueError, r'Prediction key must be in PredictionKeys, given: some_invalid_key'): preds = head.predictions(logits, ['some_invalid_key']) self.evaluate(preds) def test_predict_with_vocabulary_list(self): n_classes = 3 head = head_lib.MultiClassHead( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[1., 0., 0.], [0., 0., 1.]] expected_classes = [[b'aang'], [b'zuko']] expected_export_classes = [[b'aang', b'iroh', b'zuko']] * 2 pred_key = prediction_keys.PredictionKeys.CLASSES if tf.executing_eagerly(): preds = head.predictions(logits, [pred_key]) self.assertAllEqual(expected_classes, preds[prediction_keys.PredictionKeys.CLASSES]) return spec = 
head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertAllEqual(expected_classes, sess.run(spec.predictions[pred_key])) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].classes)) def test_weight_should_not_impact_prediction(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes, weight_column='label_weights') logits = [[1., 0., 0.], [0., 0., 1.]] expected_probabilities = [[0.576117, 0.2119416, 0.2119416], [0.2119416, 0.2119416, 0.576117]] weights_2x1 = [[1.], [2.]] features = { 'x': np.array(((42,),), dtype=np.int32), 'label_weights': weights_2x1, } keys = prediction_keys.PredictionKeys preds = head.predictions(logits, [keys.LOGITS, keys.PROBABILITIES]) self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS])) self.assertAllClose(expected_probabilities, self.evaluate(preds[keys.PROBABILITIES])) if tf.executing_eagerly(): return spec = head.create_estimator_spec( features=features, mode=ModeKeys.PREDICT, logits=logits, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) predictions = sess.run(spec.predictions) self.assertAllClose(logits, predictions[keys.LOGITS]) self.assertAllClose(expected_probabilities, predictions[keys.PROBABILITIES]) def test_eval_create_loss(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) # logits: [2, 3], labels: [2, 1] logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size = 10 / 2 = 5. expected_training_loss = 5. # Create loss. 
training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) self.assertAllClose( expected_training_loss, self.evaluate(training_loss), rtol=1e-2, atol=1e-2) def test_eval_create_loss_loss_fn(self): """Tests head.loss for eval mode and custom loss_fn.""" loss = np.array([[1.], [2.]], dtype=np.float32) logits_input = np.array([[-10., 10., 0.], [-15., 10., 0]], dtype=np.float32) labels_input = np.array([[1], [2]], dtype=np.int64) def _loss_fn(labels, logits): check_labels = tf.debugging.Assert( tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels]) check_logits = tf.debugging.Assert( tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits]) with tf.control_dependencies([check_labels, check_logits]): return tf.constant(loss) head = head_lib.MultiClassHead(n_classes=3, loss_fn=_loss_fn) actual_training_loss = head.loss( logits=logits_input, labels=labels_input, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL) self.assertAllClose(np.sum(loss) / 2., self.evaluate(actual_training_loss)) def test_eval_create_loss_loss_fn_wrong_shape(self): """Tests custom loss_fn that returns Tensor of unexpected shape.""" loss = np.array([1., 2.], dtype=np.float32) def _loss_fn(labels, logits): del labels, logits # Unused return tf.constant(loss) head = head_lib.MultiClassHead(n_classes=3, loss_fn=_loss_fn) logits = np.array([[-10., 10., 0.], [-15., 10., 0.]], dtype=np.float32) labels = np.array([[1], [2]], dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, 'loss_shape'): head.loss(logits=logits, labels=labels, features=features) else: actual_training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. 
\] ' r'\[logits_shape: \] \[2 3\] \[loss_shape: \] \[2\]'): self.evaluate(actual_training_loss) def test_eval_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib.MultiClassHead(n_classes=3) with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.loss( logits=np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32), labels=None, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL) def test_eval(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. expected_loss = 5. keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss, keys.ACCURACY: 0.5, # 1 of 2 labels is correct. } tol = 1e-2 if tf.executing_eagerly(): eval_metrics = head.metrics() updated_metrics = head.update_metrics(eval_metrics, features, logits, labels) self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys()) self.assertAllClose( expected_metrics, {k: updated_metrics[k].result() for k in updated_metrics}, rtol=tol, atol=tol) loss = head.loss(labels, logits, features=features, mode=ModeKeys.EVAL) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) return # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, and metrics. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of value ops (in `metrics`). self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_eval_metric_ops_with_head_name(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes, name='some_multiclass_head') logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} expected_metric_keys = [ '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS_MEAN), '{}/some_multiclass_head'.format(metric_keys.MetricKeys.ACCURACY) ] eval_metrics = head.metrics() updated_metrics = head.update_metrics(eval_metrics, features, logits, labels) self.assertItemsEqual(expected_metric_keys, updated_metrics.keys()) def test_eval_with_regularization_losses(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} regularization_losses = [1.5, 0.5] expected_regularization_loss = 2. # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. expected_unregularized_loss = 5. expected_regularized_loss = ( expected_unregularized_loss + expected_regularization_loss) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_unregularized_loss, keys.LOSS_REGULARIZATION: expected_regularization_loss, keys.ACCURACY: 0.5, # 1 of 2 labels is correct. 
} tol = 1e-2 if tf.executing_eagerly(): eval_metrics = head.metrics(regularization_losses=regularization_losses) updated_metrics = head.update_metrics( eval_metrics, features, logits, labels, regularization_losses=regularization_losses) # Assert metrics. self.assertAllClose( expected_metrics, {k: updated_metrics[k].result() for k in updated_metrics}, rtol=tol, atol=tol) return # Create estimator spec. spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, regularization_losses=regularization_losses, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert predictions, loss, and metrics. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_regularized_loss, loss, rtol=tol, atol=tol) # Check results of value ops (in `metrics`). self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_eval_with_label_vocabulary_create_loss(self): n_classes = 3 head = head_lib.MultiClassHead( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[10., 0, 0], [0, 10, 0]] labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size = [5.0, 0]. expected_training_loss = 5. 
if tf.executing_eagerly(): training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) self.assertAllClose( expected_training_loss, training_loss, rtol=1e-2, atol=1e-2) else: training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2) def test_eval_with_label_vocabulary(self): n_classes = 3 head = head_lib.MultiClassHead( n_classes, label_vocabulary=['aang', 'iroh', 'zuko']) logits = [[10., 0, 0], [0, 10, 0]] labels = [[b'iroh'], [b'iroh']] features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. expected_loss = 5. keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss, keys.ACCURACY: 0.5, # 1 of 2 labels is correct. } tol = 1e-2 if tf.executing_eagerly(): loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) self.assertAllClose( expected_loss, self.evaluate(loss), rtol=tol, atol=tol) eval_metrics = head.metrics() updated_metrics = head.update_metrics(eval_metrics, features, logits, labels) self.assertAllClose( expected_metrics, {k: updated_metrics[k].result() for k in updated_metrics}, rtol=tol, atol=tol) return spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of value ops (in `metrics`). 
self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_weighted_multi_example_eval(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes, weight_column='label_weights') # Create estimator spec. logits = np.array(( (10, 0, 0), (0, 10, 0), (0, 0, 10), ), dtype=np.float32) labels = np.array(((1,), (2,), (2,)), dtype=np.int64) weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64) # weighted_loss = sum(cross_entropy(labels, logits) * weights) # = sum([10, 10, 0] * [1, 2, 3]) # = sum([10, 20, 0]) = 30. # loss = weighted_loss / batch_size = 30 / 3 = 10 # loss_mean = weighted_loss / sum(weights) = 30 / 6 = 5 expected_loss = 10. features = { 'x': np.array(((42,),), dtype=np.int32), 'label_weights': weights_3x1 } keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: 30. / np.sum(weights_3x1), # Weighted accuracy is 1 * 3.0 / sum weights = 0.5 keys.ACCURACY: 0.5, } tol = 1e-2 if tf.executing_eagerly(): loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) eval_metrics = head.metrics() updated_metrics = head.update_metrics(eval_metrics, features, logits, labels) self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys()) self.assertAllClose( expected_metrics, {k: updated_metrics[k].result() for k in updated_metrics}, rtol=tol, atol=tol) return spec = head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert loss, and metrics. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of value ops (in `metrics`). self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_train_create_loss(self): head = head_lib.MultiClassHead(n_classes=3) logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # unreduced_loss = cross_entropy(labels, logits) = [10, 0]. expected_unreduced_loss = [[10.], [0.]] # Weights default to 1. expected_weights = 1. # training_loss = (1 * 10 + 1 * 0) / 2 = 5. expected_training_loss = 5. tol = 1e-2 if tf.executing_eagerly(): training_loss = head.loss(labels, logits, features) self.assertAllClose( expected_training_loss, training_loss, rtol=tol, atol=tol) unreduced_loss, actual_weights = head._unweighted_loss_and_weights( logits, labels, features) self.assertAllClose( expected_unreduced_loss, unreduced_loss, rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights) return training_loss = head.loss(labels, logits, features) unreduced_loss, actual_weights = head._unweighted_loss_and_weights( logits, labels, features) with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=tol, atol=tol) self.assertAllClose( expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights) def test_train_create_loss_loss_reduction(self): """Tests create_loss with loss_reduction.""" head = 
head_lib.MultiClassHead( n_classes=3, loss_reduction=tf.losses.Reduction.SUM) logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # unreduced_loss = cross_entropy(labels, logits) = [10, 0]. expected_unreduced_loss = [[10.], [0.]] # Weights default to 1. expected_weights = 1. # training_loss = 1 * 10 + 1 * 0 expected_training_loss = 10. tol = 1e-2 if tf.executing_eagerly(): training_loss = head.loss(labels, logits, features) self.assertAllClose( expected_training_loss, training_loss, rtol=tol, atol=tol) unreduced_loss, actual_weights = head._unweighted_loss_and_weights( logits, labels, features) self.assertAllClose( expected_unreduced_loss, unreduced_loss, rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights) return training_loss = head.loss(labels, logits, features) unreduced_loss, actual_weights = head._unweighted_loss_and_weights( logits, labels, features) with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) self.assertAllClose( expected_training_loss, training_loss.eval(), rtol=tol, atol=tol) self.assertAllClose( expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol) self.assertAllClose(expected_weights, actual_weights) def test_train_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib.MultiClassHead(n_classes=3) with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. 
Given: None\.'): head.loss( logits=np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32), labels=None, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN) def test_train(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. expected_loss = 5. tol = 1e-2 if tf.executing_eagerly(): loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries(self, { metric_keys.MetricKeys.LOSS: expected_loss, }, summary_str, tol) def test_train_with_regularization_losses(self): n_classes = 3 head = head_lib.MultiClassHead(n_classes) logits = np.array(( (10, 0, 0), (0, 10, 0), ), dtype=np.float32) labels = np.array(((1,), (1,)), dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} regularization_losses = [1.5, 0.5] expected_regularization_loss = 2. # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size # = sum(10, 0) / 2 = 5. # loss = unregularized_loss + regularization_loss = 7. expected_loss = 7. tol = 1e-2 if tf.executing_eagerly(): loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN, regularization_losses=regularization_losses) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=2) ]) spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, regularization_losses=regularization_losses, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert predictions, loss, train_op, and summaries. 
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)
      test_lib._assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              metric_keys.MetricKeys.LOSS_REGULARIZATION: (
                  expected_regularization_loss),
          }, summary_str, tol)

  def test_train_one_dim_create_loss(self):
    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='label_weights')
    logits = np.array((
        (10, 0, 0),
        (0, 10, 0),
        (0, 0, 10),
    ), dtype=np.float32)
    labels_rank_1 = np.array((
        1,
        2,
        2,
    ), dtype=np.int64)
    weights_rank_1 = np.array((
        1.,
        2.,
        3.,
    ), dtype=np.float64)
    features = {
        'x': np.array(((42,),), dtype=np.float32),
        'label_weights': weights_rank_1
    }
    # unreduced_loss = cross_entropy(labels, logits) = [10, 10, 0].
    # weights are reshaped to [3, 1] to match logits.
    # training_loss = sum(1 * 10 + 2 * 10 + 3 * 0) / batch_size = 30. / 3 = 10.
    expected_training_loss = 10.
    tol = 1e-2
    # Eager path asserts on the loss value directly; the graph path below
    # evaluates the same loss tensor inside a session.
    if tf.executing_eagerly():
      training_loss = head.loss(labels_rank_1, logits, features)
      self.assertAllClose(
          expected_training_loss, training_loss, rtol=tol, atol=tol)
      return
    training_loss = head.loss(labels_rank_1, logits, features)
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)

  def test_train_one_dim(self):
    """Tests train with 1D labels and weights (shape [batch_size])."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='label_weights')
    logits = np.array((
        (10, 0, 0),
        (0, 10, 0),
        (0, 0, 10),
    ), dtype=np.float32)
    labels_rank_1 = np.array((
        1,
        2,
        2,
    ), dtype=np.int64)
    weights_rank_1 = np.array((
        1.,
        2.,
        3.,
    ), dtype=np.float64)
    self.assertEqual((3,), labels_rank_1.shape)
    self.assertEqual((3,), weights_rank_1.shape)
    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3]) / batch_size
    # = sum([10, 10, 0] * [1, 2, 3]) / 3 = 30 / 3 = 10.
    expected_loss = 10.
    features = {
        'x': np.array(((42,),), dtype=np.float32),
        'label_weights': weights_rank_1
    }
    tol = 1e-2
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits,
          labels=labels_rank_1,
          features=features,
          mode=ModeKeys.TRAIN)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      return
    expected_train_result = 'my_train_op'

    # Stub train op that encodes the loss value into a string so both the
    # op name and the loss can be asserted on after sess.run.
    def _train_op_fn(loss):
      return tf.strings.join([
          tf.constant(expected_train_result),
          tf.strings.as_string(loss, precision=2)
      ])

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    self.assertIsNotNone(spec.loss)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)
    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str, tol)

  def test_train_with_vocabulary_create_loss(self):
    n_classes = 3
    head = head_lib.MultiClassHead(
        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
    logits = [[10., 0, 0], [0, 10, 0]]
    labels = [[b'iroh'], [b'iroh']]
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size = 10 / 2 = 5.
    expected_training_loss = 5.
    if tf.executing_eagerly():
      training_loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertAllClose(
          expected_training_loss, training_loss, rtol=1e-2, atol=1e-2)
      return
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)

  def test_train_with_vocabulary(self):
    n_classes = 3
    head = head_lib.MultiClassHead(
        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
    logits = [[10., 0, 0], [0, 10, 0]]
    labels = [[b'iroh'], [b'iroh']]
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # loss = sum(cross_entropy(labels, logits)) / batch_size
    # = sum(10, 0) / 2 = 5.
    expected_loss = 5.
    tol = 1e-2
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      return

    def _train_op_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss = sess.run(spec.loss)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)

  def test_weighted_multi_example_train(self):
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes, weight_column='label_weights')
    logits = np.array((
        (10, 0, 0),
        (0, 10, 0),
        (0, 0, 10),
    ), dtype=np.float32)
    labels = np.array(((1,), (2,), (2,)), dtype=np.int64)
    weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64)
    expected_train_result = 'my_train_op'
    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3]) / batch_size
    # = sum([10, 10, 0] * [1, 2, 3]) / 3 = 30 / 3 = 10
    expected_loss = 10.
    tol = 1e-2
    features = {
        'x': np.array(((42,),), dtype=np.float32),
        'label_weights': weights_3x1
    }
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      return

    def _train_op_fn(loss):
      return tf.strings.join([
          tf.constant(expected_train_result),
          tf.strings.as_string(loss, precision=2)
      ])

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    self.assertIsNotNone(spec.loss)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)
    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      loss, train_result, summary_str = sess.run(
          (spec.loss, spec.train_op, spec.scaffold.summary_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str, tol)

  def test_multi_dim_weighted_train_create_loss(self):
    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='weights')
    logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]],
                      dtype=np.float32)
    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    # unreduced_loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
    # weights are reshaped to [2, 2, 1] to match logits.
    # training_loss = sum(1*0 + 1.5*12 + 2*0 + 2.5*15) / batch_size
    # = 55.5 / (2*2) = 13.875
    expected_training_loss = 13.875
    tol = 1e-2
    if tf.executing_eagerly():
      training_loss = head.loss(labels, logits, features={'weights': weights})
      self.assertAllClose(
          expected_training_loss, training_loss, rtol=tol, atol=tol)
      return
    training_loss = head.loss(labels, logits, features={'weights': weights})
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(
          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)

  def test_multi_dim_weighted_train(self):
    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='weights')
    logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]],
                      dtype=np.float32)
    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    tol = 1e-2
    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
    # weighted_sum_loss = (1*0 + 1.5*12 + 2*0 + 2.5*15) = 55.5
    # training_loss = weighted_sum_loss / batch_size = 55.5 / (2*2) = 13.875
    expected_loss = 13.875
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits,
          labels=labels,
          features={'weights': weights},
          mode=ModeKeys.TRAIN)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      return
    expected_train_result = 'my_train_op'

    def _train_op_fn(loss):
      return tf.strings.join([
          tf.constant(expected_train_result),
          tf.strings.as_string(loss, precision=2)
      ])

    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)

  def test_multi_dim_train_weights_wrong_inner_dim(self):
    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 1]."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='weights')
    logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]],
                      dtype=np.float32)
    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
    weights = np.array([[1.], [2.]], dtype=np.float32)
    # Eager mode raises at loss() time; graph mode raises when the loss
    # tensor is evaluated.
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'weights shape'):
        head.loss(
            logits=logits,
            labels=labels,
            features={'weights': weights},
            mode=ModeKeys.TRAIN)
      return

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
        spec.loss.eval()

  def test_multi_dim_train_weights_wrong_outer_dim(self):
    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3]."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='weights')
    logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]],
                      dtype=np.float32)
    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
    weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]],
                        [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]])
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'weights shape'):
        head.loss(
            logits=logits,
            labels=labels,
            features={'weights': weights},
            mode=ModeKeys.TRAIN)
      return
    # A placeholder defers the shape check to session run time.
    weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features={'weights': weights_placeholder},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'):
        spec.loss.eval({weights_placeholder: weights})

  def test_multi_dim_weighted_eval(self):
    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
    head = head_lib.MultiClassHead(n_classes=3, weight_column='weights')
    logits = np.array([[[10, 0, 0], [12, 0, 0]], [[0, 10, 0], [0, 15, 0]]],
                      dtype=np.float32)
    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
    # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
    # training_loss = weighted_sum_loss / batch_size = 55.5 / (2*2) = 13.875
    expected_loss = 13.875
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN:
            55.5 / np.sum(weights),
        keys.ACCURACY:
            (1. * 1. + 1.5 * 0. + 2. * 1. + 2.5 * 0.) / np.sum(weights),
    }
    tol = 1e-2
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits,
          labels=labels,
          features={'weights': weights},
          mode=ModeKeys.TRAIN)
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(
          eval_metrics,
          features={'weights': weights},
          logits=logits,
          labels=labels)
      # Assert metrics.
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics},
          rtol=tol,
          atol=tol)
      return
    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'weights': weights},
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions, loss, and metrics.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      loss, _ = sess.run((spec.loss, update_ops))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      # Check results of value ops (in `metrics`).
      self.assertAllClose(
          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
          rtol=tol,
          atol=tol)


@test_util.deprecated_graph_mode_only
class MultiClassHeadForEstimator(tf.test.TestCase):
  """Tests for create_estimator_spec running in Graph mode only."""

  def test_invalid_trainable_variables(self):
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes)

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params
        return [
            tf.strings.join([
                tf.constant('my_train_op'),
                tf.strings.as_string(loss, precision=2)
            ])
        ]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    with self.assertRaisesRegexp(ValueError,
                                 r'trainable_variables cannot be None'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array((
              (10, 0, 0),
              (0, 10, 0),
          ), dtype=np.float32),
          labels=np.array(((1,), (1,)), dtype=np.int64),
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables=None)
    with self.assertRaisesRegexp(
        ValueError, r'trainable_variables should be a list or a tuple'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array((
              (10, 0, 0),
              (0, 10, 0),
          ), dtype=np.float32),
          labels=np.array(((1,), (1,)), dtype=np.int64),
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables={
              'var_list':
                  [tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]
          })

  def test_train_with_optimizer(self):
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes)
    logits = np.array((
        (10, 0, 0),
        (0, 10, 0),
    ), dtype=np.float32)
    labels = np.array(((1,), (1,)), dtype=np.int64)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    expected_train_result = 'my_train_op'

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params
        return [
            tf.strings.join([
                tf.constant(expected_train_result),
                tf.strings.as_string(loss, precision=2)
            ])
        ]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    # loss = sum(cross_entropy(labels, logits)) / batch_size
    # = sum(10, 0) / 2 = 5.
    expected_loss = 5.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=_Optimizer('my_optimizer'),
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    tol = 1e-2
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
      self.assertEqual(
          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
          train_result)

  def test_train_with_update_ops(self):
    n_classes = 3
    with tf.Graph().as_default():
      w = tf.Variable(1)
      update_op = w.assign_add(1)
      t = tf.Variable('')
      expected_train_result = b'my_train_op'

      def _train_op_fn(loss):
        del loss
        return t.assign(expected_train_result)

      head = head_lib.MultiClassHead(n_classes)
      spec = head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array((
              (10, 0, 0),
              (0, 10, 0),
          ), dtype=np.float32),
          labels=np.array(((1,), (1,)), dtype=np.int64),
          train_op_fn=_train_op_fn,
          update_ops=[update_op],
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])
      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        sess.run(spec.train_op)
        w_value, t_value = sess.run([w, t])
        # w was incremented by the update_op that ran alongside train_op.
        self.assertEqual(2, w_value)
        self.assertEqual(expected_train_result, t_value)

  def test_train_summaries_with_head_name(self):
    n_classes = 3
    head = head_lib.MultiClassHead(n_classes, name='some_multiclass_head')
    logits = np.array((
        (10, 0, 0),
        (0, 10, 0),
    ), dtype=np.float32)
    labels = np.array(((1,), (1,)), dtype=np.int64)
    # loss = sum(cross_entropy(labels, logits)) / batch_size= sum(10, 0) / 2 = 5
    expected_loss = 5.
    features = {'x': np.array(((42,),), dtype=np.int32)}

    def _train_op_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert summaries: the head name is appended to the loss summary key.
    tol = 1e-2
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      summary_str = sess.run(spec.scaffold.summary_op)
      test_lib._assert_simple_summaries(
          self, {
              '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS):
                  expected_loss,
          }, summary_str, tol)

  def test_lookup_tables_in_graph(self):
    n_classes = 3
    head = head_lib.MultiClassHead(
        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
    feature_columns = [tf.feature_column.numeric_column('x')]
    # Create dnn estimator.
    est = dnn.DNNEstimatorV2(
        head=head, hidden_units=(2, 2), feature_columns=feature_columns)

    def input_fn():
      return ({
          'x': np.array((
              (42,),
              (43,),
          ), dtype=np.int32)
      }, [[b'iroh'], [b'iroh']])

    # Train.
    num_steps = 1
    est.train(input_fn, steps=num_steps)
    # Eval.
    eval_results = est.evaluate(input_fn, steps=num_steps)
    self.assertEqual(num_steps,
                     eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(eval_results))
    # Predict.
est.predict(input_fn) def test_missmatch_n_classes_label_vocabulary(self): with self.assertRaises(ValueError): head_lib.MultiClassHead( n_classes=3, label_vocabulary=['a', 'b', 'c', 'd']) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/head/multi_head.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Multi head class.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import six import tensorflow as tf from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.export import export_output from tensorflow_estimator.python.estimator.head import base_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys def _no_op_train_fn(loss): del loss return tf.no_op() def _default_export_output(export_outputs, head_name): """Extracts the default export output from the given export_outputs dict.""" if len(export_outputs) == 1: return 
next(six.itervalues(export_outputs)) try: return export_outputs[base_head.DEFAULT_SERVING_KEY] except KeyError: raise ValueError( '{} did not specify default export_outputs. ' 'Given: {} ' 'Suggested fix: Use one of the heads in tf.estimator, or include ' 'key {} in export_outputs.'.format(head_name, export_outputs, base_head.DEFAULT_SERVING_KEY)) @estimator_export('estimator.MultiHead') class MultiHead(base_head.Head): """Creates a `Head` for multi-objective learning. This class merges the output of multiple `Head` objects. Specifically: * For training, sums losses of each head, calls `train_op_fn` with this final loss. * For eval, merges metrics by adding `head.name` suffix to the keys in eval metrics, such as `precision/head1.name`, `precision/head2.name`. * For prediction, merges predictions and updates keys in prediction dict to a 2-tuple, `(head.name, prediction_key)`. Merges `export_outputs` such that by default the first head is served. Usage: >>> head1 = tf.estimator.MultiLabelHead(n_classes=2, name='head1') >>> head2 = tf.estimator.MultiLabelHead(n_classes=3, name='head2') >>> multi_head = tf.estimator.MultiHead([head1, head2]) >>> logits = { ... 'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), ... 'head2': np.array([[20., -20., 20.], [-30., 20., -20.]], ... dtype=np.float32),} >>> labels = { ... 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), ... 
'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),} >>> features = {'x': np.array(((42,),), dtype=np.float32)} >>> # For large logits, sigmoid cross entropy loss is approximated as: >>> # loss = labels * (logits < 0) * (-logits) + >>> # (1 - labels) * (logits > 0) * logits => >>> # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]] >>> # loss1 = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75 >>> # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]] >>> # loss2 = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15.00 >>> # loss = loss1 + loss2 = 8.75 + 15.00 = 23.75 >>> loss = multi_head.loss(labels, logits, features=features) >>> print('{:.2f}'.format(loss.numpy())) 23.75 >>> eval_metrics = multi_head.metrics() >>> updated_metrics = multi_head.update_metrics( ... eval_metrics, features, logits, labels) >>> for k in sorted(updated_metrics): ... print('{} : {:.2f}'.format(k, updated_metrics[k].result().numpy())) auc/head1 : 0.17 auc/head2 : 0.33 auc_precision_recall/head1 : 0.60 auc_precision_recall/head2 : 0.40 average_loss/head1 : 8.75 average_loss/head2 : 15.00 loss/head1 : 8.75 loss/head2 : 15.00 >>> preds = multi_head.predictions(logits) >>> print(preds[('head1', 'logits')]) tf.Tensor( [[-10. 10.] [-15. 10.]], shape=(2, 2), dtype=float32) Usage with a canned estimator: ```python # In `input_fn`, specify labels as a dict keyed by head name: def input_fn(): features = ... labels1 = ... labels2 = ... return features, {'head1.name': labels1, 'head2.name': labels2} # In `model_fn`, specify logits as a dict keyed by head name: def model_fn(features, labels, mode): # Create simple heads and specify head name. head1 = tf.estimator.MultiClassHead(n_classes=3, name='head1') head2 = tf.estimator.BinaryClassHead(name='head2') # Create MultiHead from two simple heads. head = tf.estimator.MultiHead([head1, head2]) # Create logits for each head, and combine them into a dict. 
logits1, logits2 = logit_fn() logits = {'head1.name': logits1, 'head2.name': logits2} # Return the merged EstimatorSpec return head.create_estimator_spec(..., logits=logits, ...) # Create an estimator with this model_fn. estimator = tf.estimator.Estimator(model_fn=model_fn) estimator.train(input_fn=input_fn) ``` Also supports `logits` as a `Tensor` of shape `[D0, D1, ... DN, logits_dimension]`. It will split the `Tensor` along the last dimension and distribute it appropriately among the heads. E.g.: ```python # Input logits. logits = np.array([[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32) # Suppose head1 and head2 have the following logits dimension. head1.logits_dimension = 2 head2.logits_dimension = 3 # After splitting, the result will be: logits_dict = {'head1_name': [[-1., 1.], [-1.5, 1.]], 'head2_name': [[2., -2., 2.], [-3., 2., -2.]]} ``` Usage: ```python def model_fn(features, labels, mode): # Create simple heads and specify head name. head1 = tf.estimator.MultiClassHead(n_classes=3, name='head1') head2 = tf.estimator.BinaryClassHead(name='head2') # Create multi-head from two simple heads. head = tf.estimator.MultiHead([head1, head2]) # Create logits for the multihead. The result of logits is a `Tensor`. logits = logit_fn(logits_dimension=head.logits_dimension) # Return the merged EstimatorSpec return head.create_estimator_spec(..., logits=logits, ...) ``` Args: heads: List or tuple of `Head` instances. All heads must have `name` specified. The first head in the list is the default used at serving time. head_weights: Optional list of weights, same length as `heads`. Used when merging losses to calculate the weighted sum of losses from each head. If `None`, all losses are weighted equally. """ def __init__(self, heads, head_weights=None): if not heads: raise ValueError('Must specify heads. Given: {}'.format(heads)) if head_weights: if len(head_weights) != len(heads): raise ValueError( 'heads and head_weights must have the same size. 
' 'Given len(heads): {}. Given len(head_weights): {}.'.format( len(heads), len(head_weights))) self._logits_dimension = 0 for head in heads: if head.name is None: raise ValueError( 'All given heads must have name specified. Given: {}'.format(head)) self._logits_dimension += head.logits_dimension self._heads = tuple(heads) self._head_weights = tuple(head_weights) if head_weights else tuple() # Metric keys. keys = metric_keys.MetricKeys self._loss_regularization_key = self._summary_key(keys.LOSS_REGULARIZATION) loss_keys = [] for head in self._heads: loss_keys.append('{}/{}'.format(keys.LOSS, head.name)) self._loss_keys = tuple(loss_keys) @property def name(self): """See `base_head.Head` for details.""" return '_'.join([h.name for h in self._heads]) @property def logits_dimension(self): """See `base_head.Head` for details.""" return self._logits_dimension @property def loss_reduction(self): """See `base_head.Head` for details.""" loss_reductions = [head.loss_reduction for head in self._heads] if len(set(loss_reductions)) > 1: raise ValueError( 'The loss_reduction must be the same for different heads. ' 'Given: {}'.format(loss_reductions)) return loss_reductions[0] def _split_logits(self, logits): """Splits logits along the last dimension and returns a dict. If the input logits is not a dict, splitting is applied based on the logits dimension of each head. For example: ```python # head1.logits_dimension = 2 # head2.logits_dimension = 3 head1 = tf.estimator.MultiLabelHead(n_classes=2, name='head1_name') head2 = tf.estimator.MultiClassHead(n_classes=3, name='head2_name') multi_head = tf.estimator.MultiHead([head1, head2]) # Input logits logits = np.array([[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32) # As logits is not a dict, _split_logits is applied and returns the # logits_dict as logits_dict = {'head1_name': [[-1., 1.], [-1.5, 1.]], 'head2_name': [[2., -2., 2.], [-3., 2., -2.]]} ``` Args: logits: logits `Tensor` with shape `[D0, D1, ... 
DN, logits_dimension]`. For many applications, the shape is `[batch_size, logits_dimension]`. Returns: logits_dict: A dict of logits for each head. """ logits_dict = {} with ops.name_scope('split_logits', values=[logits]): logits = ops.convert_to_tensor(logits) logits_dimensions = [head.logits_dimension for head in self._heads] total_logits_dimension = sum(logits_dimensions) logits_tensor_shape = logits.shape.as_list() last_dimension_size = logits_tensor_shape[-1] if last_dimension_size is not None: if last_dimension_size != total_logits_dimension: raise ValueError( 'Could not split logits of shape %r among the heads with ' 'individual logits dimensions: %r. The last dimension of the ' 'logits tensor should equal %d but is %d.' % ((logits_tensor_shape, logits_dimensions, last_dimension_size, total_logits_dimension))) # TODO(b/119617064): unify eager and graph implementations if tf.executing_eagerly(): logits_shape = logits._shape_tuple() # pylint: disable=protected-access batch_shape = logits_shape[:-1] else: batch_shape = tf.compat.v1.shape(logits)[:-1] zeros_like_batch_shape = tf.compat.v1.zeros_like(batch_shape) minus_ones_like_batch_shape = -1 * tf.compat.v1.ones_like(batch_shape) begin_idx = 0 for head in self._heads: begin_tensor = tf.concat([zeros_like_batch_shape, [begin_idx]], axis=0) size_tensor = tf.concat( [minus_ones_like_batch_shape, [head.logits_dimension]], axis=0) logits_dict[head.name] = tf.slice( logits, begin=begin_tensor, size=size_tensor) begin_idx += head.logits_dimension return logits_dict def _check_logits_and_labels(self, logits, labels=None): """Validates the keys of logits and labels.""" head_names = [] for head in self._heads: head_names.append(head.name) # Checks logits keys and splits it if it's not a dict if isinstance(logits, dict): logits_missing_names = list(set(head_names) - set(list(logits))) if logits_missing_names: raise ValueError('logits has missing values for head(s): {}'.format( logits_missing_names)) logits_dict = logits 
else: logits_dict = self._split_logits(logits) # Checks labels type and its keys if labels is not None: if not isinstance(labels, dict): raise ValueError('labels must be a dict. Given: {}'.format(labels)) labels_missing_names = list(set(head_names) - set(list(labels))) if labels_missing_names: raise ValueError('labels has missing values for head(s): {}'.format( labels_missing_names)) return logits_dict def loss(self, labels, logits, features=None, mode=None, regularization_losses=None): """Returns regularized training loss. See `base_head.Head` for details.""" logits_dict = self._check_logits_and_labels(logits, labels) training_losses = [] for head in self._heads: training_loss = head.loss( logits=logits_dict[head.name], labels=labels[head.name], features=features, mode=mode) training_losses.append(training_loss) training_losses = tuple(training_losses) with ops.name_scope( 'merge_losses', values=training_losses + (self._head_weights or tuple())): if self._head_weights: head_weighted_training_losses = [] for training_loss, head_weight in zip(training_losses, self._head_weights): head_weighted_training_losses.append( tf.math.multiply(training_loss, head_weight)) training_losses = head_weighted_training_losses merged_training_loss = tf.math.add_n(training_losses) regularization_loss = tf.math.add_n( regularization_losses) if regularization_losses is not None else None regularized_training_loss = ( merged_training_loss + regularization_loss if regularization_loss is not None else merged_training_loss) return regularized_training_loss def predictions(self, logits, keys=None): """Create predictions. 
See `base_head.Head` for details.""" logits_dict = self._check_logits_and_labels(logits) predictions = {} with ops.name_scope('merge_pred'): for head in self._heads: head_preds = head.predictions(logits=logits_dict[head.name]) for k, v in six.iteritems(head_preds): predictions[(head.name, k)] = v return predictions def metrics(self, regularization_losses=None): """Creates metrics. See `base_head.Head` for details.""" eval_metrics = {} keys = metric_keys.MetricKeys # Add regularization loss metric for multi_head. if regularization_losses is not None: eval_metrics[self._loss_regularization_key] = tf_keras.metrics.Mean( name=keys.LOSS_REGULARIZATION) with ops.name_scope('merge_eval'): # Loss metric is not added by default in each head. for loss_key in self._loss_keys: eval_metrics[loss_key] = tf_keras.metrics.Mean(name=loss_key) return eval_metrics def update_metrics(self, eval_metrics, features, logits, labels, regularization_losses=None): """Updates eval metrics. See `base_head.Head` for details.""" logits_dict = self._check_logits_and_labels(logits, labels) # Update regularization loss metric if regularization_losses is not None: regularization_loss = tf.math.add_n(regularization_losses) eval_metrics[self._loss_regularization_key].update_state( values=regularization_loss) # Update metrics for each head for i, head in enumerate(self._heads): head_logits = logits_dict[head.name] head_labels = labels[head.name] # Update loss metrics training_loss = head.loss( logits=head_logits, labels=head_labels, features=features) eval_metrics[self._loss_keys[i]].update_state(values=training_loss) # Update existing metrics in each head head_metrics = head.metrics() updated_metrics = head.update_metrics(head_metrics, features, head_logits, head_labels) eval_metrics.update(updated_metrics or {}) return eval_metrics def create_estimator_spec(self, features, mode, logits, labels=None, optimizer=None, trainable_variables=None, train_op_fn=None, update_ops=None, 
                            regularization_losses=None):
    """Returns a `model_fn.EstimatorSpec`.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: Input `dict` keyed by head name, or logits `Tensor` with shape
        `[D0, D1, ... DN, logits_dimension]`. For many applications, the
        `Tensor` shape is `[batch_size, logits_dimension]`. If logits is a
        `Tensor`, it will split the `Tensor` along the last dimension and
        distribute it appropriately among the heads. Check `MultiHead` for
        examples.
      labels: Input `dict` keyed by head name. For each head, the label value
        can be integer or string `Tensor` with shape matching its
        corresponding `logits`. `labels` is a required argument when `mode`
        equals `TRAIN` or `EVAL`.
      optimizer: A `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets
        `train_op = optimizer.get_updates(loss, trainable_variables)`, which
        updates variables to minimize `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
      update_ops: A list or tuple of update ops to be run at training time.
        For example, layers such as BatchNormalization create mean and
        variance update ops that need to be run at training time. In
        Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As
        Tensorflow 2.x doesn't have collections, update_ops need to be passed
        explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses. These losses are
        usually expressed as a batch average, so for best results, in each
        head, users need to use the default
        `loss_reduction=SUM_OVER_BATCH_SIZE` to avoid scaling errors.
        Compared to the regularization losses for each head, this loss is to
        regularize the merged loss of all heads in multi head, and will be
        added to the overall training loss of multi head.

    Returns:
      A `model_fn.EstimatorSpec` instance.

    Raises:
      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
        mode, or if both are set. If `mode` is not in Estimator's `ModeKeys`.
    """
    with ops.name_scope(self.name, 'multi_head'):
      logits_dict = self._check_logits_and_labels(logits, labels)
      # Get all estimator spec.
      all_estimator_spec = []
      for head in self._heads:
        # Each head gets a no-op train fn: the merged loss is optimized once
        # at the multi-head level in the TRAIN branch below, not per head.
        all_estimator_spec.append(
            head.create_estimator_spec(
                features=features,
                mode=mode,
                logits=logits_dict[head.name],
                labels=labels[head.name] if labels else None,
                train_op_fn=_no_op_train_fn))
      # Predict.
      predictions = self.predictions(logits)
      if mode == ModeKeys.PREDICT:
        export_outputs = self._merge_predict_export_outputs(all_estimator_spec)
        return model_fn.EstimatorSpec(
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs=export_outputs)
      loss = self.loss(labels, logits, features, mode, regularization_losses)
      # Eval.
      if mode == ModeKeys.EVAL:
        eval_metrics = self.metrics(regularization_losses=regularization_losses)
        updated_metrics = self.update_metrics(
            eval_metrics,
            features,
            logits,
            labels,
            regularization_losses=regularization_losses)
        return model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=updated_metrics)
      # Train.
      if mode == ModeKeys.TRAIN:
        train_op = base_head.create_estimator_spec_train_op(
            head_name=self.name,
            optimizer=optimizer,
            train_op_fn=train_op_fn,
            update_ops=update_ops,
            trainable_variables=trainable_variables,
            regularized_training_loss=loss,
            loss_reduction=self.loss_reduction)
        # Create summary.
base_head.create_estimator_spec_summary(loss, regularization_losses) # eval_metrics. eval_metrics = {} for spec in all_estimator_spec: eval_metrics.update(spec.eval_metric_ops or {}) # predictions can be used to access the logits in `TRAIN` mode return model_fn.EstimatorSpec( mode=ModeKeys.TRAIN, loss=loss, train_op=train_op, predictions=predictions, eval_metric_ops=eval_metrics) raise ValueError('mode={} unrecognized'.format(mode)) def _merge_predict_export_outputs(self, all_estimator_spec): """Merges list of `EstimatorSpec` export_outputs for PREDICT. For each individual head, its DEFAULT_SERVING_KEY and PREDICT_SERVING_KEY are extracted and merged for `export_outputs` in PREDICT mode of `EstimatorSpec`. By default, the first head is served. Args: all_estimator_spec: list of `EstimatorSpec` for the individual heads. Returns: A dict of merged export_outputs from all heads for PREDICT. """ # The first head is used for serving by default. export_outputs = { base_head.DEFAULT_SERVING_KEY: _default_export_output(all_estimator_spec[0].export_outputs, self._heads[0].name), } merged_predict_outputs = {} for head, spec in zip(self._heads, all_estimator_spec): for k, v in six.iteritems(spec.export_outputs): # Collect default serving key for export_outputs key = ( head.name if k == base_head.DEFAULT_SERVING_KEY else '{}/{}'.format( head.name, k)) export_outputs[key] = v # Collect predict serving key for merged_predict_outputs if (k == base_head.PREDICT_SERVING_KEY and isinstance(v, export_output.PredictOutput)): for kp, vp in six.iteritems(v.outputs): merged_predict_outputs['{}/{}'.format(head.name, kp)] = vp export_outputs[base_head.PREDICT_SERVING_KEY] = ( export_output.PredictOutput(merged_predict_outputs)) return export_outputs ================================================ FILE: tensorflow_estimator/python/estimator/head/multi_head_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for multi_head.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import six import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.head import head_utils as test_lib from tensorflow_estimator.python.estimator.head import multi_head as multi_head_lib from tensorflow_estimator.python.estimator.head import multi_label_head from tensorflow_estimator.python.estimator.head import regression_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys @test_util.run_all_in_graph_and_eager_modes class MultiHeadTest(tf.test.TestCase): def test_no_heads(self): with self.assertRaisesRegexp(ValueError, r'Must specify heads\. 
Given: \[\]'): multi_head_lib.MultiHead(heads=[]) def test_head_name_missing(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3) with self.assertRaisesRegexp(ValueError, r'All given heads must have name specified\.'): multi_head_lib.MultiHead([head1, head2]) def test_head_weights_wrong_size(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2') with self.assertRaisesRegexp( ValueError, r'heads and head_weights must have the same size\. ' r'Given len\(heads\): 2. Given len\(head_weights\): 1\.'): multi_head_lib.MultiHead([head1, head2], head_weights=[1.]) def test_name(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2]) self.assertEqual('head1_head2', multi_head.name) def test_predict_two_heads_logits_dict(self): """Tests predict with logits as dict.""" head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2]) logits = { 'head1': np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32), 'head2': np.array([[2., -2., 2.], [-3., 2., -2.]], dtype=np.float32) } expected_probabilities = { 'head1': tf.math.sigmoid(logits['head1']), 'head2': tf.math.sigmoid(logits['head2']), } pred_keys = prediction_keys.PredictionKeys predictions = multi_head.predictions(logits) self.assertAllClose(logits['head1'], self.evaluate(predictions[('head1', pred_keys.LOGITS)])) self.assertAllClose(logits['head2'], self.evaluate(predictions[('head2', pred_keys.LOGITS)])) self.assertAllClose( expected_probabilities['head1'], self.evaluate(predictions[('head1', pred_keys.PROBABILITIES)])) self.assertAllClose( expected_probabilities['head2'], self.evaluate(predictions[('head2', 
pred_keys.PROBABILITIES)])) if tf.executing_eagerly(): return spec = multi_head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) self.assertItemsEqual((test_lib._DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification', 'head1/predict', 'head2', 'head2/classification', 'head2/predict'), spec.export_outputs.keys()) # Assert predictions and export_outputs. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(logits['head1'], predictions[('head1', pred_keys.LOGITS)]) self.assertAllClose(logits['head2'], predictions[('head2', pred_keys.LOGITS)]) self.assertAllClose(expected_probabilities['head1'], predictions[('head1', pred_keys.PROBABILITIES)]) self.assertAllClose(expected_probabilities['head2'], predictions[('head2', pred_keys.PROBABILITIES)]) self.assertAllClose( expected_probabilities['head1'], sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores)) self.assertAllClose(expected_probabilities['head1'], sess.run(spec.export_outputs['head1'].scores)) self.assertAllClose(expected_probabilities['head2'], sess.run(spec.export_outputs['head2'].scores)) self.assertAllClose( expected_probabilities['head1'], sess.run( spec.export_outputs['predict'].outputs['head1/probabilities'])) self.assertAllClose( expected_probabilities['head2'], sess.run( spec.export_outputs['predict'].outputs['head2/probabilities'])) self.assertAllClose( expected_probabilities['head1'], sess.run( spec.export_outputs['head1/predict'].outputs['probabilities'])) self.assertAllClose( expected_probabilities['head2'], sess.run( spec.export_outputs['head2/predict'].outputs['probabilities'])) def test_predict_two_heads_logits_tensor(self): """Tests predict with logits as Tensor.""" head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = 
multi_label_head.MultiLabelHead(n_classes=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2]) logits = np.array([[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32) expected_logits1 = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32) expected_logits2 = np.array([[2., -2., 2.], [-3., 2., -2.]], dtype=np.float32) expected_probabilities = { 'head1': tf.math.sigmoid(expected_logits1), 'head2': tf.math.sigmoid(expected_logits2), } pred_keys = prediction_keys.PredictionKeys predictions = multi_head.predictions(logits) self.assertAllClose(expected_logits1, self.evaluate(predictions[('head1', pred_keys.LOGITS)])) self.assertAllClose(expected_logits2, self.evaluate(predictions[('head2', pred_keys.LOGITS)])) self.assertAllClose( expected_probabilities['head1'], self.evaluate(predictions[('head1', pred_keys.PROBABILITIES)])) self.assertAllClose( expected_probabilities['head2'], self.evaluate(predictions[('head2', pred_keys.PROBABILITIES)])) if tf.executing_eagerly(): return spec = multi_head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) self.assertItemsEqual((test_lib._DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification', 'head1/predict', 'head2', 'head2/classification', 'head2/predict'), spec.export_outputs.keys()) # Assert predictions and export_outputs. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(expected_logits1, predictions[('head1', pred_keys.LOGITS)]) self.assertAllClose(expected_logits2, predictions[('head2', pred_keys.LOGITS)]) self.assertAllClose(expected_probabilities['head1'], predictions[('head1', pred_keys.PROBABILITIES)]) self.assertAllClose(expected_probabilities['head2'], predictions[('head2', pred_keys.PROBABILITIES)]) self.assertAllClose( expected_probabilities['head1'], sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores)) self.assertAllClose(expected_probabilities['head1'], sess.run(spec.export_outputs['head1'].scores)) self.assertAllClose(expected_probabilities['head2'], sess.run(spec.export_outputs['head2'].scores)) def test_predict_two_heads_logits_tensor_multi_dim(self): """Tests predict with multi-dimensional logits of shape [2, 2, 5].""" head1 = regression_head.RegressionHead(label_dimension=2, name='head1') head2 = regression_head.RegressionHead(label_dimension=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2]) logits = np.array([[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]], dtype=np.float32) expected_logits1 = np.array( [[[-1., 1.], [-1., 1.]], [[-1.5, 1.], [-1.5, 1.]]], dtype=np.float32) expected_logits2 = np.array( [[[2., -2., 2.], [2., -2., 2.]], [[-3., 2., -2.], [-3., 2., -2.]]], dtype=np.float32) pred_keys = prediction_keys.PredictionKeys predictions = multi_head.predictions(logits) self.assertAllClose( expected_logits1, self.evaluate(predictions[('head1', pred_keys.PREDICTIONS)])) self.assertAllClose( expected_logits2, self.evaluate(predictions[('head2', pred_keys.PREDICTIONS)])) if tf.executing_eagerly(): return spec = multi_head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits) 
self.assertItemsEqual( (test_lib._DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/regression', 'head1/predict', 'head2', 'head2/regression', 'head2/predict'), spec.export_outputs.keys()) # Assert predictions and export_outputs. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) predictions = sess.run(spec.predictions) self.assertAllClose(expected_logits1, predictions[('head1', pred_keys.PREDICTIONS)]) self.assertAllClose(expected_logits2, predictions[('head2', pred_keys.PREDICTIONS)]) self.assertAllClose( expected_logits1, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].value)) self.assertAllClose(expected_logits1, sess.run(spec.export_outputs['head1'].value)) self.assertAllClose(expected_logits2, sess.run(spec.export_outputs['head2'].value)) def test_eval_two_heads_with_weights(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2], head_weights=[1., 2.]) logits = { 'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), 'head2': np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32), } labels = { 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } features = {'x': np.array(((42,),), dtype=np.int32)} # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]] # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75 # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]] # loss = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15 expected_loss_head1 = 8.75 expected_loss_head2 = 15. expected_loss = 1. * expected_loss_head1 + 2. 
* expected_loss_head2 tol = 1e-3 keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS + '/head1': expected_loss_head1, keys.LOSS + '/head2': expected_loss_head2, # Average loss over examples. keys.LOSS_MEAN + '/head1': expected_loss_head1, keys.LOSS_MEAN + '/head2': expected_loss_head2, # auc and auc_pr cannot be reliably calculated for only 4-6 samples, but # this assert tests that the algorithm remains consistent. keys.AUC + '/head1': 0.1667, keys.AUC + '/head2': 0.3333, keys.AUC_PR + '/head1': 0.60228, keys.AUC_PR + '/head2': 0.40152, } if tf.executing_eagerly(): loss = multi_head.loss( labels, logits, features=features, mode=ModeKeys.EVAL) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) eval_metrics = multi_head.metrics() updated_metrics = multi_head.update_metrics(eval_metrics, features, logits, labels) self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys()) self.assertAllClose( expected_metrics, {k: updated_metrics[k].result() for k in updated_metrics}, rtol=tol, atol=tol) return spec = multi_head.create_estimator_spec( features=features, mode=ModeKeys.EVAL, logits=logits, labels=labels) # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, and metrics. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of value ops (in `metrics`). 
self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_train_loss_one_head(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') multi_head = multi_head_lib.MultiHead([head1]) logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)} labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)} loss = multi_head.loss( labels=labels, logits=logits, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN) tol = 1e-3 # Unreduced loss of the head is [[(10 + 10) / 2], (15 + 0) / 2] # (averaged over classes, averaged over examples). # loss = sum(unreduced_loss) / 2 = sum([10, 7.5]) / 2 = 8.75 self.assertAllClose(8.75, self.evaluate(loss), rtol=tol, atol=tol) def test_train_loss_two_heads_with_weights(self): # Use different example weighting for each head weighting. weights1 = np.array([[1.], [2.]], dtype=np.float32) weights2 = np.array([[2.], [3.]]) head1 = multi_label_head.MultiLabelHead( n_classes=2, name='head1', weight_column='weights1') head2 = multi_label_head.MultiLabelHead( n_classes=3, name='head2', weight_column='weights2') multi_head = multi_head_lib.MultiHead([head1, head2], head_weights=[1., 2.]) logits = { 'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), 'head2': np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32), } labels = { 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } training_loss = multi_head.loss( logits=logits, labels=labels, features={ 'x': np.array(((42,),), dtype=np.int32), 'weights1': weights1, 'weights2': weights2 }, mode=ModeKeys.TRAIN) tol = 1e-3 # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]] # = [10, 7.5] # training_loss = (1 * 10 + 2 * 7.5) / 2 = 12.5 # head-weighted unreduced_loss = 1 * [10, 7.5] # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]] # = [20, 10] # training_loss = (2 * 20 + 3 * 10) / 
2 = 35 # head-weighted unreduced_loss = 2 * [20, 10] # head-weighted training_loss = 1 * 12.5 + 2 * 35 = 82.5 self.assertAllClose(82.5, self.evaluate(training_loss), rtol=tol, atol=tol) def test_train_loss_logits_tensor(self): """Tests loss with logits Tensor.""" weights1 = np.array([[1.], [2.]], dtype=np.float32) weights2 = np.array([[2.], [3.]]) head1 = multi_label_head.MultiLabelHead( n_classes=2, name='head1', weight_column='weights1') head2 = multi_label_head.MultiLabelHead( n_classes=3, name='head2', weight_column='weights2') multi_head = multi_head_lib.MultiHead([head1, head2], head_weights=[1., 2.]) logits = np.array( [[-10., 10., 20., -20., 20.], [-15., 10., -30., 20., -20.]], dtype=np.float32) labels = { 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } training_loss = multi_head.loss( logits=logits, labels=labels, features={ 'x': np.array(((42,),), dtype=np.int32), 'weights1': weights1, 'weights2': weights2 }, mode=ModeKeys.TRAIN) tol = 1e-3 # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]] # = [10, 7.5] # training_loss = (1 * 10 + 2 * 7.5) / 2 = 12.5 # head-weighted unreduced_loss = 1 * [10, 7.5] # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]] # = [20, 10] # training_loss = (2 * 20 + 3 * 10) / 2 = 35 # head-weighted unreduced_loss = 2 * [20, 10] # head-weighted training_loss = 1 * 12.5 + 2 * 35 = 82.5 self.assertAllClose(82.5, self.evaluate(training_loss), rtol=tol, atol=tol) def test_train_loss_logits_tensor_wrong_shape(self): """Tests loss with a logits Tensor of the wrong shape.""" weights1 = np.array([[1.], [2.]], dtype=np.float32) weights2 = np.array([[2.], [3.]]) head1 = multi_label_head.MultiLabelHead( n_classes=2, name='head1', weight_column='weights1') head2 = multi_label_head.MultiLabelHead( n_classes=3, name='head2', weight_column='weights2') multi_head = multi_head_lib.MultiHead([head1, head2], head_weights=[1., 2.]) # logits tensor is 2x6 
instead of 2x5 logits = np.array( [[-10., 10., 20., -20., 20., 70.], [-15., 10., -30., 20., -20., 80.]], dtype=np.float32) labels = { 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } with self.assertRaisesRegexp(ValueError, r'Could not split logits'): multi_head.loss( features={ 'x': np.array(((42,),), dtype=np.int32), 'weights1': weights1, 'weights2': weights2 }, mode=ModeKeys.TRAIN, logits=logits, labels=labels) def test_train_loss_logits_tensor_multi_dim(self): """Tests loss with multi-dimensional logits of shape [2, 2, 5].""" head1 = regression_head.RegressionHead(label_dimension=2, name='head1') head2 = regression_head.RegressionHead(label_dimension=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2]) logits = np.array([[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]], [[-1.5, 1.5, -2., 2., -2.], [-1.5, 1.5, -2., 2., -2.]]], dtype=np.float32) labels = { 'head1': np.array([[[1., 0.], [1., 0.]], [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32), 'head2': np.array( [[[0., 1., 0.], [0., 1., 0.]], [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32), } # Loss for the first head: # loss1 = ((1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 + # (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2) / 8 # = 3.5 # Loss for the second head: # loss2 = ((0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 + # (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2) / 12 # = 6.167 expected_training_loss = 3.5 + 6.167 training_loss = multi_head.loss( logits=logits, labels=labels, features={}, mode=ModeKeys.TRAIN) tol = 1e-3 self.assertAllClose( expected_training_loss, self.evaluate(training_loss), rtol=tol, atol=tol) def test_train_loss_logits_tensor_multi_dim_wrong_shape(self): """Tests loss with a multi-dimensional logits tensor of the wrong shape.""" head1 = regression_head.RegressionHead(label_dimension=2, name='head1') head2 = regression_head.RegressionHead(label_dimension=3, name='head2') multi_head 
= multi_head_lib.MultiHead([head1, head2]) # logits tensor is 2x2x4 instead of 2x2x5 logits = np.array([[[-1., 1., 2., -2.], [-1., 1., 2., -2.]], [[-1.5, 1.5, -2., 2.], [-1.5, 1.5, -2., 2.]]], dtype=np.float32) labels = { 'head1': np.array([[[1., 0.], [1., 0.]], [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32), 'head2': np.array( [[[0., 1., 0.], [0., 1., 0.]], [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32), } with self.assertRaisesRegexp(ValueError, r'Could not split logits'): multi_head.loss( features={}, mode=ModeKeys.TRAIN, logits=logits, labels=labels) def test_train_one_head(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') multi_head = multi_head_lib.MultiHead([head1]) logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)} expected_probabilities = { 'head1': tf.math.sigmoid(logits['head1']), } labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)} features = {'x': np.array(((42,),), dtype=np.int32)} # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75 expected_loss = 8.75 tol = 1e-3 loss = multi_head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertAllClose(expected_loss, self.evaluate(loss), rtol=tol, atol=tol) if tf.executing_eagerly(): return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = multi_head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and 
summaries. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str, predictions = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op, spec.predictions)) self.assertAllClose( logits['head1'], predictions[('head1', prediction_keys.PredictionKeys.LOGITS)]) self.assertAllClose( expected_probabilities['head1'], predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)]) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss, }, summary_str, tol) def test_train_one_head_with_optimizer(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') multi_head = multi_head_lib.MultiHead([head1]) logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)} labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)} features = {'x': np.array(((42,),), dtype=np.int32)} # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75 expected_loss = 8.75 tol = 1e-3 loss = multi_head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertAllClose(expected_loss, self.evaluate(loss), rtol=tol, atol=tol) if tf.executing_eagerly(): return expected_train_result = 'my_train_op' class _Optimizer(tf_keras.optimizers.Optimizer): def get_updates(self, loss, params): del params return [ tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) ] def get_config(self): config = super(_Optimizer, self).get_config() return config spec 
= multi_head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, optimizer=_Optimizer('my_optimizer'), trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) loss, train_result = sess.run((spec.loss, spec.train_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) def test_train_two_heads_with_weights(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2], head_weights=[1., 2.]) logits = { 'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), 'head2': np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32), } expected_probabilities = { 'head1': tf.math.sigmoid(logits['head1']), 'head2': tf.math.sigmoid(logits['head2']), } labels = { 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } features = {'x': np.array(((42,),), dtype=np.int32)} # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]] # loss = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75 # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]] # loss = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15 # Average over classes, weighted sum over batch and heads. expected_loss_head1 = 8.75 expected_loss_head2 = 15.0 expected_loss = 1. * expected_loss_head1 + 2. 
* expected_loss_head2 tol = 1e-3 loss = multi_head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertAllClose(expected_loss, self.evaluate(loss), rtol=tol, atol=tol) if tf.executing_eagerly(): return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = multi_head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str, predictions = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op, spec.predictions)) self.assertAllClose( logits['head1'], predictions[('head1', prediction_keys.PredictionKeys.LOGITS)]) self.assertAllClose( expected_probabilities['head1'], predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)]) self.assertAllClose( logits['head2'], predictions[('head2', prediction_keys.PredictionKeys.LOGITS)]) self.assertAllClose( expected_probabilities['head2'], predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)]) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries( self, { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS + '/head1': expected_loss_head1, metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2, }, summary_str, tol) def test_train_with_regularization_losses(self): head1 = multi_label_head.MultiLabelHead(n_classes=2, 
name='head1') head2 = multi_label_head.MultiLabelHead(n_classes=3, name='head2') multi_head = multi_head_lib.MultiHead([head1, head2], head_weights=[1., 2.]) logits = { 'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), 'head2': np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32), } expected_probabilities = { 'head1': tf.math.sigmoid(logits['head1']), 'head2': tf.math.sigmoid(logits['head2']), } labels = { 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } features = {'x': np.array(((42,),), dtype=np.int32)} regularization_losses = [1.5, 0.5] # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]] # loss1 = ((10 + 10) / 2 + (15 + 0) / 2) / 2 = 8.75 # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]] # loss2 = ((20 + 20 + 20) / 3 + (30 + 0 + 0) / 3) / 2 = 15 # Average over classes, weighted sum over batch and heads. # weights = [1., 2.] # merged_training_loss = 1. * loss1 + 2. * loss2 # training_loss = merged_training_loss + regularization_loss # = 1. * loss1 + 2. * loss2 + sum([1.5, 0.5]) expected_loss_head1 = 8.75 expected_loss_head2 = 15.0 expected_regularization_loss = 2. # training loss. expected_loss = (1. * expected_loss_head1 + 2. 
* expected_loss_head2 + expected_regularization_loss) tol = 1e-3 loss = multi_head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN, regularization_losses=regularization_losses) self.assertAllClose(expected_loss, self.evaluate(loss), rtol=tol, atol=tol) if tf.executing_eagerly(): return keys = metric_keys.MetricKeys expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = multi_head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, regularization_losses=regularization_losses) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str, predictions = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op, spec.predictions)) self.assertAllClose( logits['head1'], predictions[('head1', prediction_keys.PredictionKeys.LOGITS)]) self.assertAllClose( expected_probabilities['head1'], predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)]) self.assertAllClose( logits['head2'], predictions[('head2', prediction_keys.PredictionKeys.LOGITS)]) self.assertAllClose( expected_probabilities['head2'], predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)]) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries( self, { keys.LOSS_REGULARIZATION: expected_regularization_loss, keys.LOSS: expected_loss, keys.LOSS + '/head1': expected_loss_head1, keys.LOSS + '/head2': 
@test_util.deprecated_graph_mode_only
class MultiHeadForEstimator(tf.test.TestCase):
  """Tests for create_estimator_spec running in Graph mode only."""

  def test_loss_reduction_must_be_same(self):
    """Tests the loss reduction must be the same for different heads."""
    # Two sub-heads deliberately configured with different loss reductions.
    sum_head = multi_label_head.MultiLabelHead(
        n_classes=2,
        name='head1',
        loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
    auto_head = multi_label_head.MultiLabelHead(
        n_classes=3, name='head2', loss_reduction=tf.losses.Reduction.AUTO)
    combined_head = multi_head_lib.MultiHead([sum_head, auto_head])
    logits_by_head = {
        'head1':
            np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
        'head2':
            np.array([[20., -20., 20.], [-30., 20., -20.]], dtype=np.float32),
    }
    labels_by_head = {
        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
    }
    # Mixing reductions across sub-heads must be rejected at spec creation.
    with self.assertRaisesRegexp(ValueError, 'must be the same'):
      combined_head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=logits_by_head,
          labels=labels_by_head)


if __name__ == '__main__':
  tf.test.main()
# ============================================================================== """Multi label head.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import six import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.ops import lookup_ops from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.export import export_output from tensorflow_estimator.python.estimator.head import base_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys from tensorflow_estimator.python.estimator.util import tf_keras_v2 @estimator_export('estimator.MultiLabelHead') class MultiLabelHead(base_head.Head): """Creates a `Head` for multi-label classification. Multi-label classification handles the case where each example may have zero or more associated labels, from a discrete set. This is distinct from `MultiClassHead` which has exactly one label per example. Uses `sigmoid_cross_entropy` loss average over classes and weighted sum over the batch. Namely, if the input logits have shape `[batch_size, n_classes]`, the loss is the average over `n_classes` and the weighted sum over `batch_size`. The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many applications, the shape is `[batch_size, n_classes]`. Labels can be: * A multi-hot tensor of shape `[D0, D1, ... DN, n_classes]` * An integer `SparseTensor` of class indices. The `dense_shape` must be `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`. * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape` must be `[D0, D1, ... 
DN, ?]` and the values within `label_vocabulary` or a multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features)` as arguments and returns unreduced loss with shape `[D0, D1, ... DN, 1]`. `loss_fn` must support indicator `labels` with shape `[D0, D1, ... DN, n_classes]`. Namely, the head applies `label_vocabulary` to the input labels before passing them to `loss_fn`. Usage: >>> n_classes = 2 >>> head = tf.estimator.MultiLabelHead(n_classes) >>> logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) >>> labels = np.array([[1, 0], [1, 1]], dtype=np.int64) >>> features = {'x': np.array([[41], [42]], dtype=np.int32)} >>> # expected_loss = sum(_sigmoid_cross_entropy(labels, logits)) / batch_size >>> # = sum(1.31326169, 0.9514133) / 2 = 1.13 >>> loss = head.loss(labels, logits, features=features) >>> print('{:.2f}'.format(loss.numpy())) 1.13 >>> eval_metrics = head.metrics() >>> updated_metrics = head.update_metrics( ... eval_metrics, features, logits, labels) >>> for k in sorted(updated_metrics): ... print('{} : {:.2f}'.format(k, updated_metrics[k].result().numpy())) auc : 0.33 auc_precision_recall : 0.77 average_loss : 1.13 >>> preds = head.predictions(logits) >>> print(preds['logits']) tf.Tensor( [[-1. 1. ] [-1.5 1.5]], shape=(2, 2), dtype=float32) Usage with a canned estimator: ```python my_head = tf.estimator.MultiLabelHead(n_classes=3) my_estimator = tf.estimator.DNNEstimator( head=my_head, hidden_units=..., feature_columns=...) ``` It can also be used with a custom `model_fn`. 
Example: ```python def _my_model_fn(features, labels, mode): my_head = tf.estimator.MultiLabelHead(n_classes=3) logits = tf_keras.Model(...)(features) return my_head.create_estimator_spec( features=features, mode=mode, labels=labels, optimizer=tf_keras.optimizers.Adagrad(lr=0.1), logits=logits) my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn) ``` Args: n_classes: Number of classes, must be greater than 1 (for 1 class, use `BinaryClassHead`). weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. Per-class weighting is not supported. thresholds: Iterable of floats in the range `(0, 1)`. Accuracy, precision and recall metrics are evaluated for each threshold value. The threshold is applied to the predicted probabilities, i.e. above the threshold is `true`, below is `false`. label_vocabulary: A list of strings represents possible label values. If it is not given, that means labels are already encoded as integer within [0, n_classes) or multi-hot Tensor. If given, labels must be SparseTensor `string` type and have any value in `label_vocabulary`. Also there will be errors if vocabulary is not provided and labels are string. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Decides how to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by batch size. loss_fn: Optional loss function. classes_for_class_based_metrics: List of integer class IDs or string class names for which per-class metrics are evaluated. If integers, all must be in the range `[0, n_classes - 1]`. If strings, all must be in `label_vocabulary`. name: Name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. 
""" def __init__(self, n_classes, weight_column=None, thresholds=None, label_vocabulary=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, loss_fn=None, classes_for_class_based_metrics=None, name=None): if n_classes is None or n_classes < 2: raise ValueError('n_classes must be > 1 for multi-label classification. ' 'Given: {}'.format(n_classes)) thresholds = tuple(thresholds) if thresholds else tuple() for threshold in thresholds: if (threshold <= 0.0) or (threshold >= 1.0): raise ValueError( 'thresholds must be in (0, 1) range. Given: {}'.format(threshold)) if label_vocabulary is not None: if not isinstance(label_vocabulary, (list, tuple)): raise ValueError('label_vocabulary must be a list or tuple. ' 'Given type: {}'.format(type(label_vocabulary))) if len(label_vocabulary) != n_classes: raise ValueError('Length of label_vocabulary must be n_classes ({}). ' 'Given: {}'.format(n_classes, len(label_vocabulary))) if loss_fn: base_head.validate_loss_fn_args(loss_fn) base_head.validate_loss_reduction(loss_reduction) if classes_for_class_based_metrics: classes_for_class_based_metrics = tuple(classes_for_class_based_metrics) if isinstance(classes_for_class_based_metrics[0], six.string_types): if not label_vocabulary: raise ValueError('label_vocabulary must be provided when ' 'classes_for_class_based_metrics are strings.') class_ids = [] for class_string in classes_for_class_based_metrics: class_ids.append(label_vocabulary.index(class_string)) classes_for_class_based_metrics = tuple(class_ids) else: for class_id in classes_for_class_based_metrics: if (class_id < 0) or (class_id >= n_classes): raise ValueError( 'All classes_for_class_based_metrics must be in range [0, {}]. 
' 'Given: {}'.format(n_classes - 1, class_id)) else: classes_for_class_based_metrics = tuple() self._n_classes = n_classes self._weight_column = weight_column self._thresholds = thresholds self._label_vocabulary = label_vocabulary self._loss_reduction = loss_reduction self._loss_fn = loss_fn self._classes_for_class_based_metrics = classes_for_class_based_metrics self._name = name # Metric keys. keys = metric_keys.MetricKeys self._loss_mean_key = self._summary_key(keys.LOSS_MEAN) self._auc_key = self._summary_key(keys.AUC) self._auc_pr_key = self._summary_key(keys.AUC_PR) self._loss_regularization_key = self._summary_key(keys.LOSS_REGULARIZATION) accuracy_keys = [] precision_keys = [] recall_keys = [] for threshold in self._thresholds: accuracy_keys.append( self._summary_key(keys.ACCURACY_AT_THRESHOLD % threshold)) precision_keys.append( self._summary_key(keys.PRECISION_AT_THRESHOLD % threshold)) recall_keys.append( self._summary_key(keys.RECALL_AT_THRESHOLD % threshold)) self._accuracy_keys = tuple(accuracy_keys) self._precision_keys = tuple(precision_keys) self._recall_keys = tuple(recall_keys) prob_keys = [] auc_keys = [] auc_pr_keys = [] for class_id in self._classes_for_class_based_metrics: if self._label_vocabulary is None: prob_key = keys.PROBABILITY_MEAN_AT_CLASS % class_id auc_key = keys.AUC_AT_CLASS % class_id auc_pr_key = keys.AUC_PR_AT_CLASS % class_id else: prob_key = ( keys.PROBABILITY_MEAN_AT_NAME % self._label_vocabulary[class_id]) auc_key = keys.AUC_AT_NAME % self._label_vocabulary[class_id] auc_pr_key = keys.AUC_PR_AT_NAME % self._label_vocabulary[class_id] prob_keys.append(self._summary_key(prob_key)) auc_keys.append(self._summary_key(auc_key)) auc_pr_keys.append(self._summary_key(auc_pr_key)) self._prob_keys = tuple(prob_keys) self._auc_keys = tuple(auc_keys) self._auc_pr_keys = tuple(auc_pr_keys) @property def name(self): """See `base_head.Head` for details.""" return self._name @property def logits_dimension(self): """See `base_head.Head` for 
details.""" return self._n_classes @property def loss_reduction(self): """See `base_head.Head` for details.""" return self._loss_reduction # An attribute for lookup table. Note that for Graph execution, the lookup # table is created on demand to make sure the lookup table is in the same # graph as its input tensors for `train` and `eval` of Estimator (as Estimator # re-creates graphs for `train`, `eval` and `predict`). _cached_class_id_table = None @property def _class_id_table(self): """Creates a lookup table for class_id. In eager execution, this lookup table will be lazily created on the first call of `self._class_id_table`, and cached for later use; In graph execution, it will be created on demand. Returns: A hash table for lookup. """ if self._cached_class_id_table is None or not tf.executing_eagerly(): self._cached_class_id_table = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup') return self._cached_class_id_table def _processed_labels(self, logits, labels): """Converts labels to integer id space.""" if labels is None: raise ValueError(base_head._LABEL_NONE_ERR_MSG) # pylint:disable=protected-access if isinstance(labels, tf.sparse.SparseTensor): label_values = labels.values if labels.dtype == tf.dtypes.string: label_ids_values = self._class_id_table.lookup(label_values) label_ids = tf.sparse.SparseTensor( indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) processed_labels = tf.sparse.to_indicator(label_ids, self._n_classes) else: if not label_values.dtype.is_integer: raise ValueError( 'Labels dtype should be integer. 
  def _unweighted_loss_and_weights(self, logits, processed_labels, features):
    """Computes loss spec.

    Returns a `(unweighted_loss, weights)` pair where the loss has shape
    `[D0, D1, ... DN, 1]` (per-example, averaged over classes).
    """
    if self._loss_fn:
      # Custom loss_fn is expected to return per-example loss with dim 1.
      unweighted_loss = base_head.call_loss_fn(
          loss_fn=self._loss_fn,
          labels=processed_labels,
          logits=logits,
          features=features,
          expected_loss_dim=1)
    else:
      unweighted_loss = tf.compat.v1.losses.sigmoid_cross_entropy(
          multi_class_labels=processed_labels,
          logits=logits,
          reduction=tf.compat.v1.losses.Reduction.NONE)
      # Averages loss over classes.
      unweighted_loss = tf.math.reduce_mean(
          unweighted_loss, axis=-1, keepdims=True)
    weights = base_head.get_weights_and_check_match_logits(
        features=features, weight_column=self._weight_column, logits=logits)
    return unweighted_loss, weights

  def loss(self,
           labels,
           logits,
           features=None,
           mode=None,
           regularization_losses=None):
    """Returns regularized training loss. See `base_head.Head` for details."""
    del mode  # Unused for this head.
    with ops.name_scope(
        'losses', values=(logits, labels, regularization_losses, features)):
      logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
      processed_labels = self._processed_labels(logits, labels)
      unweighted_loss, weights = self._unweighted_loss_and_weights(
          logits, processed_labels, features)
      # Weighted reduction over the batch per self._loss_reduction.
      training_loss = tf_keras_v2.__internal__.losses.compute_weighted_loss(
          unweighted_loss,
          sample_weight=weights,
          reduction=self._loss_reduction)
      regularization_loss = tf.math.add_n(
          regularization_losses) if regularization_losses is not None else None
      regularized_training_loss = (
          training_loss + regularization_loss
          if regularization_loss is not None else training_loss)
      return regularized_training_loss

  def predictions(self, logits, keys=None):
    """Return predictions based on keys.  See `base_head.Head` for details.

    Args:
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      keys: a list of prediction keys. Key can be either the class variable
        of prediction_keys.PredictionKeys or its string value, such as:
        prediction_keys.PredictionKeys.LOGITS or 'logits'.

    Returns:
      A dict of predictions.
    """
    pred_keys = prediction_keys.PredictionKeys
    valid_keys = [pred_keys.LOGITS, pred_keys.PROBABILITIES, pred_keys.CLASSES]
    if keys:
      base_head.check_prediction_keys(keys, valid_keys)
    else:
      # No keys requested: produce all supported predictions.
      keys = valid_keys
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    predictions = {}
    with ops.name_scope('predictions', values=(logits,)):
      if pred_keys.LOGITS in keys:
        predictions[pred_keys.LOGITS] = logits
      if pred_keys.PROBABILITIES in keys:
        # Per-class independent probabilities (multi-label => sigmoid, not
        # softmax).
        probabilities = tf.math.sigmoid(logits, name=pred_keys.PROBABILITIES)
        predictions[pred_keys.PROBABILITIES] = probabilities
      if pred_keys.CLASSES in keys:
        predictions[pred_keys.CLASSES] = base_head.all_classes(
            logits, self._n_classes, self._label_vocabulary)
      return predictions

  def metrics(self, regularization_losses=None):
    """Creates metrics. See `base_head.Head` for details."""
    keys = metric_keys.MetricKeys
    with ops.name_scope(None, 'metrics', (regularization_losses,)):
      # Mean metric.
      eval_metrics = {}
      eval_metrics[self._loss_mean_key] = tf_keras.metrics.Mean(
          name=keys.LOSS_MEAN)
      # The default summation_method is "interpolation" in the AUC metric.
      eval_metrics[self._auc_key] = tf_keras.metrics.AUC(name=keys.AUC)
      eval_metrics[self._auc_pr_key] = tf_keras.metrics.AUC(
          curve='PR', name=keys.AUC_PR)
      if regularization_losses is not None:
        eval_metrics[self._loss_regularization_key] = tf_keras.metrics.Mean(
            name=keys.LOSS_REGULARIZATION)
      # Threshold-dependent metrics, one set per configured threshold.
      for i, threshold in enumerate(self._thresholds):
        eval_metrics[self._accuracy_keys[i]] = tf_keras.metrics.BinaryAccuracy(
            name=self._accuracy_keys[i], threshold=threshold)
        eval_metrics[self._precision_keys[i]] = (
            tf_keras.metrics.Precision(
                name=self._precision_keys[i], thresholds=threshold))
        eval_metrics[self._recall_keys[i]] = tf_keras.metrics.Recall(
            name=self._recall_keys[i], thresholds=threshold)
      # Per-class metrics for the classes requested at construction time.
      for i in range(len(self._classes_for_class_based_metrics)):
        eval_metrics[self._prob_keys[i]] = tf_keras.metrics.Mean(
            name=self._prob_keys[i])
        eval_metrics[self._auc_keys[i]] = tf_keras.metrics.AUC(
            name=self._auc_keys[i])
        eval_metrics[self._auc_pr_keys[i]] = tf_keras.metrics.AUC(
            curve='PR', name=self._auc_pr_keys[i])
    return eval_metrics
  def update_metrics(self,
                     eval_metrics,
                     features,
                     logits,
                     labels,
                     regularization_losses=None):
    """Updates eval metrics. See `base_head.Head` for details."""
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    processed_labels = self._processed_labels(logits, labels)
    unweighted_loss, weights = self._unweighted_loss_and_weights(
        logits, processed_labels, features)
    prob_key = prediction_keys.PredictionKeys.PROBABILITIES
    predictions = self.predictions(logits, [prob_key])
    probabilities = predictions[prob_key]

    # Update metrics.
    eval_metrics[self._loss_mean_key].update_state(
        values=unweighted_loss, sample_weight=weights)
    eval_metrics[self._auc_key].update_state(
        y_true=processed_labels, y_pred=probabilities, sample_weight=weights)
    eval_metrics[self._auc_pr_key].update_state(
        y_true=processed_labels, y_pred=probabilities, sample_weight=weights)
    if regularization_losses is not None:
      regularization_loss = tf.math.add_n(regularization_losses)
      eval_metrics[self._loss_regularization_key].update_state(
          values=regularization_loss)
    for i in range(len(self._thresholds)):
      eval_metrics[self._accuracy_keys[i]].update_state(
          y_true=processed_labels, y_pred=probabilities, sample_weight=weights)
      eval_metrics[self._precision_keys[i]].update_state(
          y_true=processed_labels, y_pred=probabilities, sample_weight=weights)
      eval_metrics[self._recall_keys[i]].update_state(
          y_true=processed_labels, y_pred=probabilities, sample_weight=weights)
    for i, class_id in enumerate(self._classes_for_class_based_metrics):
      # Slice out one class column from `[D0, D1, ... DN, n_classes]` without
      # assuming a fixed rank: begin/size are built from the dynamic rank.
      batch_rank = tf.rank(probabilities) - 1
      begin = tf.concat(
          [tf.zeros([batch_rank], dtype=tf.dtypes.int32), [class_id]], axis=0)
      size = tf.concat([-1 * tf.ones([batch_rank], dtype=tf.dtypes.int32), [1]],
                       axis=0)
      class_probabilities = tf.slice(probabilities, begin=begin, size=size)
      class_labels = tf.slice(processed_labels, begin=begin, size=size)
      base_head.update_metric_with_broadcast_weights(
          eval_metrics[self._prob_keys[i]], class_probabilities, weights)
      eval_metrics[self._auc_keys[i]].update_state(
          y_true=class_labels,
          y_pred=class_probabilities,
          sample_weight=weights)
      eval_metrics[self._auc_pr_keys[i]].update_state(
          y_true=class_labels,
          y_pred=class_probabilities,
          sample_weight=weights)
    return eval_metrics

  def _create_tpu_estimator_spec(self,
                                 features,
                                 mode,
                                 logits,
                                 labels=None,
                                 optimizer=None,
                                 trainable_variables=None,
                                 train_op_fn=None,
                                 update_ops=None,
                                 regularization_losses=None):
    """Returns an `model_fn._TPUEstimatorSpec`.

    Args:
      features: Input `dict` of `Tensor` or `SparseTensor` objects.
      mode: Estimator's `ModeKeys`.
      logits: logits `Tensor` with shape `[D0, D1, ... DN, n_classes]`. For
        many applications, the shape is `[batch_size, n_classes]`.
      labels: Labels with shape matching `logits`. Can be multi-hot `Tensor`
        with shape `[D0, D1, ... DN, n_classes]` or `SparseTensor` with
        `dense_shape` `[D0, D1, ... DN, ?]`. `labels` is required argument when
        `mode` equals `TRAIN` or `EVAL`.
      optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets `train_op =
        optimizer.get_updates(loss, trainable_variables)`, which updates
        variables to minimize `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns
        `train_op`. Used if `optimizer` is `None`.
      update_ops: A list or tuple of update ops to be run at training time. For
        example, layers such as BatchNormalization create mean and variance
        update ops that need to be run at training time. In Tensorflow 1.x,
        these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x
        doesn't have collections, update_ops need to be passed explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses. These losses are
        usually expressed as a batch average, so for best results users need to
        set `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to
        avoid scaling errors.

    Returns:
      `model_fn._TPUEstimatorSpec`.

    Raises:
      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
        mode, or if both are set.
    """
    with ops.name_scope(self._name, 'head'):
      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      predictions = self.predictions(logits)
      if mode == ModeKeys.PREDICT:
        probabilities = predictions[pred_keys.PROBABILITIES]
        classifier_output = base_head.classification_output(
            scores=probabilities,
            n_classes=self._n_classes,
            label_vocabulary=self._label_vocabulary)
        return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                base_head.DEFAULT_SERVING_KEY: classifier_output,
                base_head.CLASSIFY_SERVING_KEY: classifier_output,
                base_head.PREDICT_SERVING_KEY: (
                    export_output.PredictOutput(predictions))
            })
      regularized_training_loss = self.loss(
          logits=logits,
          labels=labels,
          features=features,
          mode=mode,
          regularization_losses=regularization_losses)
      # Eval.
      if mode == ModeKeys.EVAL:
        eval_metrics = self.metrics(regularization_losses=regularization_losses)
        return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
            mode=ModeKeys.EVAL,
            predictions=predictions,
            loss=regularized_training_loss,
            eval_metrics=base_head.create_eval_metrics_tuple(
                self.update_metrics, {
                    'eval_metrics': eval_metrics,
                    'features': features,
                    'logits': logits,
                    'labels': labels,
                    'regularization_losses': regularization_losses
                }))
      # Train.
      train_op = base_head.create_estimator_spec_train_op(
          head_name=self._name,
          optimizer=optimizer,
          train_op_fn=train_op_fn,
          update_ops=update_ops,
          trainable_variables=trainable_variables,
          regularized_training_loss=regularized_training_loss,
          loss_reduction=self._loss_reduction)
    # Create summary.
    base_head.create_estimator_spec_summary(
        regularized_training_loss=regularized_training_loss,
        regularization_losses=regularization_losses,
        summary_key_fn=self._summary_key)
    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
        mode=ModeKeys.TRAIN,
        predictions=predictions,
        loss=regularized_training_loss,
        train_op=train_op)
# ============================================================================== """Tests for multi_label_head.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import six import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import dnn from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.head import head_utils as test_lib from tensorflow_estimator.python.estimator.head import multi_label_head as head_lib from tensorflow_estimator.python.estimator.mode_keys import ModeKeys def _sigmoid_cross_entropy(labels, logits): """Returns sigmoid cross entropy averaged over classes.""" sigmoid_logits = 1 / (1 + np.exp(-logits)) unreduced_result = (-labels * np.log(sigmoid_logits) - (1 - labels) * np.log(1 - sigmoid_logits)) # Mean over classes return np.mean(unreduced_result, axis=-1, keepdims=True) @test_util.run_all_in_graph_and_eager_modes class MultiLabelHead(tf.test.TestCase): def test_n_classes_is_none(self): with self.assertRaisesRegexp( ValueError, r'n_classes must be > 1 for multi-label classification\. Given: None'): head_lib.MultiLabelHead(n_classes=None) def test_n_classes_is_1(self): with self.assertRaisesRegexp( ValueError, r'n_classes must be > 1 for multi-label classification\. Given: 1'): head_lib.MultiLabelHead(n_classes=1) def test_threshold_too_small(self): with self.assertRaisesRegexp( ValueError, r'thresholds must be in \(0, 1\) range\. Given: 0\.0'): head_lib.MultiLabelHead(n_classes=2, thresholds=[0., 0.5]) def test_threshold_too_large(self): with self.assertRaisesRegexp( ValueError, r'thresholds must be in \(0, 1\) range\. 
  def test_label_vocabulary_dict(self):
    # A dict is not an ordered class list; the constructor must reject it.
    with self.assertRaisesRegexp(
        ValueError, r'label_vocabulary must be a list or tuple\. '
        r'Given type: <(type|class) \'dict\'>'):
      head_lib.MultiLabelHead(n_classes=2, label_vocabulary={'foo': 'bar'})

  def test_label_vocabulary_wrong_size(self):
    with self.assertRaisesRegexp(
        ValueError,
        r'Length of label_vocabulary must be n_classes \(3\). Given: 2'):
      head_lib.MultiLabelHead(n_classes=3, label_vocabulary=['foo', 'bar'])

  def test_invalid_loss_reduction(self):
    with self.assertRaisesRegexp(
        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
      head_lib.MultiLabelHead(
          n_classes=3, loss_reduction='invalid_loss_reduction')
    # Reduction.NONE is explicitly disallowed for this head.
    with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: none'):
      head_lib.MultiLabelHead(
          n_classes=3, loss_reduction=tf.losses.Reduction.NONE)

  def test_loss_fn_arg_labels_missing(self):

    def _loss_fn(logits):
      del logits  # Unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: labels\. '
        r'Given arguments: \(\'logits\',\)'):
      head_lib.MultiLabelHead(n_classes=3, loss_fn=_loss_fn)

  def test_loss_fn_arg_logits_missing(self):

    def _loss_fn(labels):
      del labels  # unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: logits\. '
        r'Given arguments: \(\'labels\',\)'):
      head_lib.MultiLabelHead(n_classes=3, loss_fn=_loss_fn)

  def test_loss_fn_arg_features_ok(self):
    # `features` is an optional third argument; accepting it must not raise.

    def _loss_fn(labels, logits, features):
      del labels, logits, features  # Unused

    head_lib.MultiLabelHead(n_classes=3, loss_fn=_loss_fn)

  def test_loss_fn_arg_invalid(self):

    def _loss_fn(labels, logits, name=None):
      del labels, logits, name  # Unused

    with self.assertRaisesRegexp(ValueError,
                                 r'loss_fn has unexpected args: \[\'name\'\]'):
      head_lib.MultiLabelHead(n_classes=3, loss_fn=_loss_fn)

  def test_classes_for_class_based_metrics_invalid(self):
    with self.assertRaisesRegexp(
        ValueError,
        r'All classes_for_class_based_metrics must be in range \[0, 2\]\. '
        r'Given: -1'):
      head_lib.MultiLabelHead(
          n_classes=3, classes_for_class_based_metrics=[2, -1])

  def test_classes_for_class_based_metrics_string_invalid(self):
    # 'z' is not in the vocabulary; list.index raises inside the constructor.
    with self.assertRaisesRegexp(ValueError, r'\'z\' is not in list'):
      head_lib.MultiLabelHead(
          n_classes=3,
          label_vocabulary=['a', 'b', 'c'],
          classes_for_class_based_metrics=['c', 'z'])

  def test_predict(self):
    n_classes = 4
    head = head_lib.MultiLabelHead(n_classes)
    self.assertEqual(n_classes, head.logits_dimension)
    logits = np.array([[0., 1., 2., -1.], [-1., -2., -3., 1.]],
                      dtype=np.float32)
    expected_probabilities = tf.math.sigmoid(logits)
    # Without a vocabulary, exported classes are the stringified class ids.
    expected_export_classes = [[b'0', b'1', b'2', b'3']] * 2
    keys = prediction_keys.PredictionKeys
    preds = head.predictions(logits,
                             [keys.LOGITS, keys.PROBABILITIES, keys.CLASSES])
    self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS]))
    self.assertAllClose(expected_probabilities,
                        self.evaluate(preds[keys.PROBABILITIES]))
    if tf.executing_eagerly():
      return
    spec = head.create_estimator_spec(
        features={'x': np.array(((42,),), dtype=np.int32)},
        mode=ModeKeys.PREDICT,
        logits=logits,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    self.assertItemsEqual(
        (test_lib._DEFAULT_SERVING_KEY, 'predict', 'classification'),
        spec.export_outputs.keys())
    # Assert predictions and export_outputs.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      predictions = sess.run(spec.predictions)
      self.assertAllEqual(expected_export_classes,
                          predictions[prediction_keys.PredictionKeys.CLASSES])
      self.assertAllClose(logits,
                          predictions[prediction_keys.PredictionKeys.LOGITS])
      self.assertAllClose(
          expected_probabilities,
          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
      self.assertAllClose(
          expected_probabilities,
          sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].scores))
      self.assertAllEqual(
          expected_export_classes,
          sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].classes))

  def test_weight_should_not_impact_prediction(self):
    n_classes = 4
    head = head_lib.MultiLabelHead(n_classes, weight_column='example_weights')
    self.assertEqual(n_classes, head.logits_dimension)
    logits = np.array([[0., 1., 2., -1.], [-1., -2., -3., 1.]],
                      dtype=np.float32)
    expected_probabilities = tf.math.sigmoid(logits)
    expected_export_classes = [[b'0', b'1', b'2', b'3']] * 2
    # Weights only affect loss/metrics; predictions must ignore them.
    weights_2x1 = [[1.], [2.]]
    features = {
        'x': np.array(((42,),), dtype=np.int32),
        'example_weights': weights_2x1
    }
    keys = prediction_keys.PredictionKeys
    preds = head.predictions(logits,
                             [keys.LOGITS, keys.PROBABILITIES, keys.CLASSES])
    self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS]))
    self.assertAllClose(expected_probabilities,
                        self.evaluate(preds[keys.PROBABILITIES]))
    if tf.executing_eagerly():
      return
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.PREDICT,
        logits=logits,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # Assert predictions and export_outputs.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      predictions = sess.run(spec.predictions)
      self.assertAllEqual(expected_export_classes,
                          predictions[prediction_keys.PredictionKeys.CLASSES])
      self.assertAllClose(logits,
                          predictions[prediction_keys.PredictionKeys.LOGITS])
      self.assertAllClose(
          expected_probabilities,
          predictions[prediction_keys.PredictionKeys.PROBABILITIES])

  def test_eval_create_loss(self):
    """Tests head.loss for eval mode."""
    n_classes = 2
    head = head_lib.MultiLabelHead(n_classes)
    logits = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # loss = (labels * -log(sigmoid(logits)) +
    #         (1 - labels) * -log(1 - sigmoid(logits))) / 2
    expected_training_loss = 0.5 * np.sum(
        _sigmoid_cross_entropy(labels=labels, logits=logits))
    actual_training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
    self.assertAllClose(expected_training_loss,
                        self.evaluate(actual_training_loss))

  def test_eval_create_loss_large_logits(self):
    """Tests head.loss for eval mode and large logits."""
    n_classes = 2
    head = head_lib.MultiLabelHead(n_classes)
    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    # loss = labels * -log(sigmoid(logits)) +
    #        (1 - labels) * -log(1 - sigmoid(logits))
    # For large logits, this is approximated as:
    # loss = labels * (logits < 0) * (-logits) +
    #        (1 - labels) * (logits > 0) * logits
    expected_training_loss = 0.5 * np.sum(
        np.array([[(10. + 10.) / 2.], [(15. + 0.) / 2.]], dtype=np.float32))
    actual_training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL)
    self.assertAllClose(
        expected_training_loss, self.evaluate(actual_training_loss), atol=1e-4)
DN, 2\]\..*' r'\[Received shape: \] \[2\]'): actual_training_loss.eval({labels_placeholder: labels_2}) def test_eval_create_loss_loss_fn(self): """Tests head.loss for eval mode and custom loss_fn.""" loss = np.array([[1.], [2.]], dtype=np.float32) logits_input = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels_input = np.array([[1, 0], [1, 1]], dtype=np.int64) def _loss_fn(labels, logits): check_labels = tf.debugging.Assert( tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels]) check_logits = tf.debugging.Assert( tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits]) with tf.control_dependencies([check_labels, check_logits]): return tf.constant(loss) head = head_lib.MultiLabelHead(n_classes=2, loss_fn=_loss_fn) actual_training_loss = head.loss( logits=logits_input, labels=labels_input, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL) self.assertAllClose(np.sum(loss) / 2., self.evaluate(actual_training_loss)) def test_eval_create_loss_loss_fn_wrong_shape(self): """Tests custom loss_fn that returns Tensor of unexpected shape.""" loss = np.array([1., 2.], dtype=np.float32) def _loss_fn(labels, logits): del labels, logits # Unused return tf.constant(loss) head = head_lib.MultiLabelHead(n_classes=2, loss_fn=_loss_fn) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, 'loss_shape'): head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) else: actual_training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.EVAL) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. 
\] ' r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2\]'): self.evaluate(actual_training_loss) def test_eval_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib.MultiLabelHead(n_classes=2) with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.loss( logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=None, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.EVAL) def _test_eval(self, head, logits, labels, expected_loss, expected_metrics, features=None, regularization_losses=None): tol = 1e-3 if tf.executing_eagerly(): loss = head.loss( labels, logits, features=features or {}, mode=ModeKeys.EVAL, regularization_losses=regularization_losses) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) eval_metrics = head.metrics(regularization_losses=regularization_losses) updated_metrics = head.update_metrics( eval_metrics, features or {}, logits, labels, regularization_losses=regularization_losses) self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys()) self.assertAllClose( expected_metrics, {k: updated_metrics[k].result() for k in updated_metrics}, rtol=tol, atol=tol) return spec = head.create_estimator_spec( features=features or {}, mode=ModeKeys.EVAL, logits=logits, labels=labels, regularization_losses=regularization_losses, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert spec contains expected tensors. self.assertIsNotNone(spec.loss) self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys()) self.assertIsNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, and metrics. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNone(spec.scaffold.summary_op) value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops} update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops} loss, _ = sess.run((spec.loss, update_ops)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) # Check results of value ops (in `metrics`). self.assertAllClose( expected_metrics, {k: value_ops[k].eval() for k in value_ops}, rtol=tol, atol=tol) def test_eval(self): n_classes = 2 head = head_lib.MultiLabelHead(n_classes) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_sparse_labels(self): n_classes = 2 head = head_lib.MultiLabelHead(n_classes) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) # Equivalent to multi_hot = [[1, 0], [1, 1]] labels = tf.sparse.SparseTensor( values=[0, 0, 1], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. 
expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_with_regularization_losses(self): n_classes = 2 head = head_lib.MultiLabelHead(n_classes) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) regularization_losses = [1.5, 0.5] expected_regularization_loss = 2. # unregularized_loss = sum( # labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits))) / batch_size expected_unregularized_loss = np.sum( _sigmoid_cross_entropy(labels=labels, logits=logits)) / 2. expected_regularized_loss = ( expected_unregularized_loss + expected_regularization_loss) keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_unregularized_loss, keys.LOSS_REGULARIZATION: expected_regularization_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. 
keys.AUC: 0.3333, keys.AUC_PR: 0.7689, } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_regularized_loss, expected_metrics=expected_metrics, regularization_losses=regularization_losses) def test_eval_with_label_vocabulary(self): n_classes = 2 head = head_lib.MultiLabelHead( n_classes, label_vocabulary=['class0', 'class1']) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) # Equivalent to multi_hot = [[1, 0], [1, 1]] labels = tf.sparse.SparseTensor( values=['class0', 'class0', 'class1'], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_with_label_vocabulary_with_multi_hot_input(self): n_classes = 2 head = head_lib.MultiLabelHead( n_classes, label_vocabulary=['class0', 'class1']) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. 
keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, } self._test_eval( head=head, logits=logits, labels=labels_multi_hot, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_with_thresholds(self): n_classes = 2 thresholds = [0.25, 0.5, 0.75] head = head_lib.MultiLabelHead(n_classes, thresholds=thresholds) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 2. / 4., keys.PRECISION_AT_THRESHOLD % thresholds[0]: 2. / 3., keys.RECALL_AT_THRESHOLD % thresholds[0]: 2. / 3., keys.ACCURACY_AT_THRESHOLD % thresholds[1]: 1. / 4., keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1. / 2., keys.RECALL_AT_THRESHOLD % thresholds[1]: 1. / 3., keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 2. / 4., keys.PRECISION_AT_THRESHOLD % thresholds[2]: 1. / 1., keys.RECALL_AT_THRESHOLD % thresholds[2]: 1. 
/ 3., } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_with_classes_for_class_based_metrics(self): head = head_lib.MultiLabelHead( n_classes=2, classes_for_class_based_metrics=[0, 1]) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, keys.PROBABILITY_MEAN_AT_CLASS % 0: tf.math.reduce_sum(tf.math.sigmoid(logits[:, 0])) / 2., keys.AUC_AT_CLASS % 0: 0., keys.AUC_PR_AT_CLASS % 0: 1., keys.PROBABILITY_MEAN_AT_CLASS % 1: tf.math.reduce_sum(tf.math.sigmoid(logits[:, 1])) / 2., keys.AUC_AT_CLASS % 1: 1., keys.AUC_PR_AT_CLASS % 1: 1., } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_with_classes_for_class_based_metrics_string(self): head = head_lib.MultiLabelHead( n_classes=2, label_vocabulary=['a', 'b'], classes_for_class_based_metrics=['a', 'b']) logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) labels = tf.sparse.SparseTensor( values=['a', 'a', 'b'], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) labels_onehot = np.array([[1, 0], [1, 1]], dtype=np.int64) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # Sum over examples, divide by batch_size. 
expected_loss = 0.5 * np.sum( _sigmoid_cross_entropy(labels=labels_onehot, logits=logits)) keys = metric_keys.MetricKeys expected_metrics = { # Average loss over examples. keys.LOSS_MEAN: expected_loss, # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, keys.AUC_PR: 0.7689, keys.PROBABILITY_MEAN_AT_NAME % 'a': tf.math.reduce_sum(tf.math.sigmoid(logits[:, 0])) / 2., keys.AUC_AT_NAME % 'a': 0., keys.AUC_PR_AT_NAME % 'a': 1., keys.PROBABILITY_MEAN_AT_NAME % 'b': tf.math.reduce_sum(tf.math.sigmoid(logits[:, 1])) / 2., keys.AUC_AT_NAME % 'b': 1., keys.AUC_PR_AT_NAME % 'b': 1., } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) def test_eval_with_weights(self): n_classes = 2 head = head_lib.MultiLabelHead(n_classes, weight_column='example_weights') logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) features = { 'x': np.array([[41], [42]], dtype=np.int32), 'example_weights': np.array([[1.], [2.]], dtype=np.float32), } # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes, weighted sum over examples, divide by batch_size. # loss = (1 * (10 + 10) / 2 + 2 * (15 + 0) / 2) / 2 expected_loss = 12.5 keys = metric_keys.MetricKeys expected_metrics = { # Average loss over weighted examples (denominator is sum(weights)). keys.LOSS_MEAN: expected_loss * (2. / 3.), # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. 
keys.AUC: 0.2000, keys.AUC_PR: 0.7280, } self._test_eval( head=head, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics, features=features) def test_train_create_loss_large_logits(self): """Tests head.create_loss for train mode and large logits.""" n_classes = 2 head = head_lib.MultiLabelHead(n_classes, weight_column='example_weights') logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) weights = np.array([[1.], [2.]], dtype=np.float32) features = { 'x': np.array(((42,),), dtype=np.int32), 'example_weights': weights } # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # For large logits, this is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits # expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]] # expected_weights = [[1.], [2.]] expected_training_loss = (1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.) / 2. training_loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertAllClose( expected_training_loss, self.evaluate(training_loss), atol=1e-4) def test_train_create_loss_loss_reduction(self): """Tests head.create_loss with loss_reduction.""" n_classes = 2 head = head_lib.MultiLabelHead( n_classes, weight_column='example_weights', loss_reduction=tf.losses.Reduction.SUM) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) weights = np.array([[1.], [2.]], dtype=np.float32) # loss = labels * -log(sigmoid(logits)) + # (1 - labels) * -log(1 - sigmoid(logits)) # For large logits, this is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits # expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]] # expected_weights = [[1.], [2.]] expected_training_loss = (1. * (10. + 10.) + 2. * (15. + 0.)) / 2. 
training_loss = head.loss( logits=logits, labels=labels, features={ 'x': np.array(((42,),), dtype=np.int32), 'example_weights': weights }, mode=ModeKeys.TRAIN) self.assertAllClose( expected_training_loss, self.evaluate(training_loss), atol=1e-4) def test_train_labels_none(self): """Tests that error is raised when labels is None.""" head = head_lib.MultiLabelHead(n_classes=2) with self.assertRaisesRegexp( ValueError, r'You must provide a labels Tensor\. Given: None\.'): head.loss( logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=None, features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN) def test_train_invalid_indicator_labels(self): head = head_lib.MultiLabelHead(n_classes=2) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) # The value 2 is outside the allowed range. labels = np.array([[2, 0], [1, 1]], dtype=np.int64) if tf.executing_eagerly(): with self.assertRaisesRegexp( ValueError, r'labels must be an integer indicator Tensor with values in ' r'\[0, 1\]'): head.loss( logits=logits, labels=labels, features={}, mode=ModeKeys.TRAIN) return def _train_op_fn(loss): del loss return tf.no_op() with self.cached_session() as sess: with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'labels must be an integer indicator Tensor with values in ' r'\[0, 1\]'): spec = head.create_estimator_spec( features={}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[ tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32) ]) test_lib._initialize_variables(self, spec.scaffold) sess.run(spec.loss) def test_train_invalid_sparse_labels(self): head = head_lib.MultiLabelHead(n_classes=2) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) # The value 2 is outside the allowed range. 
labels = tf.sparse.SparseTensor( values=[2, 0, 1], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) if tf.executing_eagerly(): with self.assertRaisesRegexp( ValueError, r'labels must be an integer SparseTensor with values in \[0, 2\)'): head.loss( logits=logits, labels=labels, features={}, mode=ModeKeys.TRAIN) return def _train_op_fn(loss): del loss return tf.no_op() with self.cached_session() as sess: with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'labels must be an integer SparseTensor with values in \[0, 2\)'): spec = head.create_estimator_spec( features={}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[ tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32) ]) test_lib._initialize_variables(self, spec.scaffold) sess.run(spec.loss) def _test_train(self, head, logits, labels, expected_loss): tol = 1e-3 features = {'x': np.array(((42,),), dtype=np.int32)} if tf.executing_eagerly(): loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. 
with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries( self, {metric_keys.MetricKeys.LOSS: expected_loss}, summary_str, tol) def test_train(self): head = head_lib.MultiLabelHead(n_classes=2) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes, sum over examples, divide by batch_size. # loss = ((10 + 10) / 2 + (15 + 0) / 2 ) / 2 expected_loss = 8.75 self._test_train( head=head, logits=logits, labels=labels, expected_loss=expected_loss) def test_train_sparse_labels(self): head = head_lib.MultiLabelHead(n_classes=2) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) # Equivalent to multi_hot = [[1, 0], [1, 1]] labels = tf.sparse.SparseTensor( values=[0, 0, 1], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes, sum over examples, divide by batch_size. 
# loss = ((10 + 10) / 2 + (15 + 0) / 2 ) / 2 expected_loss = 8.75 self._test_train( head=head, logits=logits, labels=labels, expected_loss=expected_loss) def test_train_with_label_vocabulary(self): head = head_lib.MultiLabelHead( n_classes=2, label_vocabulary=['class0', 'class1']) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) # Equivalent to multi_hot = [[1, 0], [1, 1]] labels = tf.sparse.SparseTensor( values=['class0', 'class0', 'class1'], indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes, sum over examples, divide by batch_size. # loss = ((10 + 10) / 2 + (15 + 0) / 2 ) / 2 expected_loss = 8.75 self._test_train( head=head, logits=logits, labels=labels, expected_loss=expected_loss) def test_train_with_regularization_losses(self): head = head_lib.MultiLabelHead(n_classes=2) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) regularization_losses = [1.5, 0.5] features = {'x': np.array(((42,),), dtype=np.int32)} # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes and over batch and add regularization loss. expected_loss = 35. / 4. + 2. 
expected_summaries = { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_REGULARIZATION: 2., } tol = 1e-3 loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN, regularization_losses=regularization_losses) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, self.evaluate(loss), rtol=tol, atol=tol) if tf.executing_eagerly(): return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, regularization_losses=regularization_losses, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) # Assert predictions, loss, train_op, and summaries. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries(self, expected_summaries, summary_str, tol) def test_train_with_weights(self): n_classes = 2 head = head_lib.MultiLabelHead(n_classes, weight_column='example_weights') logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) features = { 'x': np.array([[41], [42]], dtype=np.int32), 'example_weights': np.array([[1.], [2.]], dtype=np.float32), } # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes, weighted sum over examples, divide by batch_size. 
# loss = (1 * (10 + 10) / 2 + 2 * (15 + 0) / 2) / 2 expected_loss = 12.5 tol = 1e-3 loss = head.loss( logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, self.evaluate(loss), rtol=tol, atol=tol) if tf.executing_eagerly(): return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) self.assertIsNotNone(spec.train_op) self.assertIsNone(spec.export_outputs) test_lib._assert_no_hooks(self, spec) # Assert predictions, loss, train_op, and summaries. with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) self.assertIsNotNone(spec.scaffold.summary_op) loss, train_result, summary_str = sess.run( (spec.loss, spec.train_op, spec.scaffold.summary_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) test_lib._assert_simple_summaries(self, { metric_keys.MetricKeys.LOSS: expected_loss, }, summary_str, tol) def test_multi_dim_weighted_train_create_loss(self): """Logits and labels of shape [2, 2, 3], weights [2, 2].""" head = head_lib.MultiLabelHead(n_classes=3, weight_column='weights') logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) labels = np.array([[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) # unreduced_loss = # [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 # = [[20/3, 10/3], [4, 8]] # expected_unreduced_loss = 
[[[20./3.], [10./3.]], [[4.], [8.]]] # weights are reshaped to [2, 2, 1] to match logits. # expected_weights = [[[1.], [1.5]], [[2.], [2.5]]] # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167 expected_training_loss = 9.9167 training_loss = head.loss( logits=logits, labels=labels, features={'weights': weights}, mode=ModeKeys.TRAIN) atol = 1.e-3 self.assertAllClose( expected_training_loss, self.evaluate(training_loss), atol=atol) def test_multi_dim_weighted_train(self): """Logits and labels of shape [2, 2, 3], weights [2, 2].""" head = head_lib.MultiLabelHead(n_classes=3, weight_column='weights') logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) labels = np.array([[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 # = [[20/3, 10/3], [4, 8]] # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167 expected_loss = 9.9167 atol = 1.e-3 loss = head.loss( logits=logits, labels=labels, features={'weights': weights}, mode=ModeKeys.TRAIN) self.assertIsNotNone(loss) self.assertAllClose(expected_loss, self.evaluate(loss), atol=atol) if tf.executing_eagerly(): return expected_train_result = 'my_train_op' def _train_op_fn(loss): return tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) spec = head.create_estimator_spec( features={'weights': weights}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) loss, train_result = sess.run((spec.loss, spec.train_op)) self.assertAllClose(expected_loss, loss, atol=atol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) def 
test_multi_dim_weights_wrong_inner_dim(self): """Logits and labels of shape [2, 2, 3], weights [2, 1].""" head = head_lib.MultiLabelHead(n_classes=3, weight_column='weights') logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) labels = np.array([[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) weights = np.array([[1.], [2.]], dtype=np.float32) if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, 'weights shape'): head.loss( logits=logits, labels=labels, features={'weights': weights}, mode=ModeKeys.TRAIN) return def _train_op_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features={'weights': weights}, mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'): spec.loss.eval() def test_multi_dim_weights_wrong_outer_dim(self): """Logits and labels of shape [2, 2, 3], weights [2, 2, 3].""" head = head_lib.MultiLabelHead(n_classes=3, weight_column='weights') logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) labels = np.array([[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) weights = np.array( [[[1., 1., 1.], [1.5, 1.5, 1.5]], [[2., 2., 2.], [2.5, 2.5, 2.5]]], dtype=np.float32) if tf.executing_eagerly(): with self.assertRaisesRegexp(ValueError, 'weights shape'): head.loss( logits=logits, labels=labels, features={'weights': weights}, mode=ModeKeys.TRAIN) return weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32) def _train_op_fn(loss): del loss return tf.no_op() spec = head.create_estimator_spec( features={'weights': weights_placeholder}, 
mode=ModeKeys.TRAIN, logits=logits, labels=labels, train_op_fn=_train_op_fn, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session(): test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold()) with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 2 3\]'): spec.loss.eval({weights_placeholder: weights}) def test_multi_dim_weighted_eval(self): """Logits and labels of shape [2, 2, 3], weights [2, 2].""" head = head_lib.MultiLabelHead(n_classes=3, weight_column='weights') logits = np.array([[[-10., 10., -10.], [10., -10., 10.]], [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32) labels = np.array([[[1, 0, 0], [1, 0, 0]], [[0, 1, 1], [0, 1, 1]]], dtype=np.int64) weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32) # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3 # = [[20/3, 10/3], [4, 8]] # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167 expected_loss = 9.9167 keys = metric_keys.MetricKeys expected_metrics = { keys.LOSS_MEAN: expected_loss * (4. / np.sum(weights)), # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. 
keys.AUC: 0.4977, keys.AUC_PR: 0.5461, } self._test_eval( head=head, features={'weights': weights}, logits=logits, labels=labels, expected_loss=expected_loss, expected_metrics=expected_metrics) @test_util.deprecated_graph_mode_only class MultiLabelHeadForEstimator(tf.test.TestCase): """Tests for create_estimator_spec running in Graph mode only.""" def test_invalid_trainable_variables(self): head = head_lib.MultiLabelHead(n_classes=2) class _Optimizer(tf_keras.optimizers.Optimizer): def get_updates(self, loss, params): del params return [ tf.strings.join([ tf.constant('my_train_op'), tf.strings.as_string(loss, precision=2) ]) ] def get_config(self): config = super(_Optimizer, self).get_config() return config with self.assertRaisesRegexp(ValueError, r'trainable_variables cannot be None'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN, logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=np.array([[1, 0], [1, 1]], dtype=np.int64), optimizer=_Optimizer('my_optimizer'), trainable_variables=None) with self.assertRaisesRegexp( ValueError, r'trainable_variables should be a list or a tuple'): head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN, logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=np.array([[1, 0], [1, 1]], dtype=np.int64), optimizer=_Optimizer('my_optimizer'), trainable_variables={ 'var_list': [tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)] }) def test_train_with_optimizer(self): head = head_lib.MultiLabelHead(n_classes=2) logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) features = {'x': np.array(((42,),), dtype=np.int32)} # For large logits, sigmoid cross entropy loss is approximated as: # loss = labels * (logits < 0) * (-logits) + # (1 - labels) * (logits > 0) * logits => # expected_unweighted_loss = [[10., 10.], [15., 0.]] # Average over classes, sum 
over examples, divide by batch_size. # loss = ((10 + 10) / 2 + (15 + 0) / 2 ) / 2 expected_loss = 8.75 expected_train_result = 'my_train_op' class _Optimizer(tf_keras.optimizers.Optimizer): def get_updates(self, loss, params): del params return [ tf.strings.join([ tf.constant(expected_train_result), tf.strings.as_string(loss, precision=3) ]) ] def get_config(self): config = super(_Optimizer, self).get_config() return config spec = head.create_estimator_spec( features=features, mode=ModeKeys.TRAIN, logits=logits, labels=labels, optimizer=_Optimizer('my_optimizer'), trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) tol = 1e-3 with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) loss, train_result = sess.run((spec.loss, spec.train_op)) self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) self.assertEqual( six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) def test_predict_with_label_vocabulary(self): n_classes = 4 head = head_lib.MultiLabelHead( n_classes, label_vocabulary=['foo', 'bar', 'foobar', 'barfoo']) logits = np.array([[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32) expected_export_classes = [[b'foo', b'bar', b'foobar', b'barfoo']] * 2 spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.PREDICT, logits=logits, trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) predictions = sess.run(spec.predictions) self.assertAllEqual(expected_export_classes, predictions[prediction_keys.PredictionKeys.CLASSES]) self.assertAllClose(logits, predictions[prediction_keys.PredictionKeys.LOGITS]) self.assertAllEqual( expected_export_classes, sess.run(spec.export_outputs[test_lib._DEFAULT_SERVING_KEY].classes)) def test_train_with_update_ops(self): with tf.Graph().as_default(): w = tf.Variable(1) update_op = 
w.assign_add(1) t = tf.Variable('') expected_train_result = b'my_train_op' def _train_op_fn(loss): del loss return t.assign(expected_train_result) head = head_lib.MultiLabelHead(n_classes=2) spec = head.create_estimator_spec( features={'x': np.array(((42,),), dtype=np.int32)}, mode=ModeKeys.TRAIN, logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), labels=np.array([[1, 0], [1, 1]], dtype=np.int64), train_op_fn=_train_op_fn, update_ops=[update_op], trainable_variables=[ tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32) ]) with self.cached_session() as sess: test_lib._initialize_variables(self, spec.scaffold) sess.run(spec.train_op) w_value, t_value = sess.run([w, t]) self.assertEqual(2, w_value) self.assertEqual(expected_train_result, t_value) def test_lookup_tables_in_graph(self): n_classes = 2 head = head_lib.MultiLabelHead( n_classes=n_classes, label_vocabulary=['class0', 'class1']) feature_columns = [tf.feature_column.numeric_column('x')] # Create dnn estimator. est = dnn.DNNEstimatorV2( head=head, hidden_units=(2, 2), feature_columns=feature_columns) def input_fn(): return ({ 'x': np.array(((42,), (43,),), dtype=np.int32) }, np.array([[1, 0], [1, 1]], dtype=np.int64)) # Train. num_steps = 1 est.train(input_fn, steps=num_steps) # Eval. eval_results = est.evaluate(input_fn, steps=num_steps) self.assertEqual(num_steps, eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(eval_results)) # Predict. est.predict(input_fn) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/head/regression_head.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Regression head.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.framework import ops from tensorflow_estimator.python.estimator import model_fn from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.export import export_output from tensorflow_estimator.python.estimator.head import base_head from tensorflow_estimator.python.estimator.mode_keys import ModeKeys from tensorflow_estimator.python.estimator.util import tf_keras_v2 @estimator_export('estimator.RegressionHead') class RegressionHead(base_head.Head): """Creates a `Head` for regression using the `mean_squared_error` loss. The loss is the weighted sum over all input dimensions. Namely, if the input labels have shape `[batch_size, label_dimension]`, the loss is the weighted sum over both `batch_size` and `label_dimension`. The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. In many applications, the shape is `[batch_size, label_dimension]`. The `labels` shape must match `logits`, namely `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape `[D0, D1, ... DN]` is also supported. 
If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN, label_dimension]`. Supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or `(labels, logits, features, loss_reduction)` as arguments and returns unreduced loss with shape `[D0, D1, ... DN, label_dimension]`. Also supports custom `inverse_link_fn`, also known as 'mean function'. `inverse_link_fn` is only used in `PREDICT` mode. It takes `logits` as argument and returns predicted values. This function is the inverse of the link function defined in https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function Namely, for poisson regression, set `inverse_link_fn=tf.exp`. Usage: >>> head = tf.estimator.RegressionHead() >>> logits = np.array(((45,), (41,),), dtype=np.float32) >>> labels = np.array(((43,), (44,),), dtype=np.int32) >>> features = {'x': np.array(((42,),), dtype=np.float32)} >>> # expected_loss = weighted_loss / batch_size >>> # = (43-45)^2 + (44-41)^2 / 2 = 6.50 >>> loss = head.loss(labels, logits, features=features) >>> print('{:.2f}'.format(loss.numpy())) 6.50 >>> eval_metrics = head.metrics() >>> updated_metrics = head.update_metrics( ... eval_metrics, features, logits, labels) >>> for k in sorted(updated_metrics): ... print('{} : {:.2f}'.format(k, updated_metrics[k].result().numpy())) average_loss : 6.50 label/mean : 43.50 prediction/mean : 43.00 >>> preds = head.predictions(logits) >>> print(preds['predictions']) tf.Tensor( [[45.] [41.]], shape=(2, 1), dtype=float32) Usage with a canned estimator: ```python my_head = tf.estimator.RegressionHead() my_estimator = tf.estimator.DNNEstimator( head=my_head, hidden_units=..., feature_columns=...) ``` It can also be used with a custom `model_fn`. 
Example: ```python def _my_model_fn(features, labels, mode): my_head = tf.estimator.RegressionHead() logits = tf_keras.Model(...)(features) return my_head.create_estimator_spec( features=features, mode=mode, labels=labels, optimizer=tf_keras.optimizers.Adagrad(lr=0.1), logits=logits) my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn) ``` Args: weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. label_dimension: Number of regression labels per example. This is the size of the last dimension of the labels `Tensor` (typically, this has shape `[batch_size, label_dimension]`). loss_reduction: One of `tf.losses.Reduction` except `NONE`. Decides how to reduce training loss over batch and label dimension. Defaults to `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by `batch_size * label_dimension`. loss_fn: Optional loss function. Defaults to `mean_squared_error`. inverse_link_fn: Optional inverse link function, also known as 'mean function'. Defaults to identity. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. """ def __init__(self, label_dimension=1, weight_column=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, loss_fn=None, inverse_link_fn=None, name=None): if label_dimension < 1: raise ValueError('Invalid label_dimension {}.'.format(label_dimension)) base_head.validate_loss_reduction(loss_reduction) if loss_fn: base_head.validate_loss_fn_args(loss_fn) self._logits_dimension = label_dimension self._weight_column = weight_column self._loss_reduction = loss_reduction self._loss_fn = loss_fn self._inverse_link_fn = inverse_link_fn self._name = name # Metric keys. 
keys = metric_keys.MetricKeys self._loss_mean_key = self._summary_key(keys.LOSS_MEAN) self._prediction_mean_key = self._summary_key(keys.PREDICTION_MEAN) self._label_mean_key = self._summary_key(keys.LABEL_MEAN) self._loss_regularization_key = self._summary_key(keys.LOSS_REGULARIZATION) @property def name(self): """See `base_head.Head` for details.""" return self._name @property def logits_dimension(self): """See `base_head.Head` for details.""" return self._logits_dimension @property def loss_reduction(self): """See `base_head.Head` for details.""" return self._loss_reduction def _processed_labels(self, logits, labels): labels = base_head.check_dense_labels_match_logits_and_reshape( labels=labels, logits=logits, expected_labels_dimension=self._logits_dimension) labels = tf.cast(labels, dtype=tf.dtypes.float32) return labels def _unweighted_loss_and_weights(self, logits, labels, features): """Computes unweighted loss and weights.""" if self._loss_fn: unweighted_loss = base_head.call_loss_fn( loss_fn=self._loss_fn, labels=labels, logits=logits, features=features, expected_loss_dim=self._logits_dimension) else: unweighted_loss = tf.compat.v1.losses.mean_squared_error( labels=labels, predictions=logits, reduction=tf.compat.v1.losses.Reduction.NONE) weights = base_head.get_weights_and_check_match_logits( features=features, weight_column=self._weight_column, logits=logits, allow_per_logit_weights=True) return unweighted_loss, weights def loss(self, labels, logits, features=None, mode=None, regularization_losses=None): """Return predictions based on keys. See `base_head.Head` for details.""" del mode # Unused for this head. 
with ops.name_scope( 'losses', values=(logits, labels, regularization_losses, features)): logits = base_head.check_logits_final_dim(logits, self._logits_dimension) labels = self._processed_labels(logits, labels) unweighted_loss, weights = self._unweighted_loss_and_weights( logits, labels, features) training_loss = tf_keras_v2.__internal__.losses.compute_weighted_loss( unweighted_loss, sample_weight=weights, reduction=self._loss_reduction) regularization_loss = tf.math.add_n( regularization_losses) if regularization_losses is not None else None regularized_training_loss = ( training_loss + regularization_loss if regularization_loss is not None else training_loss) return regularized_training_loss def predictions(self, logits): """Return predictions based on keys. See `base_head.Head` for details. Args: logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`. For many applications, the shape is `[batch_size, logits_dimension]`. Returns: A dict of predictions. """ logits = base_head.check_logits_final_dim(logits, self._logits_dimension) pred_keys = prediction_keys.PredictionKeys with ops.name_scope('predictions', values=(logits,)): if self._inverse_link_fn: predicted_value = self._inverse_link_fn(logits) predictions = { pred_keys.PREDICTIONS: predicted_value, pred_keys.LOGITS: logits, } else: predicted_value = logits predictions = {pred_keys.PREDICTIONS: predicted_value} return predictions def metrics(self, regularization_losses=None): """Creates metrics. 
See `base_head.Head` for details.""" with ops.name_scope('metrics', values=(regularization_losses,)): keys = metric_keys.MetricKeys eval_metrics = {} eval_metrics[self._loss_mean_key] = tf_keras.metrics.Mean( name=keys.LOSS_MEAN) eval_metrics[self._prediction_mean_key] = tf_keras.metrics.Mean( name=keys.PREDICTION_MEAN) eval_metrics[self._label_mean_key] = tf_keras.metrics.Mean( name=keys.LABEL_MEAN) if regularization_losses is not None: eval_metrics[self._loss_regularization_key] = tf_keras.metrics.Mean( name=keys.LOSS_REGULARIZATION) return eval_metrics def update_metrics(self, eval_metrics, features, logits, labels, regularization_losses=None): """Updates eval metrics. See `base_head.Head` for details.""" # Compute predictions. predictions = self.predictions(logits) predicted_value = predictions[prediction_keys.PredictionKeys.PREDICTIONS] logits = base_head.check_logits_final_dim(logits, self.logits_dimension) label_ids = self._processed_labels(logits, labels) unweighted_loss, weights = self._unweighted_loss_and_weights( logits, label_ids, features) # Update metrics. eval_metrics[self._loss_mean_key].update_state( values=unweighted_loss, sample_weight=weights) eval_metrics[self._label_mean_key].update_state( values=labels, sample_weight=weights) base_head.update_metric_with_broadcast_weights( eval_metrics[self._prediction_mean_key], predicted_value, weights) if regularization_losses is not None: regularization_loss = tf.math.add_n(regularization_losses) eval_metrics[self._loss_regularization_key].update_state( values=regularization_loss) return eval_metrics def _create_tpu_estimator_spec(self, features, mode, logits, labels=None, optimizer=None, trainable_variables=None, train_op_fn=None, update_ops=None, regularization_losses=None): """Returns an `EstimatorSpec`. Args: features: Input `dict` mapping string feature names to `Tensor` or `SparseTensor` objects containing the values for that feature in a minibatch. Often to be used to fetch example-weight tensor. 
mode: Estimator's `ModeKeys`. logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`. For many applications, the shape is `[batch_size, logits_dimension]`. labels: Labels `Tensor` with shape matching `logits`, namely `[D0, D1, ... DN, logits_dimension]`. When `logits_dimension=1`, shape `[D0, D1, ... DN]` is also supported. `labels` is a required argument when `mode` equals `TRAIN` or `EVAL`. optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the loss in TRAIN mode. Namely, sets `train_op = optimizer.get_updates(loss, trainable_variables)`, which updates variables to minimize `loss`. trainable_variables: A list or tuple of `Variable` objects to update to minimize `loss`. In Tensorflow 1.x, by default these are the list of variables collected in the graph under the key `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have collections and GraphKeys, trainable_variables need to be passed explicitly here. train_op_fn: Function that takes a scalar loss `Tensor` and returns `train_op`. Used if `optimizer` is `None`. update_ops: A list or tuple of update ops to be run at training time. For example, layers such as BatchNormalization create mean and variance update ops that need to be run at training time. In Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As Tensorflow 2.x doesn't have collections, update_ops need to be passed explicitly here. regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. These losses are usually expressed as a batch average, so for best results users need to set `loss_reduction=SUM_OVER_BATCH_SIZE` when creating the head to avoid scaling errors. Returns: A `model_fn._TPUEstimatorSpec` instance. Raises: ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN mode, or if both are set. """ with ops.name_scope(self._name, 'head'): # Predict. 
predictions = self.predictions(logits) if mode == ModeKeys.PREDICT: keys = prediction_keys.PredictionKeys regression_output = export_output.RegressionOutput( value=predictions[keys.PREDICTIONS]) return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.PREDICT, predictions=predictions, export_outputs={ base_head.DEFAULT_SERVING_KEY: regression_output, base_head.REGRESS_SERVING_KEY: regression_output, base_head.PREDICT_SERVING_KEY: export_output.PredictOutput( predictions) }) regularized_training_loss = self.loss( logits=logits, labels=labels, features=features, mode=mode, regularization_losses=regularization_losses) # Eval. if mode == ModeKeys.EVAL: eval_metrics = self.metrics(regularization_losses=regularization_losses) return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.EVAL, predictions=predictions, loss=regularized_training_loss, eval_metrics=base_head.create_eval_metrics_tuple( self.update_metrics, { 'eval_metrics': eval_metrics, 'features': features, 'logits': logits, 'labels': labels, 'regularization_losses': regularization_losses })) # Train. train_op = base_head.create_estimator_spec_train_op( head_name=self._name, optimizer=optimizer, train_op_fn=train_op_fn, update_ops=update_ops, trainable_variables=trainable_variables, regularized_training_loss=regularized_training_loss, loss_reduction=self._loss_reduction) # Create summary. base_head.create_estimator_spec_summary( regularized_training_loss=regularized_training_loss, regularization_losses=regularization_losses, summary_key_fn=self._summary_key) return model_fn._TPUEstimatorSpec( # pylint: disable=protected-access mode=ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, train_op=train_op) @estimator_export('estimator.PoissonRegressionHead') class PoissonRegressionHead(RegressionHead): """Creates a `Head` for poisson regression using `tf.nn.log_poisson_loss`. The loss is the weighted sum over all input dimensions. 
Namely, if the input labels have shape `[batch_size, label_dimension]`, the loss is the weighted sum over both `batch_size` and `label_dimension`. The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. In many applications, the shape is `[batch_size, label_dimension]`. The `labels` shape must match `logits`, namely `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape `[D0, D1, ... DN]` is also supported. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN, label_dimension]`. This is implemented as a generalized linear model, see https://en.wikipedia.org/wiki/Generalized_linear_model. The head can be used with a canned estimator. Example: ```python my_head = tf.estimator.PoissonRegressionHead() my_estimator = tf.estimator.DNNEstimator( head=my_head, hidden_units=..., feature_columns=...) ``` It can also be used with a custom `model_fn`. Example: ```python def _my_model_fn(features, labels, mode): my_head = tf.estimator.PoissonRegressionHead() logits = tf_keras.Model(...)(features) return my_head.create_estimator_spec( features=features, mode=mode, labels=labels, optimizer=tf_keras.optimizers.Adagrad(lr=0.1), logits=logits) my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn) ``` Args: weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. label_dimension: Number of regression labels per example. This is the size of the last dimension of the labels `Tensor` (typically, this has shape `[batch_size, label_dimension]`). loss_reduction: One of `tf.losses.Reduction` except `NONE`. Decides how to reduce training loss over batch and label dimension. Defaults to `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by `batch size * label_dimension`. 
compute_full_loss: Whether to include the constant `log(z!)` term in computing the poisson loss. See `tf.nn.log_poisson_loss` for the full documentation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. """ def __init__(self, label_dimension=1, weight_column=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, compute_full_loss=True, name=None): self._compute_full_loss = compute_full_loss super(PoissonRegressionHead, self).__init__( label_dimension=label_dimension, weight_column=weight_column, loss_reduction=loss_reduction, loss_fn=self._poisson_loss, inverse_link_fn=tf.math.exp, name=name) def _poisson_loss(self, labels, logits): return tf.nn.log_poisson_loss( targets=labels, log_input=logits, compute_full_loss=self._compute_full_loss) @estimator_export('estimator.LogisticRegressionHead') class LogisticRegressionHead(RegressionHead): """Creates a `Head` for logistic regression. Uses `sigmoid_cross_entropy_with_logits` loss, which is the same as `BinaryClassHead`. The differences compared to `BinaryClassHead` are: * Does not support `label_vocabulary`. Instead, labels must be float in the range [0, 1]. * Does not calculate some metrics that do not make sense, such as AUC. * In `PREDICT` mode, only returns logits and predictions (`=tf.sigmoid(logits)`), whereas `BinaryClassHead` also returns probabilities, classes, and class_ids. * Export output defaults to `RegressionOutput`, whereas `BinaryClassHead` defaults to `PredictOutput`. The head expects `logits` with shape `[D0, D1, ... DN, 1]`. In many applications, the shape is `[batch_size, 1]`. The `labels` shape must match `logits`, namely `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`. This is implemented as a generalized linear model, see https://en.wikipedia.org/wiki/Generalized_linear_model. 
The head can be used with a canned estimator. Example: ```python my_head = tf.estimator.LogisticRegressionHead() my_estimator = tf.estimator.DNNEstimator( head=my_head, hidden_units=..., feature_columns=...) ``` It can also be used with a custom `model_fn`. Example: ```python def _my_model_fn(features, labels, mode): my_head = tf.estimator.LogisticRegressionHead() logits = tf_keras.Model(...)(features) return my_head.create_estimator_spec( features=features, mode=mode, labels=labels, optimizer=tf_keras.optimizers.Adagrad(lr=0.1), logits=logits) my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn) ``` Args: weight_column: A string or a `NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It will be multiplied by the loss of the example. loss_reduction: One of `tf.losses.Reduction` except `NONE`. Decides how to reduce training loss over batch and label dimension. Defaults to `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by `batch size * label_dimension`. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. Also used as `name_scope` when creating ops. """ def _logistic_loss(self, labels, logits): labels = base_head.check_label_range( labels, n_classes=2, message='Labels must be in range [0, 1]') return tf.compat.v1.nn.sigmoid_cross_entropy_with_logits( labels=labels, logits=logits) def __init__(self, weight_column=None, loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, name=None): super(LogisticRegressionHead, self).__init__( label_dimension=1, weight_column=weight_column, loss_reduction=loss_reduction, loss_fn=self._logistic_loss, inverse_link_fn=tf.math.sigmoid, name=name) ================================================ FILE: tensorflow_estimator/python/estimator/head/regression_head_test.py ================================================ # Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for regression_head.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.canned import metric_keys from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.head import head_utils as test_lib from tensorflow_estimator.python.estimator.head import regression_head as head_lib from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.mode_keys import ModeKeys @test_util.run_all_in_graph_and_eager_modes class RegressionHead(tf.test.TestCase): def test_invalid_label_dimension(self): with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'): head_lib.RegressionHead(label_dimension=-1) with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'): head_lib.RegressionHead(label_dimension=0) def test_invalid_loss_reduction(self): with self.assertRaisesRegexp( ValueError, r'Invalid loss_reduction: invalid_loss_reduction'): head_lib.RegressionHead(loss_reduction='invalid_loss_reduction') with self.assertRaisesRegexp(ValueError, r'Invalid loss_reduction: 
none'):
      head_lib.RegressionHead(loss_reduction=tf.losses.Reduction.NONE)

  def test_loss_fn_arg_labels_missing(self):
    """A custom loss_fn without the required `labels` argument is rejected."""

    def _loss_fn(logits):
      del logits  # Unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: labels\. '
        r'Given arguments: \(\'logits\',\)'):
      head_lib.RegressionHead(loss_fn=_loss_fn)

  def test_loss_fn_arg_logits_missing(self):
    """A custom loss_fn without the required `logits` argument is rejected."""

    def _loss_fn(labels):
      del labels  # unused

    with self.assertRaisesRegexp(
        ValueError, r'loss_fn must contain argument: logits\. '
        r'Given arguments: \(\'labels\',\)'):
      head_lib.RegressionHead(loss_fn=_loss_fn)

  def test_loss_fn_arg_features_ok(self):
    """A loss_fn may additionally take `features`; construction succeeds."""

    def _loss_fn(labels, logits, features):
      del labels, logits, features  # Unused

    head_lib.RegressionHead(loss_fn=_loss_fn)

  def test_loss_fn_arg_invalid(self):
    """A loss_fn with an unsupported extra argument is rejected."""

    def _loss_fn(labels, logits, name=None):
      del labels, logits, name  # Unused

    with self.assertRaisesRegexp(ValueError,
                                 r'loss_fn has unexpected args: \[\'name\'\]'):
      head_lib.RegressionHead(loss_fn=_loss_fn)

  def test_invalid_logits(self):
    """Label dimension is 3, logits shape [1, 2, 1]."""
    head = head_lib.RegressionHead(label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    logits_1d = np.array(((45.,), (41.,),))

    # Static shape.
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      pred = head.predictions(logits_1d)
      self.evaluate(pred[prediction_keys.PredictionKeys.PREDICTIONS])
    if tf.executing_eagerly():
      return

    # Dynamic shape only works in Graph mode: feed the bad-shape logits
    # through a placeholder so the error surfaces at run time.
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),))},
        mode=ModeKeys.PREDICT,
        logits=logits_placeholder,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.predictions[prediction_keys.PredictionKeys.PREDICTIONS].eval(
            {logits_placeholder: logits_1d})

  def test_incompatible_labels_eval(self):
    """Eval-mode loss rejects labels whose shape mismatches the logits."""
    head = head_lib.RegressionHead(label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
    values_1d = np.array(((43.,), (44.,),))

    # Static shape.
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
        head.loss(
            logits=values_3d,
            labels=values_1d,
            features={'x': values_1d},
            mode=ModeKeys.EVAL)
      return

    # Dynamic shape only works in Graph mode.
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      head.create_estimator_spec(
          features={'x': values_3d},
          labels=values_3d,
          mode=ModeKeys.EVAL,
          logits=values_1d,
          train_op_fn=None,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': values_1d},
        mode=ModeKeys.EVAL,
        logits=logits_placeholder,
        labels=labels_placeholder,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.loss.eval({
            labels_placeholder: values_3d,
            logits_placeholder: values_1d
        })
    regularized_training_loss = head.loss(
        logits=logits_placeholder,
        labels=labels_placeholder,
        features={'x': values_1d},
        mode=ModeKeys.EVAL)
    with self.cached_session():
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
        regularized_training_loss.eval({
            labels_placeholder: values_1d,
            logits_placeholder: values_3d
        })

  def test_incompatible_labels_train(self):
    """Train-mode loss rejects labels whose shape mismatches the logits."""
    head = head_lib.RegressionHead(label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))  # shape [2, 3]
    values_1d = np.array(((43.,), (44.,),))  # shape [2, 1]

    # Static shape.
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
        head.loss(
            logits=values_3d,
            labels=values_1d,
            features={'x': values_1d},
            mode=ModeKeys.TRAIN)
      return

    # Dynamic shape only works in Graph mode.
    with self.assertRaisesRegexp(ValueError, 'logits shape'):
      head.create_estimator_spec(
          features={'x': values_3d},
          mode=ModeKeys.TRAIN,
          logits=values_1d,
          labels=values_3d,
          train_op_fn=lambda x: x,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])
    labels_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    logits_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    spec = head.create_estimator_spec(
        features={'x': values_1d},
        mode=ModeKeys.TRAIN,
        logits=logits_placeholder,
        labels=labels_placeholder,
        train_op_fn=lambda x: x,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      with self.assertRaisesRegexp(tf.errors.OpError, 'logits shape'):
        spec.loss.eval({
            labels_placeholder: values_3d,
            logits_placeholder: values_1d
        })
    regularized_training_loss = head.loss(
        logits=logits_placeholder,
        labels=labels_placeholder,
        features={'x': values_1d},
        mode=ModeKeys.TRAIN)
    with self.cached_session():
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
        regularized_training_loss.eval({
            labels_placeholder: values_1d,
            logits_placeholder: values_3d
        })

  def test_predict(self):
    """Predictions pass int logits through, cast to float32."""
    head = head_lib.RegressionHead()
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,),), dtype=np.int32)
    preds = head.predictions(logits)

    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), preds.keys())
    predictions = preds[prediction_key]
    self.assertEqual(tf.dtypes.float32, predictions.dtype)
    self.assertAllClose(logits, self.evaluate(predictions))
    if tf.executing_eagerly():
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),), dtype=np.int32)},
        mode=ModeKeys.PREDICT,
        logits=np.array(((45,), (41,),), dtype=np.int32),
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    self.assertIsNone(spec.loss)
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNone(spec.train_op)
    default_serving_key = (tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
    self.assertItemsEqual((default_serving_key, 'predict', 'regression'),
                          spec.export_outputs.keys())
    test_lib._assert_no_hooks(self, spec)

    # Assert predictions.
    with self.cached_session():
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertAllClose(
          logits, spec.export_outputs[default_serving_key].value.eval())
      self.assertAllClose(logits,
                          spec.export_outputs['regression'].value.eval())
      self.assertAllClose(
          logits, spec.export_outputs['predict'].outputs['predictions'].eval())

  def test_predict_with_inverse_link_fn(self):
    """inverse_link_fn transforms logits into predictions; logits also kept."""

    def _inverse_link_fn(logits):
      return logits - 10.

    head = head_lib.RegressionHead(inverse_link_fn=_inverse_link_fn)
    logits = np.array(((45,), (41,),), dtype=np.int32)
    preds = head.predictions(logits)

    keys = prediction_keys.PredictionKeys
    self.assertItemsEqual((keys.PREDICTIONS, keys.LOGITS), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[keys.PREDICTIONS].dtype)
    self.assertEqual(tf.dtypes.float32, preds[keys.LOGITS].dtype)
    expected_predictions = np.array(((35,), (31,),), dtype=np.int32)
    self.assertAllClose(expected_predictions,
                        self.evaluate(preds[keys.PREDICTIONS]))
    self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS]))
    if tf.executing_eagerly():
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),), dtype=np.int32)},
        mode=ModeKeys.PREDICT,
        logits=logits,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert spec contains expected tensors.
    default_serving_key = (tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
    self.assertItemsEqual((default_serving_key, 'predict', 'regression'),
                          spec.export_outputs.keys())

    # Assert predictions.
    with self.cached_session():
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertAllClose(
          expected_predictions,
          spec.export_outputs[default_serving_key].value.eval())
      self.assertAllClose(expected_predictions,
                          spec.export_outputs['regression'].value.eval())
      self.assertAllClose(
          expected_predictions,
          spec.export_outputs['predict'].outputs['predictions'].eval())
      self.assertAllClose(
          logits, spec.export_outputs['predict'].outputs['logits'].eval())

  def test_eval_create_loss(self):
    """Default head.loss is mean squared error averaged over the batch."""
    head = head_lib.RegressionHead()
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # loss = [(43-45)^2 + (44-41)^2] / batch_size = (4 + 9) / 2 = 6.5
    regularized_training_loss = head.loss(
        logits=logits, labels=labels, features=features)
    self.assertAllClose(6.5, self.evaluate(regularized_training_loss))

  def test_eval_create_loss_loss_fn(self):
    """Tests head.loss for eval mode and custom loss_fn."""
    loss = np.array([[0., 1.], [2., 3.]], dtype=np.float32)
    batch_size = 4.
    logits_input = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32)
    labels_input = np.array([[1., 0.], [2., -1.]], dtype=np.float32)

    def _loss_fn(labels, logits):
      # Graph-level asserts verify that loss_fn receives the exact tensors
      # that were passed to head.loss.
      check_labels = tf.debugging.Assert(
          tf.reduce_all(tf.math.equal(labels, labels_input)), data=[labels])
      check_logits = tf.debugging.Assert(
          tf.reduce_all(tf.math.equal(logits, logits_input)), data=[logits])
      with tf.control_dependencies([check_labels, check_logits]):
        return tf.constant(loss)

    head = head_lib.RegressionHead(label_dimension=2, loss_fn=_loss_fn)
    regularized_training_loss = head.loss(
        logits=logits_input,
        labels=labels_input,
        features={'x': np.array(((42,),), dtype=np.int32)})
    self.assertAllClose(
        np.sum(loss) / batch_size, self.evaluate(regularized_training_loss))

  def test_eval_create_loss_loss_fn_wrong_shape(self):
    """Tests custom loss_fn that returns Tensor of unexpected shape."""
    loss = np.array([[1.], [2.]], dtype=np.float32)

    def _loss_fn(labels, logits):
      del labels, logits  # Unused
      return tf.constant(loss)

    head = head_lib.RegressionHead(label_dimension=2, loss_fn=_loss_fn)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    logits = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32)
    labels = np.array([[1., 0.], [2., -1.]], dtype=np.float32)
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'loss_shape'):
        head.loss(logits=logits, labels=labels, features=features)
    else:
      regularized_training_loss = head.loss(
          logits=logits, labels=labels, features=features)
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 2\]\. \] '
          r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2 1\]'):
        self.evaluate(regularized_training_loss)

  def test_eval_labels_none(self):
    """Tests that error is raised when labels is None."""
    head = head_lib.RegressionHead()
    with self.assertRaisesRegexp(
        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.EVAL,
          logits=np.array(((45,), (41,),), dtype=np.float32),
          labels=None,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

  def test_eval(self):
    """End-to-end eval: predictions, loss, and eval metrics."""
    head = head_lib.RegressionHead()
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}

    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    preds = head.predictions(logits)
    self.assertItemsEqual((prediction_key,), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[prediction_key].dtype)
    self.assertAllClose(logits, self.evaluate(preds[prediction_key]))

    # weighted_loss = (43-45)^2 + (44-41)^2 = 13.
    # loss = weighted_loss / batch_size = (4+9) / 2 = 6.5
    expected_loss = 6.5
    # loss_mean = loss/sum(weights) = 13/2 = 6.5
    expected_loss_mean = 6.5
    if tf.executing_eagerly():
      eval_metrics = head.metrics()
      update_metrics = head.update_metrics(eval_metrics, features, logits,
                                           labels)
      self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                             metric_keys.MetricKeys.PREDICTION_MEAN,
                             metric_keys.MetricKeys.LABEL_MEAN),
                            update_metrics.keys())
      self.assertAllClose(
          expected_loss_mean,
          update_metrics[metric_keys.MetricKeys.LOSS_MEAN].result())
      loss = head.loss(labels, logits, features=features, mode=ModeKeys.EVAL)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
    else:
      # Create estimator spec.
      spec = head.create_estimator_spec(
          features=features,
          mode=ModeKeys.EVAL,
          logits=logits,
          labels=labels,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

      # Assert spec contains expected tensors.
      self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
      self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                             metric_keys.MetricKeys.PREDICTION_MEAN,
                             metric_keys.MetricKeys.LABEL_MEAN),
                            spec.eval_metric_ops.keys())
      self.assertIsNone(spec.train_op)
      self.assertIsNone(spec.export_outputs)
      test_lib._assert_no_hooks(self, spec)

      # Assert predictions, loss, and metrics.
      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        self.assertIsNone(spec.scaffold.summary_op)
        loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
            metric_keys.MetricKeys.LOSS_MEAN]
        loss, _ = sess.run((spec.loss, loss_mean_update_op))
        self.assertAllClose(6.5, loss)
        # Check results of value ops (in `loss_mean`).
        self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())

  def test_eval_metric_ops_with_head_name_for_regression(self):
    """Metric keys are suffixed with the head's name when one is given."""
    head = head_lib.RegressionHead(name='some_regression_head')
    logits = np.array(((1,), (9,)), dtype=np.float32)
    labels = np.array(((1,), (1,)), dtype=np.int64)
    features = {'x': np.array(((42,),), dtype=np.int32)}
    expected_metric_keys = [
        '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
        '{}/some_regression_head'.format(
            metric_keys.MetricKeys.PREDICTION_MEAN),
        '{}/some_regression_head'.format(metric_keys.MetricKeys.LABEL_MEAN),
    ]
    eval_metrics = head.metrics()
    updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                          labels)
    self.assertItemsEqual(expected_metric_keys, updated_metrics.keys())

  def test_eval_with_regularization_losses(self):
    """Regularization losses are added to the loss and reported separately."""
    head = head_lib.RegressionHead()
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
    #                    = (4 + 9) / 2 = 6.5
    expected_unregularized_loss = 6.5
    expected_regularized_loss = (
        expected_unregularized_loss + expected_regularization_loss)
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN: expected_unregularized_loss,
        keys.LOSS_REGULARIZATION: expected_regularization_loss,
        keys.PREDICTION_MEAN: (45 + 41) / 2.0,
        keys.LABEL_MEAN: (43 + 44) / 2.0,
    }
    # Test eval metrics in eager mode
    if tf.executing_eagerly():
      eval_metrics = head.metrics(regularization_losses=regularization_losses)
      updated_metrics = head.update_metrics(
          eval_metrics,
          features,
          logits,
          labels,
          regularization_losses=regularization_losses)
      # Assert metrics.
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics})
    else:
      # Create estimator spec.
      spec = head.create_estimator_spec(
          features=features,
          mode=ModeKeys.EVAL,
          logits=logits,
          labels=labels,
          regularization_losses=regularization_losses,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

      # Assert predictions, loss, and metrics.
      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        self.assertIsNone(spec.scaffold.summary_op)
        value_ops = {
            k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops
        }
        update_ops = {
            k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops
        }
        prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
        predictions, loss, _ = sess.run(
            (spec.predictions[prediction_key], spec.loss, update_ops))
        self.assertAllClose(logits, predictions)
        self.assertAllClose(expected_regularized_loss, loss)
        # Check results of value ops (in `metrics`).
        self.assertAllClose(expected_metrics,
                            {k: value_ops[k].eval() for k in value_ops})

  def test_train_create_loss(self):
    """Train-mode loss uses the default SUM_OVER_BATCH_SIZE reduction."""
    head = head_lib.RegressionHead()
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # training_loss = (1 * 4 + 1 * 9) / 2 = 6.5
    expected_training_loss = 6.5
    # Create loss.
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
    self.assertAllClose(expected_training_loss, self.evaluate(training_loss))

  def test_train_create_loss_loss_reduction(self):
    """Tests create_loss with loss_reduction."""
    head = head_lib.RegressionHead(loss_reduction=tf.losses.Reduction.SUM)
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43,), (44,),), dtype=np.int32)
    features = {'x': np.array(((42,),), dtype=np.float32)}
    # training_loss = (1 * 4 + 1 * 9)
    expected_training_loss = 13.
    # Create loss.
    training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
    self.assertAllClose(expected_training_loss, self.evaluate(training_loss))

  def test_train_labels_none(self):
    """Tests that error is raised when labels is None."""
    head = head_lib.RegressionHead()

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    with self.assertRaisesRegexp(
        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
      head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array((
              (45,),
              (41,),
          ), dtype=np.float32),
          labels=None,
          train_op_fn=_no_op_train_fn,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

  def test_train(self):
    """End-to-end train: predictions, loss, train_op, and summaries."""
    head = head_lib.RegressionHead()
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    # loss = ((43-45)^2 + (44-41)^2) / 2 = (4 + 9) / 2 = 6.5
    expected_loss = 6.5

    def _train_op_fn(loss):
      # Verify that the loss handed to train_op_fn is the expected value.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    preds = head.predictions(logits)
    loss = head.loss(labels, logits, features=features)
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, loss.dtype)
    self.assertAllClose(logits, self.evaluate(preds[prediction_key]))
    self.assertAllClose(expected_loss, self.evaluate(loss))
    if tf.executing_eagerly():
      return

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert spec contains expected tensors.
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str)

  def test_train_with_regularization_losses(self):
    """Regularization losses are included in the training loss and summary."""
    head = head_lib.RegressionHead()
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    regularization_losses = [1.5, 0.5]
    expected_regularization_loss = 2.
    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
    #                    = (4 + 9) / 2 = 6.5
    # loss = unregularized_loss + regularization_loss = 8.5
    expected_loss = 8.5

    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    loss = head.loss(
        labels,
        logits,
        features=features,
        mode=ModeKeys.TRAIN,
        regularization_losses=regularization_losses)
    preds = head.predictions(logits)
    self.assertAllClose(logits, self.evaluate(preds[prediction_key]))
    self.assertAllClose(expected_loss, self.evaluate(loss))
    if tf.executing_eagerly():
      return

    def _train_op_fn(loss):
      # Verify that the loss handed to train_op_fn is the regularized value.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        regularization_losses=regularization_losses,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(
          self, {
              metric_keys.MetricKeys.LOSS: expected_loss,
              metric_keys.MetricKeys.LOSS_REGULARIZATION:
                  (expected_regularization_loss),
          }, summary_str)

  def test_weighted_multi_example_eval(self):
    """1d label, 3 examples, 1 batch."""
    head = head_lib.RegressionHead(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
    features = {
        'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
        'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
    }
    labels = np.array(((35,), (42,), (45,)), dtype=np.int32)

    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    preds = head.predictions(logits)
    self.assertItemsEqual((prediction_key,), preds.keys())
    predictions = preds[prediction_key]
    self.assertEqual(tf.dtypes.float32, predictions.dtype)
    self.assertAllClose(logits, self.evaluate(predictions))

    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
    # expected_loss = loss / batch_size = 33.8666667
    expected_loss = 33.8666667
    # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
    expected_loss_mean = 39.0769231
    if tf.executing_eagerly():
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels)
      self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                             metric_keys.MetricKeys.PREDICTION_MEAN,
                             metric_keys.MetricKeys.LABEL_MEAN),
                            updated_metrics.keys())
      self.assertAllClose(
          expected_loss_mean,
          updated_metrics[metric_keys.MetricKeys.LOSS_MEAN].result())
      loss = head.loss(labels, logits, features=features, mode=ModeKeys.EVAL)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
    else:
      # Create estimator spec.
      spec = head.create_estimator_spec(
          features=features,
          mode=ModeKeys.EVAL,
          logits=logits,
          labels=labels,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

      # Assert spec contains expected tensors.
      self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
      self.assertIsNone(spec.train_op)
      self.assertIsNone(spec.export_outputs)
      test_lib._assert_no_hooks(self, spec)

      # Assert predictions, loss, and metrics.
      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        self.assertIsNone(spec.scaffold.summary_op)
        loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
            metric_keys.MetricKeys.LOSS_MEAN]
        loss, _ = sess.run((spec.loss, loss_mean_update_op))
        self.assertAllClose(expected_loss, loss)
        # Check results of value ops (in `loss_mean`).
        self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())

  def test_weight_with_numeric_column(self):
    """1d label, 3 examples, 1 batch."""
    # Weights are passed through the column's normalizer_fn (x + 1), so the
    # raw values (0, -0.9, 0.5) become effective weights (1, .1, 1.5).
    head = head_lib.RegressionHead(
        weight_column=tf.feature_column.numeric_column(
            'label_weights', normalizer_fn=lambda x: x + 1.))

    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
    features = {
        'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
        'label_weights': np.array(((0.,), (-0.9,), (0.5,)), dtype=np.float32),
    }
    labels = np.array(((35,), (42,), (45,)), dtype=np.int32)
    loss = head.loss(labels, logits, features=features)
    # weighted_loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2
    #               = 100+.1+1.5 = 101.6
    # loss = weighted_loss / batch_size = 101.6 / 3 = 33.8666667
    self.assertAllClose(33.8666667, self.evaluate(loss))

  def test_weighted_multi_example_train(self):
    """1d label, 3 examples, 1 batch."""
    head = head_lib.RegressionHead(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)
    features = {
        'x': np.array(((42,), (43,), (44,)), dtype=np.float32),
        'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
    }
    labels = np.array(((35.,), (42.,), (45.,)), dtype=np.float32)
    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
    expected_train_result = b'my_train_op'
    # weighted_loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2
    #               = 100+.1+1.5 = 101.6
    # expected_loss = weighted_loss / batch_size = 101.6 / 3 = 33.8666667
    expected_loss = 33.8666667

    preds = head.predictions(logits)
    loss = head.loss(labels, logits, features=features, mode=ModeKeys.TRAIN)
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, loss.dtype)
    self.assertAllClose(logits, self.evaluate(preds[prediction_key]))
    self.assertAllClose(expected_loss, self.evaluate(loss))
    if tf.executing_eagerly():
      return

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert spec contains expected tensors.
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str)

  def test_train_one_dim_create_loss(self):
    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
    head = head_lib.RegressionHead(weight_column='label_weights')
    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
    labels_rank_1 = np.array((35., 42., 45.,))
    # training_loss = (100 * 1 + 1 * .1 + 1.5 * 1) / batch_size
    #               = 101.6 / 3 = 33.8666667
    expected_training_loss = 33.8666667
    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}

    # Create loss.
    training_loss = head.loss(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1)
    self.assertAllClose(expected_training_loss, self.evaluate(training_loss))

  def test_train_one_dim(self):
    """Tests train with 1D labels and weights (shape [batch_size])."""
    head = head_lib.RegressionHead(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)
    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
    expected_train_result = b'my_train_op'
    # loss = (1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2) / batch_size
    #      = (100+.1+1.5) / 3 = 101.6 / 3 = 33.8666667
    expected_loss = 33.8666667
    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
    labels_rank_1 = np.array((35., 42., 45.,))
    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
    self.assertEqual((3,), x_feature_rank_1.shape)
    self.assertEqual((3,), weight_rank_1.shape)
    self.assertEqual((3,), labels_rank_1.shape)

    preds = head.predictions(logits)
    loss = head.loss(
        labels=labels_rank_1,
        logits=logits,
        features=features,
        mode=ModeKeys.TRAIN)
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, loss.dtype)
    self.assertAllClose(logits, self.evaluate(preds[prediction_key]))
    self.assertAllClose(expected_loss, self.evaluate(loss))
    if tf.executing_eagerly():
      return

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels_rank_1,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert spec contains expected tensors.
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)

    # Assert predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str)

  def test_weighted_multi_value_eval_create_loss(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),))
    }
    regularized_training_loss = head.loss(
        logits=logits, labels=labels, features=features)
    # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
    # weighted_sum_loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
    # expected_training_loss = weighted_sum_loss / batch_size
    #                        = 101.6 / 3 = 33.8666667
    self.assertAllClose(33.8666667, self.evaluate(regularized_training_loss))

  def test_weighted_multi_value_eval(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),))
    }

    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    preds = head.predictions(logits)
    self.assertItemsEqual((prediction_key,), preds.keys())
    predictions = preds[prediction_key]
    self.assertEqual(tf.dtypes.float32, predictions.dtype)
    self.assertAllClose(logits, self.evaluate(predictions))

    # weighted_loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2
    #               = 100+.1+1.5 = 101.6
    # expected_loss = weighted_loss / batch_size = 101.6 / 3 = 33.8666667
    expected_loss = 33.8666667
    # loss_mean = weighted_loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
    expected_loss_mean = 39.0769231
    if tf.executing_eagerly():
      eval_metrics = head.metrics()
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels)
      self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                             metric_keys.MetricKeys.PREDICTION_MEAN,
                             metric_keys.MetricKeys.LABEL_MEAN),
                            updated_metrics.keys())
      self.assertAllClose(
          expected_loss_mean,
          updated_metrics[metric_keys.MetricKeys.LOSS_MEAN].result())
      loss = head.loss(labels, logits, features=features, mode=ModeKeys.EVAL)
      self.assertIsNotNone(loss)
      self.assertAllClose(expected_loss, loss)
    else:
      # Create estimator spec.
      spec = head.create_estimator_spec(
          features=features,
          mode=ModeKeys.EVAL,
          logits=logits,
          labels=labels,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

      # Assert spec contains expected tensors.
      self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
      self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
                             metric_keys.MetricKeys.PREDICTION_MEAN,
                             metric_keys.MetricKeys.LABEL_MEAN),
                            spec.eval_metric_ops.keys())
      self.assertIsNone(spec.train_op)
      self.assertIsNone(spec.export_outputs)
      test_lib._assert_no_hooks(self, spec)

      # Assert predictions, loss, and metrics.
      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        self.assertIsNone(spec.scaffold.summary_op)
        loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
            metric_keys.MetricKeys.LOSS_MEAN]
        loss, _ = sess.run((spec.loss, loss_mean_update_op))
        self.assertAllClose(expected_loss, loss)
        # Check results of value ops (in `loss_mean`).
        self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())

  def test_weighted_multi_value_train_create_loss(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),))
    }
    # Create loss.
    regularized_training_loss = head.loss(
        logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
    self.assertAllClose(33.8666667, self.evaluate(regularized_training_loss))

  def test_weighted_multi_value_train(self):
    """3d label, 1 example, 1 batch."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    self.assertEqual(3, head.logits_dimension)
    logits = np.array(((45., 41., 44.),))
    labels = np.array(((35., 42., 45.),))
    expected_train_result = b'my_train_op'
    # loss = (1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2) / batch_size
    #      = (100+.1+1.5) / 3 = 101.6 / 3 = 33.8666667
    expected_loss = 33.8666667

    def _train_op_fn(loss):
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    features = {
        'x': np.array(((42., 43., 44.),)),
        'label_weights': np.array(((1., .1, 1.5),)),
    }

    preds = head.predictions(logits)
    loss = head.loss(labels, logits, features=features, mode=ModeKeys.TRAIN)
    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
    self.assertItemsEqual((prediction_key,), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[prediction_key].dtype)
    self.assertEqual(tf.dtypes.float32, loss.dtype)
    self.assertAllClose(logits, self.evaluate(preds[prediction_key]))
    self.assertAllClose(expected_loss, self.evaluate(loss))
    if tf.executing_eagerly():
      return

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert spec contains expected tensors.
    self.assertEqual({}, spec.eval_metric_ops)
    self.assertIsNotNone(spec.train_op)
    self.assertIsNone(spec.export_outputs)
    test_lib._assert_no_hooks(self, spec)

    # Evaluate predictions, loss, train_op, and summaries.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      predictions, loss, train_result, summary_str = sess.run(
          (spec.predictions[prediction_key], spec.loss, spec.train_op,
           spec.scaffold.summary_op))
      self.assertAllClose(logits, predictions)
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)
      test_lib._assert_simple_summaries(self, {
          metric_keys.MetricKeys.LOSS: expected_loss,
      }, summary_str)

  def test_weighted_multi_batch_eval_eager(self):
    """1d label, 1 example, 3 batches."""
    with tf.compat.v2.__internal__.eager_context.eager_mode():
      head = head_lib.RegressionHead(weight_column='label_weights')
      self.assertEqual(1, head.logits_dimension)

      logits = np.array(((45.,), (41.,), (44.,)))
      features = {
          'x': np.array(((42.,), (43.,), (44.,))),
          'label_weights': np.array(((1.,), (.1,), (1.5,))),
          # 'logits' is not a feature, but we use `tf.data.Dataset` to make it
          # as a `tensor` (required by `update_metrics`), and access it
          # via `features['logits']` in `update_metrics`
          'logits': logits
      }
      labels = np.array(((35.,), (42.,), (45.,)))
      # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
      # loss = sum(losses) = 100+.1+1.5 = 101.6
      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
      expected_metrics = {
          metric_keys.MetricKeys.LOSS_MEAN:
              39.076923,
          metric_keys.MetricKeys.PREDICTION_MEAN:
              (45 + 41 * 0.1 + 44 * 1.5) / 2.6,
          metric_keys.MetricKeys.LABEL_MEAN:
              (35 + 42 * 0.1 + 45 * 1.5) / 2.6,
      }

      # Stream the three examples through one-element batches and accumulate
      # the metrics across batches.
      dataset = tf.compat.v1.data.Dataset.from_tensor_slices(
          (features, labels))
      dataset = dataset.batch(1)
      eval_metrics = head.metrics()
      for (features, labels) in dataset:
        logits = features['logits']
        updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                              labels)

      # Assert metrics.
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics})

  def test_weighted_multi_batch_train_eager(self):
    """1d label, 1 example, 3 batches."""
    if tf.executing_eagerly():
      head = head_lib.RegressionHead(weight_column='label_weights')
      self.assertEqual(1, head.logits_dimension)

      logits = np.array(((45.,), (41.,), (44.,)))
      features = {
          'x': np.array(((42.,), (43.,), (44.,))),
          'label_weights': np.array(((1.,), (.1,), (1.5,))),
          # 'logits' is not a feature, but we use `tf.data.Dataset` to make it
          # as a `tensor` (required by `update_metrics`), and access it
          # via `features['logits']` in `update_metrics`
          'logits': logits
      }
      labels = np.array(((35.,), (42.,), (45.,)))
      dataset = tf.compat.v1.data.Dataset.from_tensor_slices(
          (features, labels))
      dataset = dataset.batch(1)
      # Per-batch weighted losses for batch size 1.
      expected_losses = np.array((100, .1, 1.5))
      for (batch, (features, labels)) in enumerate(dataset):
        logits = features['logits']
        loss = head.loss(labels, logits, features=features)
        self.assertAllClose(expected_losses[batch], loss)

  def test_multi_dim_weighted_train_create_loss(self):
    """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
    label_dimension = 3
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=label_dimension)
    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
                       [[20., 21., 22.], [30., 31., 32.]]])
    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
                       [[23., 24., 25.], [34., 35., 36.]]])
    weights = np.array([[1., 1.5], [2., 2.5]])
    # Per-example squared errors are constant within each [2, 2] cell
    # (1, 4, 9, 16); each is scaled by its cell's weight before summing.
    training_loss_weighted_sum = np.sum(
        np.array([[[1. * x for x in [1., 1., 1.]],
                   [1.5 * x for x in [4., 4., 4.]]],
                  [[2. * x for x in [9., 9., 9.]],
                   [2.5 * x for x in [16., 16., 16.]]]]))
    # batch_size = 2 * 2 * 3 = 12.
    # expected_training_loss = training_loss_weighted_sum / batch_size
    # Create loss.
    training_loss = head.loss(
        features={'label_weights': weights},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        regularization_losses=None)
    self.assertAllClose(training_loss_weighted_sum / 12.,
                        self.evaluate(training_loss))

  def test_multi_dim_weighted_train(self):
    """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
                       [[20., 21., 22.], [30., 31., 32.]]])
    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
                       [[23., 24., 25.], [34., 35., 36.]]])
    expected_train_result = b'my_train_op'
    features = {
        'label_weights': np.array([[1., 1.5], [2., 2.5]]),
    }
    # weighted_loss_sum = (1*3*1^2 + 1.5*3*2^2 + 2*3*3^2 +2.5*3*4^2) = 195
    # loss = weighted_loss_sum / batch_size = 195 / (2*2*3) = 16.25
    expected_loss = 16.25
    loss = head.loss(labels, logits, features=features, mode=ModeKeys.TRAIN)
    self.assertAllClose(expected_loss, self.evaluate(loss))
    if tf.executing_eagerly():
      return

    # Create estimator spec.
    def _train_op_fn(loss):
      # Asserts the loss value inside the graph before returning the marker op.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      self.assertAllClose(expected_loss, spec.loss.eval())

  def test_multi_dim_train_weights_wrong_inner_dim(self):
    """Logits, labels of shape [2, 2, 3], weight shape [2, 1]."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
                       [[20., 21., 22.], [30., 31., 32.]]])
    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
                       [[23., 24., 25.], [34., 35., 36.]]])
    features = {
        'label_weights': np.array([[1.], [2]]),
    }

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    # Eager raises at loss computation; graph mode raises when the loss
    # tensor is evaluated.
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'weights shape'):
        head.loss(
            features=features,
            mode=ModeKeys.TRAIN,
            logits=logits,
            labels=labels,
            regularization_losses=None)
      return

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.assertRaisesRegexp(
        tf.errors.InvalidArgumentError,
        r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
      self.evaluate(spec.loss)

  def test_multi_dim_train_weights_wrong_outer_dim(self):
    """Logits, labels of shape [2, 2, 3], weight shape [2, 2, 2]."""
    head = head_lib.RegressionHead(
        weight_column='label_weights', label_dimension=3)
    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
                       [[20., 21., 22.], [30., 31., 32.]]])
    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
                       [[23., 24., 25.], [34., 35., 36.]]])

    def _no_op_train_fn(loss):
      del loss
      return tf.no_op()

    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError, 'weights shape'):
        head.loss(
            features={
                'label_weights':
                    np.array([[[1., 1.1], [1.5, 1.6]],
                              [[2., 2.1], [2.5, 2.6]]])
            },
            mode=ModeKeys.TRAIN,
            logits=logits,
            labels=labels,
            regularization_losses=None)
      return

    # A placeholder defers the shape check to session run time.
    weights_placeholder = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
    features = {
        'label_weights': weights_placeholder,
    }
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_no_op_train_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session():
      test_lib._initialize_variables(self, tf.compat.v1.train.Scaffold())
      with self.assertRaisesRegexp(
          tf.errors.InvalidArgumentError,
          r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'):
        spec.loss.eval({
            weights_placeholder:
                np.array([[[1., 1.1], [1.5, 1.6]], [[2., 2.1], [2.5, 2.6]]])
        })


@test_util.deprecated_graph_mode_only
class RegressionHeadForEstimator(tf.test.TestCase):
  """Tests for create_estimator_spec running in Graph mode only."""

  def test_invalid_trainable_variables(self):
    head = head_lib.RegressionHead()

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params
        return [
            tf.strings.join([
                tf.constant('my_train_op'),
                tf.strings.as_string(loss, precision=2)
            ])
        ]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    # trainable_variables=None must be rejected.
    with self.assertRaisesRegexp(ValueError,
                                 r'trainable_variables cannot be None'):
      head.create_estimator_spec(
          features={'x': np.array(((42.,),), dtype=np.float32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (41,),), dtype=np.float32),
          labels=np.array(((43.,), (44.,),), dtype=np.float64),
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables=None)
    # A dict (non list/tuple) of variables must be rejected as well.
    with self.assertRaisesRegexp(
        ValueError, r'trainable_variables should be a list or a tuple'):
      head.create_estimator_spec(
          features={'x': np.array(((42.,),), dtype=np.float32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (41,),), dtype=np.float32),
          labels=np.array(((43.,), (44.,),), dtype=np.float64),
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables={
              'var_list': [tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]
          })

  def test_train_with_optimizer(self):
    head = head_lib.RegressionHead()
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    expected_train_result = b'my_train_op'
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    # loss = ((43-45)^2 + (44-41)^2) / 2 = (4 + 9) / 2 = 13 / 2 = 6.5
    expected_loss = 6.5

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params
        with tf.control_dependencies((tf.compat.v1.debugging.assert_equal(
            tf.cast(expected_loss, dtype=tf.dtypes.float32),
            tf.cast(loss, dtype=tf.dtypes.float32),
            name='assert_loss'),)):
          return [tf.constant(expected_train_result)]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        optimizer=_Optimizer('my_optimizer'),
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run((spec.loss, spec.train_op))
      self.assertAllClose(expected_loss, loss)
      self.assertEqual(expected_train_result, train_result)

  def test_train_with_update_ops(self):
    with tf.Graph().as_default():
      w = tf.Variable(1)
      update_op = w.assign_add(1)

      t = tf.Variable('')
      expected_train_result = b'my_train_op'

      def _train_op_fn(loss):
        del loss
        return t.assign(expected_train_result)

      head = head_lib.RegressionHead()
      spec = head.create_estimator_spec(
          features={'x': np.array(((42,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=np.array(((45,), (41,),), dtype=np.float32),
          labels=np.array(((43.,), (44.,),), dtype=np.float64),
          update_ops=[update_op],
          train_op_fn=_train_op_fn,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

      with self.cached_session() as sess:
        test_lib._initialize_variables(self, spec.scaffold)
        sess.run(spec.train_op)
        w_value, t_value = sess.run([w, t])
        # update_op must have run exactly once alongside the train op.
        self.assertEqual(2, w_value)
        self.assertEqual(expected_train_result, t_value)

  def test_train_summaries_with_head_name(self):
    head = head_lib.RegressionHead(name='some_regression_head')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45,), (41,),), dtype=np.float32)
    labels = np.array(((43.,), (44.,),), dtype=np.float64)
    features = {'x': np.array(((42.,),), dtype=np.float32)}
    # loss = ((43-45)^2 + (44-41)^2) / 2 = (4 + 9) / 2 = 6.5
    expected_loss = 6.5

    def _train_op_fn(loss):
      del loss
      return tf.no_op()

    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert summaries: the loss summary tag is suffixed with the head name.
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      summary_str = sess.run(spec.scaffold.summary_op)
      test_lib._assert_simple_summaries(
          self, {
              '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS):
                  expected_loss,
          }, summary_str)

  def test_weighted_multi_batch_train(self):
    """1d label, 1 example, 3 batches."""
    # numpy_input_fn is not compatible with eager.
    head = head_lib.RegressionHead(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45.,), (41.,), (44.,)))
    input_fn = numpy_io.numpy_input_fn(
        x={
            'x': np.array(((42.,), (43.,), (44.,))),
            'label_weights': np.array(((1.,), (.1,), (1.5,))),
            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
            # batched version of it, and pop it off before passing to
            # `create_estimator_spec`.
            'logits': logits,
        },
        y=np.array(((35.,), (42.,), (45.,))),
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    batched_features, batched_labels = input_fn()
    batched_logits = batched_features.pop('logits')
    spec = head.create_estimator_spec(
        features=batched_features,
        mode=ModeKeys.TRAIN,
        logits=batched_logits,
        labels=batched_labels,
        train_op_fn=lambda loss: loss * -7.,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # Assert spec contains expected tensors.
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertIsNotNone(spec.train_op)

    with self.cached_session() as sess:
      # Finalize graph and initialize variables.
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      tf.compat.v1.train.queue_runner.start_queue_runners()
      results = tuple(
          [sess.run((spec.loss, spec.train_op)) for _ in range(len(logits))])
      # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
      expected_losses = np.array((100, .1, 1.5))
      self.assertAllClose(expected_losses, [r[0] for r in results])
      self.assertAllClose(expected_losses * -7., [r[1] for r in results])

  def test_weighted_multi_batch_eval(self):
    """1d label, 1 example, 3 batches."""
    # numpy_input_fn is not compatible with eager.
    head = head_lib.RegressionHead(weight_column='label_weights')
    self.assertEqual(1, head.logits_dimension)

    # Create estimator spec.
    logits = np.array(((45.,), (41.,), (44.,)))
    input_fn = numpy_io.numpy_input_fn(
        x={
            'x': np.array(((42.,), (43.,), (44.,))),
            'label_weights': np.array(((1.,), (.1,), (1.5,))),
            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
            # batched version of it, and pop it off before passing to
            # `create_estimator_spec`.
            'logits': logits,
        },
        y=np.array(((35.,), (42.,), (45.,))),
        batch_size=1,
        num_epochs=1,
        shuffle=False)
    batched_features, batched_labels = input_fn()
    batched_logits = batched_features.pop('logits')
    spec = head.create_estimator_spec(
        features=batched_features,
        mode=ModeKeys.EVAL,
        logits=batched_logits,
        labels=batched_labels,
        train_op_fn=None,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
    # loss = sum(losses) = 100+.1+1.5 = 101.6
    # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
    expected_metrics = {
        metric_keys.MetricKeys.LOSS_MEAN:
            39.076923,
        metric_keys.MetricKeys.PREDICTION_MEAN:
            (45 + 41 * 0.1 + 44 * 1.5) / 2.6,
        metric_keys.MetricKeys.LABEL_MEAN: (35 + 42 * 0.1 + 45 * 1.5) / 2.6,
    }

    # Assert spec contains expected tensors.
    self.assertEqual(tf.dtypes.float32, spec.loss.dtype)
    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
    self.assertIsNone(spec.train_op)
    test_lib._assert_no_hooks(self, spec)

    with self.cached_session() as sess:
      # Finalize graph and initialize variables.
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNotNone(spec.scaffold.summary_op)
      tf.compat.v1.train.queue_runner.start_queue_runners()

      # Run tensors for `steps` steps.
      steps = len(logits)
      results = tuple([
          sess.run((
              spec.loss,
              # The `[1]` gives us the metric update op.
              {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}))
          for _ in range(steps)
      ])

      # Assert losses and metrics.
      self.assertAllClose((100, .1, 1.5), [r[0] for r in results])
      # For metrics, check results of value ops (in `results`).
      self.assertAllClose(
          expected_metrics,
          {k: spec.eval_metric_ops[k][0].eval() for k in spec.eval_metric_ops})


class PoissonRegressionHead(tf.test.TestCase):

  def test_train(self):
    head = head_lib.PoissonRegressionHead()
    logits = np.array([[0], [-1], [1]], dtype=np.float32)
    labels = np.array([[1], [2], [3]], dtype=np.int32)
    features = {'x': np.array(((42.,),), dtype=np.int32)}
    # With x = exp(logits), z = labels.
    # loss = -ln(exp(-x) * (x^z) / z!)
    #      = x - z * ln(x) + ln(z!)
    #      = exp(logits) - labels * logits - ln(labels!)
    # But for ln(z!) and z > 1, the Stirling approximation is used
    # ln(z!) = z*ln(z) - z + 0.5*ln(2*pi*z)
    # loss = [exp(0) - 1 * 0 + ln(1!),
    #         exp(-1) - 2 * (-1) + 2*ln(2) - 2 + 0.5*ln(2*pi*2),
    #         exp(1) - 3 * 1 + 3*ln(3) - 3 + 0.5*ln(2*pi*3)]
    #      = [1.0, 3.020, 1.482]
    # training_loss = (1.0 + 3.020 + 1.482) / 3
    expected_loss = 1.834
    atol = 0.001
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertAlmostEqual(expected_loss, loss, delta=atol)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      # Asserts the loss (within `atol`) inside the graph before returning the
      # marker op.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_near(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          atol=atol,
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run([spec.loss, spec.train_op])
      self.assertAlmostEqual(expected_loss, loss, delta=atol)
      self.assertEqual(expected_train_result, train_result)

  def test_predict(self):
    head = head_lib.PoissonRegressionHead()
    logits = np.array([[0], [-1], [1]], dtype=np.float32)
    # Poisson head predictions are exp(logits).
    expected_predictions = np.exp(logits)
    keys = prediction_keys.PredictionKeys
    preds = head.predictions(logits)
    self.assertItemsEqual((keys.PREDICTIONS, keys.LOGITS), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[keys.PREDICTIONS].dtype)
    self.assertEqual(tf.dtypes.float32, preds[keys.LOGITS].dtype)
    self.assertAllClose(expected_predictions,
                        self.evaluate(preds[keys.PREDICTIONS]))
    self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS]))


class LogisticRegressionHead(tf.test.TestCase):

  def test_train(self):
    head = head_lib.LogisticRegressionHead()
    logits = np.array([[0], [-1], [1]], dtype=np.float32)
    labels = np.array([[.4], [.6], [.8]], dtype=np.float32)
    features = {'x': np.array(((42.,),), dtype=np.int32)}
    # Following the documentation in
    # tf.nn.sigmoid_cross_entropy_with_logits:
    # With x = logits, z = labels.
    # loss = max(x, 0) - x * z + log(1 + exp(-abs(x)))
    # loss = [0 - 0 * 0.4 + ln(1 + exp(-0)),
    #         0 + 1 * 0.6 + ln(1 + exp(-1)),
    #         1 - 1 * 0.8 + ln(1 + exp(-1))]
    #      = [0.6931, 0.9133, 0.5133]
    # training_loss = (0.6931 + 0.9133 + 0.5133) / 3
    expected_loss = 0.7066
    atol = 0.001
    if tf.executing_eagerly():
      loss = head.loss(
          logits=logits, labels=labels, features=features, mode=ModeKeys.TRAIN)
      self.assertAlmostEqual(expected_loss, loss, delta=atol)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      # Asserts the loss (within `atol`) inside the graph before returning the
      # marker op.
      with tf.control_dependencies((tf.compat.v1.debugging.assert_near(
          tf.cast(expected_loss, dtype=tf.dtypes.float32),
          tf.cast(loss, dtype=tf.dtypes.float32),
          atol=atol,
          name='assert_loss'),)):
        return tf.constant(expected_train_result)

    # Create estimator spec.
    spec = head.create_estimator_spec(
        features={'x': np.array(((42.,),), dtype=np.int32)},
        mode=ModeKeys.TRAIN,
        logits=logits,
        labels=labels,
        train_op_fn=_train_op_fn,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])

    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      loss, train_result = sess.run([spec.loss, spec.train_op])
      self.assertAlmostEqual(expected_loss, loss, delta=atol)
      self.assertEqual(expected_train_result, train_result)

  def test_train_labels_too_large(self):
    head = head_lib.LogisticRegressionHead()
    logits = np.array([[0], [-1], [1]], dtype=np.float32)
    # Label 1.2 is outside the valid [0, 1] range.
    labels = np.array([[.4], [1.2], [.8]], dtype=np.float32)
    features = {'x': np.array(((42.,),), dtype=np.int32)}
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError,
                                   r'Labels must be in range \[0, 1\]'):
        head.loss(
            logits=logits,
            labels=labels,
            features=features,
            mode=ModeKeys.TRAIN)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      del loss
      return tf.constant(expected_train_result)

    # Create estimator spec.
    with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                 r'Labels must be in range \[0, 1\]'):
      spec = head.create_estimator_spec(
          features=features,
          mode=ModeKeys.TRAIN,
          logits=logits,
          labels=labels,
          train_op_fn=_train_op_fn,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

  def test_train_labels_negative(self):
    head = head_lib.LogisticRegressionHead()
    logits = np.array([[0], [-1], [1]], dtype=np.float32)
    # Label -0.2 is outside the valid [0, 1] range.
    labels = np.array([[.4], [-0.2], [.8]], dtype=np.float32)
    features = {'x': np.array(((42.,),), dtype=np.int32)}
    if tf.executing_eagerly():
      with self.assertRaisesRegexp(ValueError,
                                   r'Labels must be in range \[0, 1\]'):
        head.loss(
            logits=logits,
            labels=labels,
            features=features,
            mode=ModeKeys.TRAIN)
      return

    expected_train_result = b'my_train_op'

    def _train_op_fn(loss):
      del loss
      return tf.constant(expected_train_result)

    # Create estimator spec.
    with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                 r'Labels must be in range \[0, 1\]'):
      spec = head.create_estimator_spec(
          features={'x': np.array(((42.,),), dtype=np.int32)},
          mode=ModeKeys.TRAIN,
          logits=logits,
          labels=labels,
          train_op_fn=_train_op_fn,
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])

  def test_predict(self):
    head = head_lib.LogisticRegressionHead()
    logits = np.array([[0], [-1], [1]], dtype=np.float32)
    # Logistic head predictions are sigmoid(logits).
    expected_predictions = 1. / (1. + np.exp(-logits))
    keys = prediction_keys.PredictionKeys
    preds = head.predictions(logits)
    self.assertItemsEqual((keys.PREDICTIONS, keys.LOGITS), preds.keys())
    self.assertEqual(tf.dtypes.float32, preds[keys.PREDICTIONS].dtype)
    self.assertEqual(tf.dtypes.float32, preds[keys.LOGITS].dtype)
    self.assertAllClose(expected_predictions,
                        self.evaluate(preds[keys.PREDICTIONS]))
    self.assertAllClose(logits, self.evaluate(preds[keys.LOGITS]))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/head/sequential_head.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines a head for sequential models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import six
import tensorflow as tf

# `Iterable` moved to `collections.abc` in Python 3.
if six.PY3:
  from collections.abc import Iterable
else:
  from collections import Iterable

from tensorflow.python.framework import ops
from tensorflow_estimator.python.estimator.head import base_head
from tensorflow_estimator.python.estimator.head import multi_head
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys


class _SequentialHead(base_head.Head):
  """Interface for the head of a sequential model.

  A sequential head handles input sequences of different lengths to compute the
  output of a model. It requires a sequence mask tensor, to indicate which steps
  of the sequences are padded and ensure proper aggregation for loss and metrics
  computation. It has an `input_sequence_mask_key` property that specifies which
  tensor of the feature dictionary to use as the sequence mask tensor.

  Such a head can for instance be used with `RNNEstimator` for sequential
  predictions.

  Example of usage:
    ```python
    def _my_model_fn(features, labels, mode, params, config=None):
      feature_layer = tf.feature_column.SequenceFeatureLayer(columns)
      input_layer, sequence_length = feature_layer(features)
      sequence_length_mask = tf.sequence_mask(sequence_length)
      rnn_layer = tf_keras.layers.RNN(cell=tf_keras.layers.SimpleRNNCell(units),
                                      return_sequences=True)
      logits = rnn_layer(input_layer, mask=sequence_length_mask)
      features[sequential_head.input_sequence_mask_key] = sequence_length_mask
      return sequential_head.create_estimator_spec(
          features=features,
          labels=labels,
          mode=mode,
          logits=logits,
          optimizer=optimizer)
    ```
  """
  __metaclass__ = abc.ABCMeta

  @abc.abstractproperty
  def input_sequence_mask_key(self):
    """Key of the sequence mask tensor in the feature dictionary.

    Returns:
      A string.
    """
    raise NotImplementedError('Calling an abstract method.')


class SequentialHeadWrapper(_SequentialHead):
  """Sequential head wrapping a Head object.

  Wraps a `Head` object and applies a sequential mask to:
  - Loss aggregation: To only account for masked steps. Used for
    `create_estimator_spec` and `loss` methods.
  - Metrics: The sequence mask is used to only account for mask steps in
    metrics computation with the `update_metrics` method.
  - Predictions: To add a sequence length mask tensor to the predictions
    dictionary.
  """

  def __init__(self,
               static_head,
               sequence_length_mask='sequence_length_mask',
               feature_columns=None):
    """Initializes a `SequentialHeadWrapper` instance.

    Example of usage:

    ```python
    # Define a sequential head.
    static_head = tf.estimator.BinaryClassHead(weight_column='weights')
    sequential_head = head_lib.SequentialHeadWrapper(
        static_head=static_head, sequence_length_mask='mask',
        feature_columns='weights')

    # Define feature columns and parsing spec.
    feature_columns = [
        tf.feature_column.sequence_numeric_column('sequential-feature')
    ]
    label_column = tf.feature_column.sequence_numeric_column(
        'label', dtype=tf.int32),
    weight_column = tf.feature_column.sequence_numeric_column('weights')
    parsing_spec = tf.feature_column.make_parse_example_spec(
        feature_columns + [label_column, weight_column])

    # Use the head in a model function.
    def _my_model_fn(features, labels, mode, params, config=None):
      feature_layer = tf.feature_column.SequenceFeatureLayer(feature_columns)
      input_layer, sequence_length = feature_layer(features)
      sequence_length_mask = tf.sequence_mask(sequence_length)
      rnn_layer = tf_keras.layers.RNN(
          cell=tf_keras.layers.SimpleRNNCell(units),
          return_sequences=True)
      logits = rnn_layer(input_layer, mask=sequence_length_mask)
      features['mask'] = sequence_length_mask
      return sequential_head.create_estimator_spec(
          features=features,
          labels=labels,
          mode=mode,
          logits=logits,
          optimizer=optimizer)
    ```

    Args:
      static_head: `Head` object, static head to wrap.
      sequence_length_mask: `str`, name of sequence length mask tensor in
        features dictionary. Tensor must be a dense tensor of shape
        [batch_size, seq_length].
      feature_columns: `str` or list of the former. Specifies the features of
        the features dictionary to which the sequence length mask must be
        applied, and which are passed to the static head's methods when
        calling `create_estimator_spec`, `loss` or `update_metrics`. This is
        typically a weight tensor.

    Raises:
      TypeError: If `sequence_length_mask` is not of string type.
      TypeError: If provided features columns are not of string type.
    """
    # Verify and set sequence mask column.
    # TODO(aarg): Add support for `NumericColumn`.
    if not isinstance(sequence_length_mask, six.string_types):
      raise TypeError('`sequence_mask` column must be a string. '
                      'Given type: {}.'.format(type(sequence_length_mask)))
    self._sequence_length_mask = sequence_length_mask

    # Verify and set feature columns (to be flattened).
    feature_columns = feature_columns or []
    if not isinstance(feature_columns, Iterable):
      raise TypeError('`feature_columns` must be either a string or an '
                      'iterable of strings got {} instead.'.format(
                          type(feature_columns)))
    # A single string is wrapped in a list; an iterable is kept as provided.
    if isinstance(feature_columns, six.string_types):
      self._feature_columns = [feature_columns]
    else:
      self._feature_columns = feature_columns

    for column in self._feature_columns:
      # TODO(aarg): Add support for `NumericColumn` and `SequenceNumericColumn`.
      # NOTE(review): error message has a typo ("must a string"); left as-is
      # since callers/tests may match on the exact text.
      if not isinstance(column, six.string_types):
        raise TypeError('Column must a string. Given type: {}.'.format(
            type(column)))

    # Set other variables.
    if isinstance(static_head, multi_head.MultiHead):
      # TODO(aarg): Add support for MultiHead.
      raise ValueError(
          '`MultiHead` is not supported with `SequentialHeadWrapper`.')
    self._static_head = static_head

    super(SequentialHeadWrapper, self).__init__()

  def _flatten(self, labels, logits, features):
    """Flattens labels, logits, and features tensors.

    Provided tensors need to have at least two dimensions. The two first
    dimensions of the provided tensors are flattened to one single dimension.
    If a tensor is dense, the sequence mask in the features dictionary is used
    to flatten it.

    Note: If indices of a sparse tensor are not sorted, they will be reordered.

    Args:
      labels: `Tensor` or `SparseTensor` to flatten.
      logits: `Tensor` or `SparseTensor` to flatten.
      features: Dictionary of `Tensor` or `SparseTensor` objects to flatten.

    Returns:
      - Dense `Tensor` with flattened labels.
      - Dense `Tensor` with flattened logits.
      - Dictionary of flattened dense `Tensor` objects.

    Raises:
      ValueError: If the sequence mask is not found in `features`.
      ValueError: If one of the provided tensors to flatten has not at least
        two dimensions.
    """
    # Retrieve sequence_mask from features dictionary.
    if self.input_sequence_mask_key not in features:
      raise ValueError('The provided sequence_length_mask key `{}` should be '
                       'included in the features dictionary, but was not '
                       'found. Found keys: {}.'.format(
                           self.input_sequence_mask_key,
                           list(features.keys())))
    sequence_mask = features[self.input_sequence_mask_key]
    if sequence_mask.get_shape().ndims != 2:
      raise ValueError('Mask is expected to have two dimensions, got '
                       '{} instead.'.format(sequence_mask.get_shape().ndims))

    with ops.name_scope('flatten'):
      # Number of non-padded steps; used to validate the flattened length.
      expected_length = tf.math.reduce_sum(
          tf.cast(sequence_mask, tf.dtypes.int32))
      # Flatten logits and labels.
      flat_logits = _flatten_tensor(logits, sequence_mask, expected_length)
      flat_labels = _flatten_tensor(labels, sequence_mask, expected_length)
      # Flatten features.
      flat_features = {}
      for column in self._feature_columns:
        if column not in features:
          raise ValueError('`{}` column expected in features '
                           'dictionary.'.format(column))
        flat_features[column] = _flatten_tensor(features[column],
                                                sequence_mask, expected_length)
      return flat_labels, flat_logits, flat_features

  def loss(self,
           logits,
           labels,
           features=None,
           mode=None,
           regularization_losses=None):
    """Flattens input and returns regularized training loss.

    Flattens `logits`, `labels`, and `features` tensors that are specified by
    the head's `feature_columns` before calling the static head's `loss`
    method.

    Args:
      logits: Logits `Tensor` of rank >= 2 and shape [batch_size, seq_length,
        D2, ... DN].
      labels: Labels `Tensor` or `SparseTensor` or rank >= 2 and shape
        [batch_size, seq_length, D2, ... DN].
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Must contain the sequence length mask tensor. Features
        corresponding to the sequential's head `feature_columns` are flattened
        and passed to the static head's `loss` method.
      mode: Estimator's `ModeKeys`. To be used in case loss calculation is
        different in Train and Eval mode.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      A scalar `Tensor` representing regularized training loss used in train
      and eval.
    """
    flat_labels, flat_logits, flat_features = self._flatten(
        labels, logits, features)
    return self._static_head.loss(
        logits=flat_logits,
        labels=flat_labels,
        features=flat_features,
        mode=mode,
        regularization_losses=regularization_losses)

  def create_estimator_spec(self,
                            features,
                            mode,
                            logits,
                            labels=None,
                            optimizer=None,
                            trainable_variables=None,
                            train_op_fn=None,
                            update_ops=None,
                            regularization_losses=None):
    """Returns `EstimatorSpec` that a model_fn can return.

    If in TRAIN or EVAL mode, `logits`, `labels`, and `features` tensors
    corresponding to the head's `feature_columns` are flattened before calling
    the static head's `create_estimator_spec` method. If in PREDICT mode, no
    flattening is done. The `EstimatorSpec` is computed using the static
    head's `create_estimator_spec` method. The sequence length mask tensor is
    added to the predictions dictionary.

    Args:
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. If in TRAIN or EVAL mode, only specified features are
        flattened and passed to the static head's method.
      mode: Estimator's `ModeKeys`.
      logits: Logits `Tensor` of rank >= 2 and shape [batch_size, seq_length,
        D2, ... DN].
      labels: Labels `Tensor` or `SparseTensor` or rank >= 2 and shape
        [batch_size, seq_length, D2, ... DN].
      optimizer: An `tf_keras.optimizers.Optimizer` instance to optimize the
        loss in TRAIN mode. Namely, sets `train_op =
        optimizer.get_updates(loss, trainable_variables)`, which updates
        variables to minimize `loss`.
      trainable_variables: A list or tuple of `Variable` objects to update to
        minimize `loss`. In Tensorflow 1.x, by default these are the list of
        variables collected in the graph under the key
        `GraphKeys.TRAINABLE_VARIABLES`. As Tensorflow 2.x doesn't have
        collections and GraphKeys, trainable_variables need to be passed
        explicitly here.
      train_op_fn: Function that takes a scalar loss `Tensor` and returns an
        op to optimize the model with the loss in TRAIN mode. Used if
        `optimizer` is `None`. Exactly one of `train_op_fn` and `optimizer`
        must be set in TRAIN mode. By default, it is `None` in other modes. If
        you want to optimize loss yourself, you can pass
        `lambda _: tf.no_op()` and then use `EstimatorSpec.loss` to compute
        and apply gradients.
      update_ops: A list or tuple of update ops to be run at training time.
        For example, layers such as BatchNormalization create mean and
        variance update ops that need to be run at training time. In
        Tensorflow 1.x, these are thrown into an UPDATE_OPS collection. As
        Tensorflow 2.x doesn't have collections, update_ops need to be passed
        explicitly here.
      regularization_losses: A list of additional scalar losses to be added to
        the training loss, such as regularization losses.

    Returns:
      `EstimatorSpec`.
    """
    # PREDICT mode: no flattening; just attach the sequence mask to the
    # predictions dictionary of the static head's spec.
    if mode == ModeKeys.PREDICT:
      spec = self._static_head.create_estimator_spec(
          features=features, mode=mode, logits=logits)
      spec.predictions[self.input_sequence_mask_key] = features[
          self.input_sequence_mask_key]
      return spec._replace(predictions=spec.predictions)

    # TRAIN/EVAL: flatten inputs, then delegate to the static head.
    flat_labels, flat_logits, flat_features = self._flatten(
        labels, logits, features)
    return self._static_head.create_estimator_spec(
        features=flat_features,
        mode=mode,
        logits=flat_logits,
        trainable_variables=trainable_variables,
        labels=flat_labels,
        optimizer=optimizer,
        train_op_fn=train_op_fn,
        regularization_losses=regularization_losses,
        update_ops=update_ops)

  def update_metrics(self,
                     eval_metrics,
                     features,
                     logits,
                     labels,
                     regularization_losses=None):
    """Updates metric objects and returns a `dict` of the updated metrics.

    Flattens `logits`, `labels`, and `features` tensors that are specified by
    the head's feature_columns` before calling the static head's
    `update_metrics` method.

    Args:
      eval_metrics: A `dict` of metrics to be updated.
      features: Input `dict` mapping string feature names to `Tensor` or
        `SparseTensor` objects containing the values for that feature in a
        minibatch. Only specified features are flattened and passed to the
        static head's method.
      logits: Logits `Tensor` of rank >= 2 and shape [batch_size, seq_length,
        D2, ... DN].
      labels: Labels `Tensor` or `SparseTensor` or rank >= 2 and shape
        [batch_size, seq_length, D2, ... DN].
      regularization_losses: A list of additional scalar losses to be added to
        the training and evaluation loss, such as regularization losses.

    Returns:
      A `dict` of updated metrics keyed by name. The value is an instance of
      `Metric` class.
""" flat_labels, flat_logits, flat_features = self._flatten( labels, logits, features) return self._static_head.update_metrics( eval_metrics=eval_metrics, features=flat_features, logits=flat_logits, labels=flat_labels, regularization_losses=regularization_losses) def _create_tpu_estimator_spec(self, features, mode, logits, labels=None, optimizer=None, trainable_variables=None, train_op_fn=None, update_ops=None, regularization_losses=None): raise NotImplementedError def predictions(self, logits, keys=None): """Calls the static head's `predictions` method.""" return self._static_head.predictions(logits, keys=keys) def metrics(self, regularization_losses=None): """Calls the static head's `metrics` method.""" return self._static_head.metrics(regularization_losses) @property def input_sequence_mask_key(self): """Returns the key for the sequence mask feature.""" return self._sequence_length_mask @property def logits_dimension(self): """Returns the logits dimension of the static head.""" return self._static_head.logits_dimension @property def loss_reduction(self): """Returns the loss reduction of the static head.""" return self._static_head.loss_reduction @property def name(self): """Returns the name of the static head.""" if self._static_head.name: return '{}_sequential'.format(self._static_head.name) return None @property def static_head(self): """Returns the wrapped static head.""" return self._static_head def _flatten_tensor(tensor, sequence_mask, expected_length): """Flattens the two first dimensions and reshapes a tensor or sparse tensor. If `tensor` is a dense tensor, the sequence_mask is used to infer valid inputs. Note: If `tensor` is a `SparseTensor` and the indices are not sorted, they will be reordered. Args: tensor: A `Tensor` or `SparseTensor` of dimension at least 2, of shape [batch_size, seq_length, D0, D1, ..., DN]. sequence_mask: A boolean `Tensor` of shape [batch_size, seq_length]. 
expected_length: A integer scalar `Tensor` with the expected length of the resulting flattenned Tensor. Returns: A `Tensor` object of shape [expected_length, D0, D1, ..., DN]. Raises: ValueError: If `tensor` has not at least 2 dimensions. ValueError: If `tensor` is not a `Tensor` or `SparseTensor` object. InvalidArgumentError: If the resulting `Tensor` doesn't have the expected length. """ shape = tensor.get_shape() if shape.ndims < 2: raise ValueError('Input tensor expected to have at least 2 dimensions, ' 'got {} instead.'.format(shape.ndims)) if isinstance(tensor, tf.sparse.SparseTensor): # What follows depends on the indices ordering. Hence we reorder the indices # to ensure correctness. flat_tensor = tf.sparse.reorder(tensor).values if shape.ndims > 2: new_shape = tf.concat([[-1], shape[2:]], axis=0) flat_tensor = tf.reshape(tensor.values, new_shape) elif isinstance(tensor, tf.Tensor): flat_tensor = tf.boolean_mask(tensor, sequence_mask) else: raise ValueError('`tensor` expected to be a `Tensor` or `SparseTensor` ' 'got `{}` instead.'.format(tensor)) if shape.ndims == 2: flat_tensor = tf.compat.v1.expand_dims(flat_tensor, -1) expected_shape = tf.concat([[expected_length], [1]], axis=0) else: expected_shape = tf.concat([[expected_length], shape[2:]], axis=0) # TODO(b/119617064): Unify eager and graph implementations. err_message = 'Tensor shape is incompatible with provided mask.' if tf.executing_eagerly(): if flat_tensor._shape_tuple() != tuple(expected_shape.numpy()): # pylint: disable=protected-access raise ValueError(err_message) return flat_tensor with tf.control_dependencies([ tf.compat.v1.debugging.assert_equal( tf.compat.v1.shape(flat_tensor), expected_shape, message=err_message) ]): return tf.identity(flat_tensor) ================================================ FILE: tensorflow_estimator/python/estimator/head/sequential_head_test.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sequential_head.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.canned import metric_keys
from tensorflow_estimator.python.estimator.canned import prediction_keys
from tensorflow_estimator.python.estimator.head import binary_class_head as binary_head_lib
from tensorflow_estimator.python.estimator.head import head_utils as test_lib
from tensorflow_estimator.python.estimator.head import multi_class_head as multi_head_lib
from tensorflow_estimator.python.estimator.head import multi_head
from tensorflow_estimator.python.estimator.head import sequential_head as seq_head_lib
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys


def _convert_to_tensor(features):
  """Converts arrays or a dict of arrays to tensors or a dict of tensors."""
  if isinstance(features, dict):
    # Heuristic: a dict with exactly these three keys is treated as the kwargs
    # of a `SparseTensor`, not as a features dictionary.
    if set(features.keys()) == set(['indices', 'values', 'dense_shape']):
      return tf.sparse.SparseTensor(**features)
    # NOTE: mutates the input dict in place before returning it.
    for col in features:
      features[col] = _convert_to_tensor(features[col])
    return features
  return ops.convert_to_tensor(features)


@test_util.run_all_in_graph_and_eager_modes
class TestFlatten(tf.test.TestCase, parameterized.TestCase):
  """Tests flatten functions."""

  @parameterized.named_parameters(
      {
          'testcase_name': 'one_dim_sparse_tensor',
          'tensor': {
              'indices': ((0, 0), (0, 1), (1, 0)),
              'values': (1, 2, 3),
              'dense_shape': (2, 2)
          },
          'expected': [[1], [2], [3]]
      }, {
          'testcase_name': 'multi_dim_sparse_tensor',
          'tensor': {
              'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1),
                          (1, 0, 0), (1, 0, 1)),
              'values': (1, 2, 3, 4, 5, 6),
              'dense_shape': (2, 2, 2)
          },
          'expected': [[1, 2], [3, 4], [5, 6]]
      }, {
          'testcase_name': 'one_dim_dense_tensor',
          'tensor': [[1, 2], [3, 4]],
          'expected': [[1], [2], [3]]
      }, {
          'testcase_name': 'multi_dim_dense_tensor',
          'tensor': [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
          'expected': [[1, 2], [3, 4], [5, 6]]
      }, {
          'testcase_name': 'unsorted_sparse_indices',
          'tensor': {
              'indices': ((0, 0), (1, 0), (0, 1)),
              'values': (1, 3, 2),
              'dense_shape': (2, 2)
          },
          'expected': [[1], [2], [3]]
      })
  def test_flatten_tensor(self, tensor, expected):
    """Tests the output of the `_flatten_tensor` function.

    Args:
      tensor: Dense or sparse array.
      expected: Array with expected output of `_flatten_tensor`.
    """
    sequence_mask = np.array([[1, 1], [1, 0]])
    tensor = _convert_to_tensor(tensor)
    flat_tensor = seq_head_lib._flatten_tensor(
        tensor, sequence_mask, expected_length=sequence_mask.sum())
    if tf.executing_eagerly():
      self.assertAllEqual(flat_tensor, expected)
      return
    with self.cached_session() as sess:
      self.assertAllEqual(sess.run(flat_tensor), expected)

  def _test_flatten_method(self, features, feature_columns):
    """Runs seq head's `_flatten` method and returns output for testing."""
    head = seq_head_lib.SequentialHeadWrapper(
        static_head=None,
        sequence_length_mask='sequence_mask',
        feature_columns=feature_columns)
    labels = {
        'indices': ((0, 0), (0, 1), (1, 0)),
        'values': (1, 2, 3),
        'dense_shape': (2, 2)
    }
    logits = np.array([[[10], [11]], [[12], [13]]])
    features = _convert_to_tensor(features)
    labels = tf.sparse.SparseTensor(**labels)
    logits = ops.convert_to_tensor(logits)
    output = head._flatten(labels, logits, features)
    if tf.executing_eagerly():
      return output
    with self.cached_session() as sess:
      return sess.run(output)

  def test_flatten_method(self):
    """Tests output of `_flatten` method."""
    features = {'sequence_mask': np.array([[1, 1], [1, 0]])}
    expected_output = ([[1], [2], [3]], [[10], [11], [12]], {})
    output = self._test_flatten_method(features, feature_columns=[])
    self.assertAllClose(expected_output, output)

  def test_flatten_with_one_feature_columns(self):
    """Tests output of `_flatten` method with one feature column provided."""
    features = {
        'sequence_mask': np.array([[1, 1], [1, 0]]),
        'weights': np.array([[0.5, 0.5], [1., 0]])
    }
    expected_output = ([[1], [2], [3]], [[10], [11], [12]], {
        'weights': np.array([[0.5], [0.5], [1.]])
    })
    # A single string (not wrapped in a list) is a valid `feature_columns`.
    output = self._test_flatten_method(features, feature_columns='weights')
    self.assertAllClose(expected_output, output)

  def test_flatten_with_multiple_feature_columns(self):
    """Tests `_flatten` method with multiple feature columns provided."""
    features = {
        'sequence_mask': np.array([[1, 1], [1, 0]]),
        'a': np.array([[0.5, 0.5], [1., 0]]),
        'b': np.array([[1.5, 1.5], [2., 0]])
    }
    expected_output = ([[1], [2], [3]], [[10], [11], [12]], {
        'a': np.array([[0.5], [0.5], [1.]]),
        'b': np.array([[1.5], [1.5], [2.]])
    })
    output = self._test_flatten_method(features, feature_columns=['a', 'b'])
    self.assertAllClose(expected_output, output)

  def test_flatten_no_mask(self):
    """Tests error in `_flatten` method when sequence mask is not provided."""
    features = {}
    with self.assertRaisesRegexp(
        ValueError, (r'The provided sequence_length_mask key `sequence_mask` '
                     r'should be included in.* Found keys: \[\].')):
      _ = self._test_flatten_method(features, feature_columns=[])

  def test_flatten_missing_feature(self):
    """Tests error in `_flatten` method when feature is not provided."""
    features = {'sequence_mask': np.array([[1, 1], [1, 0]])}
    with self.assertRaisesRegexp(
        ValueError, '`weights` column expected in features dictionary.'):
      _ = self._test_flatten_method(features, feature_columns=['weights'])

  def test_flatten_tensor_wrong_feature_dim(self):
    """Tests `_flatten` method when feature has wrong dimension."""
    features = {
        'sequence_mask': np.array([[1, 1], [1, 0]]),
        'weights': np.array([0.5, 0.5, 1., 0])
    }
    with self.assertRaisesRegexp(
        ValueError, 'Input tensor expected to have at least 2 dimensions.'):
      _ = self._test_flatten_method(features, feature_columns=['weights'])

  def test_flatten_tensor_wrong_feature_mask(self):
    """Tests `_flatten` with feature mask different from provided mask."""
    features = {'sequence_mask': np.array([[1, 1], [1, 1]])}
    # Eager raises at call time; graph mode raises when the assert op runs.
    error = (
        ValueError if tf.executing_eagerly() else
        tf.errors.InvalidArgumentError)
    with self.assertRaisesRegexp(
        error, 'Tensor shape is incompatible with provided mask.'):
      _ = self._test_flatten_method(features, feature_columns=[])

  def test_flatten_tensor_wrong_mask_dim(self):
    """Tests `_flatten` with mask that has wrong dimensions."""
    features = {'sequence_mask': np.array([1, 1])}
    with self.assertRaisesRegexp(
        ValueError, 'Mask is expected to have two dimensions, got .* instead.'):
      _ = self._test_flatten_method(features, feature_columns=[])


class _MockHead(object):
  """A static head to be wrapped in a sequential head, for testing.

  Methods simply echo their arguments so tests can verify what the wrapper
  passed through.
  """

  def metrics(self, regularization_losses=None):
    return regularization_losses

  def loss(self, **kwargs):
    return kwargs

  def create_estimator_spec(self, **kwargs):
    Spec = collections.namedtuple('Spec', ['predictions', 'kwargs'])  # pylint: disable=invalid-name
    return Spec(predictions={}, kwargs=kwargs)


@test_util.run_all_in_graph_and_eager_modes
class TestSequentialHead(tf.test.TestCase):
  """Tests sequential head methods."""

  def _assert_equal(self, d, dref, session=None):
    """Recursively checks that all items of a dictionary are close.

    Dictionary can contain numerical values, `Tensor` objects or dictionaries
    of the former. If an item is a `Tensor`, its value is evaluated then
    compared to the reference.

    Args:
      d: Dictionary to check.
      dref: Dictionary to use as a reference for checks.
      session: A `tf.Session` object.
    """
    for key, ref_item in dref.items():
      if isinstance(ref_item, dict):
        self._assert_equal(d[key], dref=ref_item, session=session)
      elif isinstance(d[key], tf.Tensor):
        self.assertAllClose(
            session.run(d[key]) if session else d[key], ref_item)
      else:
        self.assertEqual(d[key], ref_item)

  def test_predictions(self):
    """Tests predictions output.

    Use `predictions` method in eager execution, else `create_estimator_spec`
    in PREDICT mode.

    logits = [[0.3, -0.4], [0.2, 0.2]]
    logistics = 1 / (1 + exp(-logits)) = [[0.57, 0.40], [0.55, 0.55]]
    """
    head = seq_head_lib.SequentialHeadWrapper(binary_head_lib.BinaryClassHead(),
                                              'sequence_mask')
    logits = [[[0.3], [-0.4]], [[0.2], [0.2]]]
    expected_logistics = [[[0.574443], [0.401312]], [[0.549834], [0.549834]]]
    features = {
        'sequence_mask': ops.convert_to_tensor(np.array([[1, 1], [1, 0]]))
    }
    keys = prediction_keys.PredictionKeys
    if tf.executing_eagerly():
      predictions = head.predictions(
          logits=logits, keys=[keys.LOGITS, keys.LOGISTIC])
      self.assertItemsEqual(predictions.keys(), [keys.LOGITS, keys.LOGISTIC])
      self.assertAllClose(logits, predictions[keys.LOGITS])
      self.assertAllClose(expected_logistics, predictions[keys.LOGISTIC])
      return
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.PREDICT,
        logits=logits,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    # The wrapper must add the mask to the static head's predictions.
    self.assertIn('sequence_mask', spec.predictions)
    with self.cached_session() as sess:
      self.assertAllEqual(
          sess.run(spec.predictions['sequence_mask']),
          features['sequence_mask'])
      self.assertAllClose(logits, sess.run(spec.predictions[keys.LOGITS]))
      self.assertAllClose(expected_logistics,
                          sess.run(spec.predictions[keys.LOGISTIC]))

  def test_metrics(self):
    """Tests the `metrics` method.

    Tests that:
    - Returned metrics match the returned metrics of the static head.
    - `regularization_losses` argument is properly passed to the static head's
      method.
    """
    head = seq_head_lib.SequentialHeadWrapper(binary_head_lib.BinaryClassHead(),
                                              'mask')
    metrics = head.metrics(regularization_losses=2.5)
    keys = metric_keys.MetricKeys
    self.assertIn(keys.ACCURACY, metrics)
    self.assertIn(keys.LOSS_REGULARIZATION, metrics)

  def test_loss_args(self):
    """Tests that variables are flattened and passed to static head's method."""
    logits = [[1, 2], [3, 4]]
    labels = [[0, 1], [0, 2]]
    features = {'weights': [[0.3, 0.2], [0.5, 100]], 'mask': [[1, 1], [1, 0]]}
    head = seq_head_lib.SequentialHeadWrapper(_MockHead(), 'mask', 'weights')
    expected_output = {
        'logits': [[1], [2], [3]],
        'labels': [[0], [1], [0]],
        'features': {
            'weights': [[0.3], [0.2], [0.5]]
        },
        'mode': 'my-mode',
        'regularization_losses': 123
    }
    output = head.loss(
        logits=_convert_to_tensor(logits),
        labels=_convert_to_tensor(labels),
        features=_convert_to_tensor(features),
        mode='my-mode',
        regularization_losses=123)
    with self.cached_session() as sess:
      self._assert_equal(output, dref=expected_output, session=sess)

  def test_create_estimator_spec_args(self):
    """Tests that variables are flattened and passed to static head's method."""
    logits = [[1, 2], [3, 4]]
    labels = [[0, 1], [0, 2]]
    features = {'weights': [[0.3, 0.2], [0.5, 100]], 'mask': [[1, 1], [1, 0]]}
    head = seq_head_lib.SequentialHeadWrapper(_MockHead(), 'mask', 'weights')
    w = tf.Variable(1)
    update_op = w.assign_add(1)
    trainable_variables = [tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)]
    expected_output = {
        'logits': [[1], [2], [3]],
        'labels': [[0], [1], [0]],
        'features': {
            'weights': [[0.3], [0.2], [0.5]]
        },
        'mode': ModeKeys.TRAIN,
        'regularization_losses': 123,
        'optimizer': 'my-opt',
        'train_op_fn': 'my-train-op',
        'trainable_variables': trainable_variables,
        'update_ops': [update_op]
    }
    spec = head.create_estimator_spec(
        logits=_convert_to_tensor(logits),
        labels=_convert_to_tensor(labels),
        features=_convert_to_tensor(features),
        mode=ModeKeys.TRAIN,
        optimizer='my-opt',
        train_op_fn='my-train-op',
        regularization_losses=123,
        update_ops=[update_op],
        trainable_variables=trainable_variables)
    with self.cached_session() as sess:
      self.assertItemsEqual(spec.kwargs.keys(), expected_output.keys())
      self._assert_equal(spec.kwargs, dref=expected_output, session=sess)

  def test_head_properties(self):
    """Tests that the head's properties are correctly implemented."""
    static_head = binary_head_lib.BinaryClassHead(
        loss_reduction=tf.losses.Reduction.SUM, name='a_static_head')
    head = seq_head_lib.SequentialHeadWrapper(static_head,
                                              'a_sequence_mask_col')
    self.assertEqual(head.name, 'a_static_head_sequential')
    self.assertEqual(head.logits_dimension, 1)
    self.assertEqual(head.loss_reduction, tf.losses.Reduction.SUM)
    self.assertEqual(head.input_sequence_mask_key, 'a_sequence_mask_col')
    self.assertEqual(head.static_head.name, 'a_static_head')

  def test_loss_reduction(self):
    """Tests loss reduction.

    Use `loss` method in eager execution, else `create_estimator_spec` in
    TRAIN mode.

    logits = [[[2., 3., 4.], [5., -0.5, 0.]],
              [[-1.0, 2.0, 0.5], [_]]],
    labels = [[0, 1], [2, _]]
    weights = [[0.5, 0.2], [0.3, _]]
    loss = [0.5*2.40 + 0.2*5.41 + 0.3*1.74] / 3 = 0.94
    """
    static_head = multi_head_lib.MultiClassHead(
        n_classes=3, weight_column='weights')
    head = seq_head_lib.SequentialHeadWrapper(static_head, 'sequence_mask',
                                              'weights')
    expected_loss = 0.942783
    features = {
        'weights':
            tf.sparse.SparseTensor(
                indices=((0, 0), (0, 1), (1, 0)),
                values=(0.5, 0.2, 0.3),
                dense_shape=(2, 2)),
        'sequence_mask':
            ops.convert_to_tensor([[1, 1], [1, 0]])
    }
    logits = ops.convert_to_tensor([[[2., 3., 4.], [5., -0.5, 0.]],
                                    [[-1.0, 2.0, 0.5], [1.0, 0.5, 2.0]]])
    labels = tf.sparse.SparseTensor(
        indices=((0, 0), (0, 1), (1, 0)), values=(0, 1, 2), dense_shape=(2, 2))

    class _Optimizer(tf_keras.optimizers.Optimizer):

      def get_updates(self, loss, params):
        del params, loss
        return [tf.constant('op')]

      def get_config(self):
        config = super(_Optimizer, self).get_config()
        return config

    if tf.executing_eagerly():
      loss = head.loss(logits=logits, labels=labels, features=features)
    else:
      spec = head.create_estimator_spec(
          features,
          ModeKeys.TRAIN,
          logits,
          labels=labels,
          optimizer=_Optimizer('my_optimizer'),
          trainable_variables=[
              tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)
          ])
      with self.cached_session() as sess:
        loss = sess.run(spec.loss)
    self.assertAllClose(loss, expected_loss, atol=1e-4)

  def test_metrics_computation(self):
    """Runs metrics computation tests.

    Use `update_metrics` method in eager execution, else
    `create_estimator_spec` in EVAL mode.

    logits = [[-101, 102, -103], [104, _, _]]
    predicted_labels = [[0, 1, 0], [1, _, _]]
    labels = [[1, 1, 1], [1, _, _]]
    weights = [[2, 5, 1], [2, _, _]]

    loss = (101*2 + 103*1) / 10 = 30.5
    accuracy = (0 + 5 + 0 + 2) / (2 + 5 + 1 + 2) = 0.7
    prediction_mean = (0 + 5 + 0 + 2) / (2 + 5 + 1 + 2) = 0.7
    precision = (5 + 2) / (5 + 2) = 1.0
    recall = (5 + 2) / (2 + 5 + 1 + 2) = 0.7
    """
    static_head = binary_head_lib.BinaryClassHead(weight_column='weights')
    head = seq_head_lib.SequentialHeadWrapper(static_head, 'sequence_mask',
                                              'weights')
    features = {
        'sequence_mask': np.array([[1, 1, 1], [1, 0, 0]]),
        'weights': np.array([[2, 5, 1], [2, 100, 100]])
    }
    regularization_losses = [100.]
    logits = _convert_to_tensor([[-101, 102, -103], [104, 100, 100]])
    labels = tf.sparse.SparseTensor(
        values=[1, 1, 1, 1],
        indices=((0, 0), (0, 1), (0, 2), (1, 0)),
        dense_shape=(2, 3))
    features = _convert_to_tensor(features)
    expected_loss = 30.5
    keys = metric_keys.MetricKeys
    expected_metrics = {
        keys.LOSS_MEAN: expected_loss,
        keys.ACCURACY: 0.7,
        keys.PREDICTION_MEAN: 0.7,
        keys.LABEL_MEAN: 1.0,
        keys.LOSS_REGULARIZATION: 100,
        keys.PRECISION: 1.0,
        keys.RECALL: 0.7,
        keys.ACCURACY_BASELINE: 1.0,
        keys.AUC: 0.,
        keys.AUC_PR: 1.0
    }
    if tf.executing_eagerly():
      eval_metrics = head.metrics(regularization_losses=regularization_losses)
      updated_metrics = head.update_metrics(eval_metrics, features, logits,
                                            labels, regularization_losses)
      self.assertItemsEqual(expected_metrics.keys(), updated_metrics.keys())
      self.assertAllClose(
          expected_metrics,
          {k: updated_metrics[k].result() for k in updated_metrics})
      return
    spec = head.create_estimator_spec(
        features=features,
        mode=ModeKeys.EVAL,
        logits=logits,
        labels=labels,
        regularization_losses=regularization_losses,
        trainable_variables=[tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32)])
    with self.cached_session() as sess:
      test_lib._initialize_variables(self, spec.scaffold)
      self.assertIsNone(spec.scaffold.summary_op)
      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
      _ = sess.run(update_ops)
      self.assertAllClose(expected_metrics,
                          {k: value_ops[k].eval() for k in value_ops})

  def test_wrong_mask_type(self):
    """Tests error raised when the mask doesn't have proper type."""
    with self.assertRaisesRegexp(TypeError,
                                 '`sequence_mask` column must be a string.'):
      _ = seq_head_lib.SequentialHeadWrapper(None, sequence_length_mask=1)

  def test_wrong_feature_column_type(self):
    """Tests error raised when the feature column doesn't have proper type."""
    with self.assertRaisesRegexp(
        TypeError, '`feature_columns` must be either a string or an iterable'):
      _ = seq_head_lib.SequentialHeadWrapper(None, 'mask', feature_columns=1)

  def test_wrong_feature_column_type_in_iterable(self):
    """Tests error raised when the feature column doesn't have proper type."""
    with self.assertRaisesRegexp(TypeError,
                                 'Column must a string. Given type: .*.'):
      _ = seq_head_lib.SequentialHeadWrapper(None, 'mask', feature_columns=[1])

  def test_multi_head_provided(self):
    """Tests error raised when a multi-head is provided."""
    with self.assertRaisesRegexp(
        ValueError,
        '`MultiHead` is not supported with `SequentialHeadWrapper`.'):
      _ = seq_head_lib.SequentialHeadWrapper(
          multi_head.MultiHead(
              [binary_head_lib.BinaryClassHead(name='test-head')]))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/hooks/__init__.py
================================================



================================================
FILE: tensorflow_estimator/python/estimator/hooks/basic_session_run_hooks.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Some common SessionRunHook classes."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.training.basic_session_run_hooks import CheckpointSaverHook
from tensorflow.python.training.basic_session_run_hooks import CheckpointSaverListener
from tensorflow.python.training.basic_session_run_hooks import FeedFnHook
from tensorflow.python.training.basic_session_run_hooks import FinalOpsHook
from tensorflow.python.training.basic_session_run_hooks import GlobalStepWaiterHook
from tensorflow.python.training.basic_session_run_hooks import LoggingTensorHook
from tensorflow.python.training.basic_session_run_hooks import NanLossDuringTrainingError
from tensorflow.python.training.basic_session_run_hooks import NanTensorHook
from tensorflow.python.training.basic_session_run_hooks import ProfilerHook
from tensorflow.python.training.basic_session_run_hooks import SecondOrStepTimer
from tensorflow.python.training.basic_session_run_hooks import StepCounterHook
from tensorflow.python.training.basic_session_run_hooks import StopAtStepHook
from tensorflow.python.training.basic_session_run_hooks import SummarySaverHook
from tensorflow_estimator.python.estimator.estimator_export import estimator_export

# The hook classes are defined in TensorFlow core; this module only registers
# them under the `tf.estimator` API namespace by invoking `estimator_export`
# as a decorator call on each imported class.
estimator_export("estimator.SecondOrStepTimer")(SecondOrStepTimer)
estimator_export("estimator.LoggingTensorHook")(LoggingTensorHook)
estimator_export("estimator.StopAtStepHook")(StopAtStepHook)
estimator_export("estimator.CheckpointSaverListener")(CheckpointSaverListener)
estimator_export("estimator.CheckpointSaverHook")(CheckpointSaverHook)
estimator_export("estimator.StepCounterHook")(StepCounterHook)
estimator_export("estimator.NanLossDuringTrainingError")(
    NanLossDuringTrainingError)
estimator_export("estimator.NanTensorHook")(NanTensorHook)
estimator_export("estimator.SummarySaverHook")(SummarySaverHook)
estimator_export("estimator.GlobalStepWaiterHook")(GlobalStepWaiterHook)
estimator_export("estimator.FinalOpsHook")(FinalOpsHook)
estimator_export("estimator.FeedFnHook")(FeedFnHook)
estimator_export("estimator.ProfilerHook")(ProfilerHook)


================================================
FILE: tensorflow_estimator/python/estimator/hooks/basic_session_run_hooks_test.py
================================================
# pylint: disable=g-bad-file-header
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for basic_session_run_hooks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path
import shutil
import tempfile
import time

import tensorflow as tf
from tensorflow.python.framework import meta_graph
from tensorflow.python.framework import test_util
from tensorflow.python.platform import tf_logging
from tensorflow.python.training import monitored_session
from tensorflow.python.training import training_util
from tensorflow_estimator.python.estimator.hooks import basic_session_run_hooks
from tensorflow_estimator.python.estimator.hooks import fake_summary_writer

# Provide a realistic start time for unit tests where we need to mock out
# calls to time.time().
MOCK_START_TIME = 1484695987.209386 class MockCheckpointSaverListener( basic_session_run_hooks.CheckpointSaverListener): def __init__(self): self.begin_count = 0 self.before_save_count = 0 self.after_save_count = 0 self.end_count = 0 self.ask_for_stop = False def begin(self): self.begin_count += 1 def before_save(self, session, global_step): self.before_save_count += 1 def after_save(self, session, global_step): self.after_save_count += 1 if self.ask_for_stop: return True def end(self, session, global_step): self.end_count += 1 def get_counts(self): return { 'begin': self.begin_count, 'before_save': self.before_save_count, 'after_save': self.after_save_count, 'end': self.end_count } @test_util.deprecated_graph_mode_only class SecondOrStepTimerTest(tf.test.TestCase): def test_raise_in_both_secs_and_steps(self): with self.assertRaises(ValueError): basic_session_run_hooks.SecondOrStepTimer(every_secs=2.0, every_steps=10) def test_raise_in_none_secs_and_steps(self): with self.assertRaises(ValueError): basic_session_run_hooks.SecondOrStepTimer() @tf.compat.v1.test.mock.patch.object(time, 'time') def test_every_secs(self, mock_time): mock_time.return_value = MOCK_START_TIME timer = basic_session_run_hooks.SecondOrStepTimer(every_secs=1.0) self.assertTrue(timer.should_trigger_for_step(1)) timer.update_last_triggered_step(1) self.assertFalse(timer.should_trigger_for_step(1)) self.assertFalse(timer.should_trigger_for_step(2)) mock_time.return_value += 1.0 self.assertFalse(timer.should_trigger_for_step(1)) self.assertTrue(timer.should_trigger_for_step(2)) def test_every_steps(self): timer = basic_session_run_hooks.SecondOrStepTimer(every_steps=3) self.assertTrue(timer.should_trigger_for_step(1)) timer.update_last_triggered_step(1) self.assertFalse(timer.should_trigger_for_step(1)) self.assertFalse(timer.should_trigger_for_step(2)) self.assertFalse(timer.should_trigger_for_step(3)) self.assertTrue(timer.should_trigger_for_step(4)) def test_update_last_triggered_step(self): 
timer = basic_session_run_hooks.SecondOrStepTimer(every_steps=1) elapsed_secs, elapsed_steps = timer.update_last_triggered_step(1) self.assertEqual(None, elapsed_secs) self.assertEqual(None, elapsed_steps) elapsed_secs, elapsed_steps = timer.update_last_triggered_step(5) self.assertLess(0, elapsed_secs) self.assertEqual(4, elapsed_steps) elapsed_secs, elapsed_steps = timer.update_last_triggered_step(7) self.assertLess(0, elapsed_secs) self.assertEqual(2, elapsed_steps) @test_util.deprecated_graph_mode_only class StopAtStepTest(tf.test.TestCase): def test_raise_in_both_last_step_and_num_steps(self): with self.assertRaises(ValueError): basic_session_run_hooks.StopAtStepHook(num_steps=10, last_step=20) def test_stop_based_on_last_step(self): h = basic_session_run_hooks.StopAtStepHook(last_step=10) with tf.Graph().as_default(): global_step = tf.compat.v1.train.get_or_create_global_step() no_op = tf.no_op() h.begin() with tf.compat.v1.Session() as sess: mon_sess = monitored_session._HookedSession(sess, [h]) sess.run(tf.compat.v1.assign(global_step, 5)) h.after_create_session(sess, None) mon_sess.run(no_op) self.assertFalse(mon_sess.should_stop()) sess.run(tf.compat.v1.assign(global_step, 9)) mon_sess.run(no_op) self.assertFalse(mon_sess.should_stop()) sess.run(tf.compat.v1.assign(global_step, 10)) mon_sess.run(no_op) self.assertTrue(mon_sess.should_stop()) sess.run(tf.compat.v1.assign(global_step, 11)) mon_sess._should_stop = False mon_sess.run(no_op) self.assertTrue(mon_sess.should_stop()) def test_stop_based_on_num_step(self): h = basic_session_run_hooks.StopAtStepHook(num_steps=10) with tf.Graph().as_default(): global_step = tf.compat.v1.train.get_or_create_global_step() no_op = tf.no_op() h.begin() with tf.compat.v1.Session() as sess: mon_sess = monitored_session._HookedSession(sess, [h]) sess.run(tf.compat.v1.assign(global_step, 5)) h.after_create_session(sess, None) mon_sess.run(no_op) self.assertFalse(mon_sess.should_stop()) 
        sess.run(tf.compat.v1.assign(global_step, 13))
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        sess.run(tf.compat.v1.assign(global_step, 14))
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        sess.run(tf.compat.v1.assign(global_step, 15))
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())
        sess.run(tf.compat.v1.assign(global_step, 16))
        mon_sess._should_stop = False
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())

  def test_stop_based_with_multiple_steps(self):
    h = basic_session_run_hooks.StopAtStepHook(num_steps=10)
    with tf.Graph().as_default():
      global_step = tf.compat.v1.train.get_or_create_global_step()
      no_op = tf.no_op()
      h.begin()
      with tf.compat.v1.Session() as sess:
        mon_sess = monitored_session._HookedSession(sess, [h])
        sess.run(tf.compat.v1.assign(global_step, 5))
        h.after_create_session(sess, None)
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        # Jumping past the target step in a single run must still trigger
        # the stop.
        sess.run(tf.compat.v1.assign(global_step, 15))
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())


@test_util.deprecated_graph_mode_only
class LoggingTensorHookTest(tf.test.TestCase):
  """Tests for basic_session_run_hooks.LoggingTensorHook."""

  def setUp(self):
    # Mock out logging calls so we can verify whether correct tensors are being
    # monitored.
    self._actual_log = tf_logging.info
    self.logged_message = None

    def mock_log(*args, **kwargs):
      # Capture the logged args so tests can inspect what was logged, then
      # delegate to the real logging function.
      self.logged_message = args
      self._actual_log(*args, **kwargs)

    tf_logging.info = mock_log

  def tearDown(self):
    # Restore the real logging function patched in setUp.
    tf_logging.info = self._actual_log

  def test_illegal_args(self):
    with self.assertRaisesRegexp(ValueError, 'nvalid every_n_iter'):
      basic_session_run_hooks.LoggingTensorHook(tensors=['t'], every_n_iter=0)
    with self.assertRaisesRegexp(ValueError, 'nvalid every_n_iter'):
      basic_session_run_hooks.LoggingTensorHook(tensors=['t'], every_n_iter=-10)
    with self.assertRaisesRegexp(ValueError, 'xactly one of'):
      basic_session_run_hooks.LoggingTensorHook(
          tensors=['t'], every_n_iter=5, every_n_secs=5)
    with self.assertRaisesRegexp(ValueError, 'xactly one of'):
      basic_session_run_hooks.LoggingTensorHook(tensors=['t'])

  def test_print_at_end_only(self):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      t = tf.constant(42.0, name='foo')
      train_op = tf.constant(3)
      hook = basic_session_run_hooks.LoggingTensorHook(
          tensors=[t.name], at_end=True)
      hook.begin()
      mon_sess = monitored_session._HookedSession(sess, [hook])
      self.evaluate(tf.compat.v1.initializers.global_variables())
      self.logged_message = ''
      for _ in range(3):
        mon_sess.run(train_op)
        # assertNotRegexpMatches is not supported by python 3.1 and later
        self.assertEqual(str(self.logged_message).find(t.name), -1)

      hook.end(sess)
      self.assertRegexpMatches(str(self.logged_message), t.name)

  def _validate_print_every_n_steps(self, sess, at_end):
    t = tf.constant(42.0, name='foo')

    train_op = tf.constant(3)
    hook = basic_session_run_hooks.LoggingTensorHook(
        tensors=[t.name], every_n_iter=10, at_end=at_end)
    hook.begin()
    mon_sess = monitored_session._HookedSession(sess, [hook])
    self.evaluate(tf.compat.v1.initializers.global_variables())
    mon_sess.run(train_op)
    self.assertRegexpMatches(str(self.logged_message), t.name)
    for _ in range(3):
      self.logged_message = ''
      for _ in range(9):
        mon_sess.run(train_op)
        # assertNotRegexpMatches is not supported by python 3.1 and later
        self.assertEqual(str(self.logged_message).find(t.name), -1)
      mon_sess.run(train_op)
      self.assertRegexpMatches(str(self.logged_message), t.name)

    # Add additional run to verify proper reset when called multiple times.
    self.logged_message = ''
    mon_sess.run(train_op)
    # assertNotRegexpMatches is not supported by python 3.1 and later
    self.assertEqual(str(self.logged_message).find(t.name), -1)

    self.logged_message = ''
    hook.end(sess)
    if at_end:
      self.assertRegexpMatches(str(self.logged_message), t.name)
    else:
      # assertNotRegexpMatches is not supported by python 3.1 and later
      self.assertEqual(str(self.logged_message).find(t.name), -1)

  def test_print_every_n_steps(self):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      self._validate_print_every_n_steps(sess, at_end=False)
      # Verify proper reset.
      self._validate_print_every_n_steps(sess, at_end=False)

  def test_print_every_n_steps_and_end(self):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      self._validate_print_every_n_steps(sess, at_end=True)
      # Verify proper reset.
      self._validate_print_every_n_steps(sess, at_end=True)

  def test_print_first_step(self):
    # if it runs every iteration, first iteration has None duration.
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      t = tf.constant(42.0, name='foo')
      train_op = tf.constant(3)
      hook = basic_session_run_hooks.LoggingTensorHook(
          tensors={'foo': t}, every_n_iter=1)
      hook.begin()
      mon_sess = monitored_session._HookedSession(sess, [hook])
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess.run(train_op)
      self.assertRegexpMatches(str(self.logged_message), 'foo')
      # in first run, elapsed time is None.
      self.assertEqual(str(self.logged_message).find('sec'), -1)

  def _validate_print_every_n_secs(self, sess, at_end, mock_time):
    t = tf.constant(42.0, name='foo')
    train_op = tf.constant(3)

    hook = basic_session_run_hooks.LoggingTensorHook(
        tensors=[t.name], every_n_secs=1.0, at_end=at_end)
    hook.begin()
    mon_sess = monitored_session._HookedSession(sess, [hook])
    self.evaluate(tf.compat.v1.initializers.global_variables())

    mon_sess.run(train_op)
    self.assertRegexpMatches(str(self.logged_message), t.name)

    # assertNotRegexpMatches is not supported by python 3.1 and later
    self.logged_message = ''
    mon_sess.run(train_op)
    self.assertEqual(str(self.logged_message).find(t.name), -1)
    # Advance the mocked clock past the 1-second interval.
    mock_time.return_value += 1.0

    self.logged_message = ''
    mon_sess.run(train_op)
    self.assertRegexpMatches(str(self.logged_message), t.name)

    self.logged_message = ''
    hook.end(sess)
    if at_end:
      self.assertRegexpMatches(str(self.logged_message), t.name)
    else:
      # assertNotRegexpMatches is not supported by python 3.1 and later
      self.assertEqual(str(self.logged_message).find(t.name), -1)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_print_every_n_secs(self, mock_time):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      mock_time.return_value = MOCK_START_TIME
      self._validate_print_every_n_secs(sess, at_end=False, mock_time=mock_time)
      # Verify proper reset.
      self._validate_print_every_n_secs(sess, at_end=False, mock_time=mock_time)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_print_every_n_secs_and_end(self, mock_time):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      mock_time.return_value = MOCK_START_TIME
      self._validate_print_every_n_secs(sess, at_end=True, mock_time=mock_time)
      # Verify proper reset.
      self._validate_print_every_n_secs(sess, at_end=True, mock_time=mock_time)

  def test_print_formatter(self):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      t = tf.constant(42.0, name='foo')
      train_op = tf.constant(3)
      hook = basic_session_run_hooks.LoggingTensorHook(
          tensors=[t.name], every_n_iter=10,
          formatter=lambda items: 'qqq=%s' % items[t.name])
      hook.begin()
      mon_sess = monitored_session._HookedSession(sess, [hook])
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess.run(train_op)
      self.assertEqual(self.logged_message[0], 'qqq=42.0')


@test_util.deprecated_graph_mode_only
class CheckpointSaverHookTest(tf.test.TestCase):
  """Tests for basic_session_run_hooks.CheckpointSaverHook."""

  def setUp(self):
    self.model_dir = tempfile.mkdtemp()
    self.graph = tf.Graph()
    with self.graph.as_default():
      self.scaffold = tf.compat.v1.train.Scaffold()
      self.global_step = tf.compat.v1.train.get_or_create_global_step()
      self.train_op = training_util._increment_global_step(1)

  def tearDown(self):
    shutil.rmtree(self.model_dir, ignore_errors=True)

  def test_saves_when_saver_and_scaffold_both_missing(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=1)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        self.assertEqual(
            1, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_raise_when_saver_and_scaffold_both_present(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, saver=self.scaffold.saver, scaffold=self.scaffold)

  def test_raise_in_both_secs_and_steps(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_secs=10, save_steps=20)

  def test_raise_in_none_secs_and_steps(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.CheckpointSaverHook(self.model_dir)

  def test_save_secs_saves_in_first_step(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_secs=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        self.assertEqual(
            1, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_save_secs_calls_listeners_at_begin_and_end(self):
    with self.graph.as_default():
      listener = MockCheckpointSaverListener()
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir,
          save_secs=2,
          scaffold=self.scaffold,
          listeners=[listener])
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)  # hook runs here
        mon_sess.run(self.train_op)  # hook won't run here, so it does at end
        hook.end(sess)  # hook runs here
      self.assertEqual({
          'begin': 1,
          'before_save': 2,
          'after_save': 2,
          'end': 1
      }, listener.get_counts())

  def test_listener_with_monitored_session(self):
    with tf.Graph().as_default():
      scaffold = tf.compat.v1.train.Scaffold()
      global_step = tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(1)
      listener = MockCheckpointSaverListener()
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=1, scaffold=scaffold, listeners=[listener])
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook], scaffold=scaffold,
          checkpoint_dir=self.model_dir) as sess:
        sess.run(train_op)
        sess.run(train_op)
        global_step_val = sess.raw_session().run(global_step)
      listener_counts = listener.get_counts()
    self.assertEqual(2, global_step_val)
    # before_save/after_save fire for both train steps plus the end-of-session
    # save.
    self.assertEqual({
        'begin': 1,
        'before_save': 3,
        'after_save': 3,
        'end': 1
    }, listener_counts)

  def test_listener_stops_training_in_after_save(self):
    with tf.Graph().as_default():
      scaffold = tf.compat.v1.train.Scaffold()
      tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(1)
      listener = MockCheckpointSaverListener()
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=1, scaffold=scaffold, listeners=[listener])
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook], scaffold=scaffold,
          checkpoint_dir=self.model_dir) as sess:
        sess.run(train_op)
        self.assertFalse(sess.should_stop())
        sess.run(train_op)
        self.assertFalse(sess.should_stop())
        # After this, the listener's after_save() returns True, which must
        # request a stop.
        listener.ask_for_stop = True
        sess.run(train_op)
        self.assertTrue(sess.should_stop())

  def test_listener_with_default_saver(self):
    with tf.Graph().as_default():
      global_step = tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(1)
      listener = MockCheckpointSaverListener()
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=1, listeners=[listener])
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook], checkpoint_dir=self.model_dir) as sess:
        sess.run(train_op)
        sess.run(train_op)
        global_step_val = sess.raw_session().run(global_step)
      listener_counts = listener.get_counts()
    self.assertEqual(2, global_step_val)
    self.assertEqual({
        'begin': 1,
        'before_save': 3,
        'after_save': 3,
        'end': 1
    }, listener_counts)

    # Reload from the checkpoint directory to confirm the final step was saved.
    with tf.Graph().as_default():
      global_step = tf.compat.v1.train.get_or_create_global_step()
      with tf.compat.v1.train.SingularMonitoredSession(
          checkpoint_dir=self.model_dir) as sess2:
        global_step_saved_val = sess2.run(global_step)
    self.assertEqual(2, global_step_saved_val)

  def test_two_listeners_with_default_saver(self):
    with tf.Graph().as_default():
      global_step = tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(1)
      listener1 = MockCheckpointSaverListener()
      listener2 = MockCheckpointSaverListener()
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=1, listeners=[listener1, listener2])
      with tf.compat.v1.train.SingularMonitoredSession(
          hooks=[hook], checkpoint_dir=self.model_dir) as sess:
        sess.run(train_op)
        sess.run(train_op)
        global_step_val = sess.raw_session().run(global_step)
      listener1_counts = listener1.get_counts()
      listener2_counts = listener2.get_counts()
    self.assertEqual(2, global_step_val)
    self.assertEqual({
        'begin': 1,
        'before_save': 3,
        'after_save': 3,
        'end': 1
    }, listener1_counts)
    # Both listeners must be notified identically.
    self.assertEqual(listener1_counts, listener2_counts)

    with tf.Graph().as_default():
      global_step = tf.compat.v1.train.get_or_create_global_step()
      with tf.compat.v1.train.SingularMonitoredSession(
          checkpoint_dir=self.model_dir) as sess2:
        global_step_saved_val = sess2.run(global_step)
    self.assertEqual(2, global_step_saved_val)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_save_secs_saves_periodically(self, mock_time):
    with self.graph.as_default():
      mock_time.return_value = MOCK_START_TIME
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_secs=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()

      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])

        mock_time.return_value = MOCK_START_TIME
        mon_sess.run(self.train_op)  # Saved.

        mock_time.return_value = MOCK_START_TIME + 0.5
        mon_sess.run(self.train_op)  # Not saved.

        self.assertEqual(
            1, tf.train.load_variable(self.model_dir, self.global_step.name))

        # Simulate 2.5 seconds of sleep.
        mock_time.return_value = MOCK_START_TIME + 2.5
        mon_sess.run(self.train_op)  # Saved.

        mock_time.return_value = MOCK_START_TIME + 2.6
        mon_sess.run(self.train_op)  # Not saved.

        mock_time.return_value = MOCK_START_TIME + 2.7
        mon_sess.run(self.train_op)  # Not saved.

        self.assertEqual(
            3, tf.train.load_variable(self.model_dir, self.global_step.name))

        # Simulate 7.5 more seconds of sleep (10 seconds from start).
        mock_time.return_value = MOCK_START_TIME + 10
        mon_sess.run(self.train_op)  # Saved.
        self.assertEqual(
            6, tf.train.load_variable(self.model_dir, self.global_step.name))

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_save_secs_calls_listeners_periodically(self, mock_time):
    with self.graph.as_default():
      mock_time.return_value = MOCK_START_TIME
      listener = MockCheckpointSaverListener()
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir,
          save_secs=2,
          scaffold=self.scaffold,
          listeners=[listener])
      hook.begin()
      self.scaffold.finalize()

      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])

        mock_time.return_value = MOCK_START_TIME + 0.5
        mon_sess.run(self.train_op)  # hook runs here

        mock_time.return_value = MOCK_START_TIME + 0.5
        mon_sess.run(self.train_op)

        mock_time.return_value = MOCK_START_TIME + 3.0
        mon_sess.run(self.train_op)  # hook runs here

        mock_time.return_value = MOCK_START_TIME + 3.5
        mon_sess.run(self.train_op)

        mock_time.return_value = MOCK_START_TIME + 4.0
        mon_sess.run(self.train_op)

        mock_time.return_value = MOCK_START_TIME + 6.5
        mon_sess.run(self.train_op)  # hook runs here

        mock_time.return_value = MOCK_START_TIME + 7.0
        mon_sess.run(self.train_op)  # hook won't run here, so it does at end

        mock_time.return_value = MOCK_START_TIME + 7.5
        hook.end(sess)  # hook runs here
      self.assertEqual({
          'begin': 1,
          'before_save': 4,
          'after_save': 4,
          'end': 1
      }, listener.get_counts())

  def test_save_steps_saves_in_first_step(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        self.assertEqual(
            1, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_save_steps_saves_periodically(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        mon_sess.run(self.train_op)  # Not saved
        self.assertEqual(
            1, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # saved
        self.assertEqual(
            3, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # Not saved
        self.assertEqual(
            3, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # saved
        self.assertEqual(
            5, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_save_saves_at_end(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_secs=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        mon_sess.run(self.train_op)
        hook.end(sess)
        self.assertEqual(
            2, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_summary_writer_defs(self):
    fake_summary_writer.FakeSummaryWriter.install()
    tf.compat.v1.summary.FileWriterCache.clear()
    summary_writer = tf.compat.v1.summary.FileWriterCache.get(self.model_dir)

    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        hook.after_create_session(sess, None)
        mon_sess.run(self.train_op)
      summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.model_dir,
          expected_added_meta_graphs=[
              meta_graph.create_meta_graph_def(
                  graph_def=self.graph.as_graph_def(add_shapes=True),
                  saver_def=self.scaffold.saver.saver_def)
          ])

    fake_summary_writer.FakeSummaryWriter.uninstall()

  def test_save_checkpoint_before_first_train_step(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        mon_sess = monitored_session._HookedSession(sess, [hook])
        sess.run(self.scaffold.init_op)
        hook.after_create_session(sess, None)
        # Verifies that checkpoint is saved at step 0.
        self.assertEqual(
            0, tf.train.load_variable(self.model_dir, self.global_step.name))
        # Verifies that no checkpoint is saved after one training step.
        mon_sess.run(self.train_op)
        self.assertEqual(
            0, tf.train.load_variable(self.model_dir, self.global_step.name))
        # Verifies that checkpoint is saved after save_steps.
        mon_sess.run(self.train_op)
        self.assertEqual(
            2, tf.train.load_variable(self.model_dir, self.global_step.name))


@test_util.deprecated_graph_mode_only
class CheckpointSaverHookMultiStepTest(tf.test.TestCase):
  """Tests CheckpointSaverHook when each session run advances several steps."""

  def setUp(self):
    self.model_dir = tempfile.mkdtemp()
    self.graph = tf.Graph()
    self.steps_per_run = 5
    with self.graph.as_default():
      self.scaffold = tf.compat.v1.train.Scaffold()
      self.global_step = tf.compat.v1.train.get_or_create_global_step()
      self.train_op = training_util._increment_global_step(self.steps_per_run)

  def tearDown(self):
    shutil.rmtree(self.model_dir, ignore_errors=True)

  def test_save_steps_saves_in_first_step(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir,
          save_steps=2 * self.steps_per_run,
          scaffold=self.scaffold)
      hook._set_steps_per_run(self.steps_per_run)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        self.assertEqual(
            5, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_save_steps_saves_periodically(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir,
          save_steps=2 * self.steps_per_run,
          scaffold=self.scaffold)
      hook._set_steps_per_run(self.steps_per_run)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)  # Saved (step=5)
        self.assertEqual(
            5, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # Not saved (step=10)
        self.assertEqual(
            5, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # Saved (step=15)
        self.assertEqual(
            15, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # Not saved (step=20)
        self.assertEqual(
            15, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # Saved (step=25)
        self.assertEqual(
            25, tf.train.load_variable(self.model_dir, self.global_step.name))

  def test_save_steps_saves_at_end(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir,
          save_steps=2 * self.steps_per_run,
          scaffold=self.scaffold)
      hook._set_steps_per_run(self.steps_per_run)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        mon_sess.run(self.train_op)
        hook.end(sess)
        self.assertEqual(
            10, tf.train.load_variable(self.model_dir, self.global_step.name))


@test_util.deprecated_graph_mode_only
class ResourceCheckpointSaverHookTest(tf.test.TestCase):
  """Tests CheckpointSaverHook with a resource-variable global step."""

  def setUp(self):
    self.model_dir = tempfile.mkdtemp()
    self.graph = tf.Graph()
    with self.graph.as_default():
      self.scaffold = tf.compat.v1.train.Scaffold()
      with tf.compat.v1.variable_scope('foo', use_resource=True):
        self.global_step = tf.compat.v1.train.get_or_create_global_step()
      self.train_op = training_util._increment_global_step(1)

  def test_save_steps_saves_periodically(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.CheckpointSaverHook(
          self.model_dir, save_steps=2, scaffold=self.scaffold)
      hook.begin()
      self.scaffold.finalize()
      with tf.compat.v1.Session() as sess:
        sess.run(self.scaffold.init_op)
        mon_sess = monitored_session._HookedSession(sess, [hook])
        mon_sess.run(self.train_op)
        mon_sess.run(self.train_op)  # Not saved
        self.assertEqual(
            1, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # saved
        self.assertEqual(
            3, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # Not saved
        self.assertEqual(
            3, tf.train.load_variable(self.model_dir, self.global_step.name))
        mon_sess.run(self.train_op)  # saved
        self.assertEqual(
            5, tf.train.load_variable(self.model_dir, self.global_step.name))


@test_util.deprecated_graph_mode_only
class StepCounterHookTest(tf.test.TestCase):
  """Tests for basic_session_run_hooks.StepCounterHook."""

  def setUp(self):
    self.log_dir = tempfile.mkdtemp()

  def tearDown(self):
    shutil.rmtree(self.log_dir, ignore_errors=True)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_step_counter_every_n_steps(self, mock_time):
    mock_time.return_value = MOCK_START_TIME
    with tf.Graph().as_default() as g, tf.compat.v1.Session() as sess:
      tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(1)
      summary_writer = fake_summary_writer.FakeSummaryWriter(self.log_dir, g)
      hook = basic_session_run_hooks.StepCounterHook(
          summary_writer=summary_writer, every_n_steps=10)
      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      with tf.compat.v1.test.mock.patch.object(tf_logging,
                                               'warning') as mock_log:
        for _ in range(30):
          mock_time.return_value += 0.01
          mon_sess.run(train_op)
        # logging.warning should not be called.
        self.assertIsNone(mock_log.call_args)

      hook.end(sess)
      summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.log_dir,
          expected_graph=g,
          expected_summaries={})
      # Summaries are recorded at steps 11 and 21 (every 10 steps after the
      # first counted step).
      self.assertItemsEqual([11, 21], summary_writer.summaries.keys())
      for step in [11, 21]:
        summary_value = summary_writer.summaries[step][0].value[0]
        self.assertEqual('global_step/sec', summary_value.tag)
        self.assertGreater(summary_value.simple_value, 0)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_step_counter_every_n_secs(self, mock_time):
    mock_time.return_value = MOCK_START_TIME
    with tf.Graph().as_default() as g, tf.compat.v1.Session() as sess:
      tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(1)
      summary_writer = fake_summary_writer.FakeSummaryWriter(self.log_dir, g)
      hook = basic_session_run_hooks.StepCounterHook(
          summary_writer=summary_writer, every_n_steps=None, every_n_secs=0.1)

      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      mon_sess.run(train_op)
      mock_time.return_value += 0.2
      mon_sess.run(train_op)
      mock_time.return_value += 0.2
      mon_sess.run(train_op)
      hook.end(sess)

      summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.log_dir,
          expected_graph=g,
          expected_summaries={})
      self.assertTrue(summary_writer.summaries, 'No summaries were created.')
      self.assertItemsEqual([2, 3], summary_writer.summaries.keys())
      for summary in summary_writer.summaries.values():
        summary_value = summary[0].value[0]
        self.assertEqual('global_step/sec', summary_value.tag)
        self.assertGreater(summary_value.simple_value, 0)

  def test_global_step_name(self):
    with tf.Graph().as_default() as g, tf.compat.v1.Session() as sess:
      # Create a global step variable with a non-default name ('bar/foo').
      with tf.compat.v1.variable_scope('bar'):
        tf.compat.v1.get_variable(
            'foo',
            initializer=0,
            trainable=False,
            collections=[
                tf.compat.v1.GraphKeys.GLOBAL_STEP,
                tf.compat.v1.GraphKeys.GLOBAL_VARIABLES
            ])
      train_op = training_util._increment_global_step(1)
      summary_writer = fake_summary_writer.FakeSummaryWriter(self.log_dir, g)
      hook = basic_session_run_hooks.StepCounterHook(
          summary_writer=summary_writer, every_n_steps=1, every_n_secs=None)

      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      mon_sess.run(train_op)
      mon_sess.run(train_op)
      hook.end(sess)

      summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.log_dir,
          expected_graph=g,
          expected_summaries={})
      self.assertTrue(summary_writer.summaries, 'No summaries were created.')
      self.assertItemsEqual([2], summary_writer.summaries.keys())
      summary_value = summary_writer.summaries[2][0].value[0]
      # The summary tag is derived from the custom global step name.
      self.assertEqual('bar/foo/sec', summary_value.tag)

  def test_log_warning_if_global_step_not_increased(self):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      tf.compat.v1.train.get_or_create_global_step()
      train_op = training_util._increment_global_step(0)  # keep same.
      self.evaluate(tf.compat.v1.initializers.global_variables())
      hook = basic_session_run_hooks.StepCounterHook(
          every_n_steps=1, every_n_secs=None)
      hook.begin()
      mon_sess = monitored_session._HookedSession(sess, [hook])
      mon_sess.run(train_op)  # Run one step to record global step.
      with tf.compat.v1.test.mock.patch.object(tf_logging,
                                               'log_first_n') as mock_log:
        for _ in range(30):
          mon_sess.run(train_op)
        # The global step never advances, so the hook must warn about it.
        self.assertRegexpMatches(
            str(mock_log.call_args), 'global step.*has not been increased')
      hook.end(sess)

  def _setup_steps_per_run_test(self, every_n_steps, steps_per_run, graph,
                                sess):
    # Builds the train op, fake summary writer, hook and hooked session used
    # by the steps_per_run variants below.
    tf.compat.v1.train.get_or_create_global_step()
    self.train_op = training_util._increment_global_step(steps_per_run)
    self.summary_writer = fake_summary_writer.FakeSummaryWriter(
        self.log_dir, graph)
    self.hook = basic_session_run_hooks.StepCounterHook(
        summary_writer=self.summary_writer, every_n_steps=every_n_steps)
    self.hook._set_steps_per_run(steps_per_run)
    self.hook.begin()
    self.evaluate(tf.compat.v1.initializers.global_variables())
    self.mon_sess = monitored_session._HookedSession(sess, [self.hook])

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_steps_per_run_less_than_every_n_steps(self, mock_time):
    mock_time.return_value = MOCK_START_TIME
    with tf.Graph().as_default() as g, tf.compat.v1.Session() as sess:
      self._setup_steps_per_run_test(10, 5, g, sess)

      # Logs at 15, 25
      for _ in range(5):
        mock_time.return_value += 0.01
        self.mon_sess.run(self.train_op)

      self.hook.end(sess)

      self.summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.log_dir,
          expected_graph=g,
          expected_summaries={})
      self.assertItemsEqual([15, 25], self.summary_writer.summaries.keys())
      for step in [15, 25]:
        summary_value = self.summary_writer.summaries[step][0].value[0]
        self.assertEqual('global_step/sec', summary_value.tag)
        self.assertGreater(summary_value.simple_value, 0)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_steps_per_run_equal_every_n_steps(self, mock_time):
    mock_time.return_value = MOCK_START_TIME
    with tf.Graph().as_default() as g, tf.compat.v1.Session() as sess:
      self._setup_steps_per_run_test(5, 5, g, sess)

      # Logs at 10, 15, 20, 25
      for _ in range(5):
        mock_time.return_value += 0.01
        self.mon_sess.run(self.train_op)

      self.hook.end(sess)
      self.summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.log_dir,
          expected_graph=g,
          expected_summaries={})
      self.assertItemsEqual([10, 15, 20, 25],
                            self.summary_writer.summaries.keys())
      for step in [10, 15, 20, 25]:
        summary_value = self.summary_writer.summaries[step][0].value[0]
        self.assertEqual('global_step/sec', summary_value.tag)
        self.assertGreater(summary_value.simple_value, 0)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_steps_per_run_greater_than_every_n_steps(self, mock_time):
    mock_time.return_value = MOCK_START_TIME
    with tf.Graph().as_default() as g, tf.compat.v1.Session() as sess:
      self._setup_steps_per_run_test(5, 10, g, sess)

      # Logs at 20, 30, 40, 50
      for _ in range(5):
        mock_time.return_value += 0.01
        self.mon_sess.run(self.train_op)

      self.hook.end(sess)
      self.summary_writer.assert_summaries(
          test_case=self,
          expected_logdir=self.log_dir,
          expected_graph=g,
          expected_summaries={})
      self.assertItemsEqual([20, 30, 40, 50],
                            self.summary_writer.summaries.keys())
      for step in [20, 30, 40, 50]:
        summary_value = self.summary_writer.summaries[step][0].value[0]
        self.assertEqual('global_step/sec', summary_value.tag)
        self.assertGreater(summary_value.simple_value, 0)


@test_util.deprecated_graph_mode_only
class SummarySaverHookTest(tf.test.TestCase):

  def setUp(self):
    tf.test.TestCase.setUp(self)

    self.log_dir = 'log/dir'
    self.summary_writer = fake_summary_writer.FakeSummaryWriter(self.log_dir)

    var = tf.Variable(0.0)
    tensor = tf.compat.v1.assign_add(var, 1.0)
    tensor2 = tensor * 2
    self.summary_op = tf.compat.v1.summary.scalar('my_summary', tensor)
    self.summary_op2 = tf.compat.v1.summary.scalar('my_summary2', tensor2)

    tf.compat.v1.train.get_or_create_global_step()
    self.train_op = training_util._increment_global_step(1)

  def test_raise_when_scaffold_and_summary_op_both_missing(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.SummarySaverHook()

  def test_raise_when_scaffold_and_summary_op_both_present(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.SummarySaverHook(
          scaffold=tf.compat.v1.train.Scaffold(), summary_op=self.summary_op)

  def test_raise_in_both_secs_and_steps(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.SummarySaverHook(
          save_secs=10, save_steps=20, summary_writer=self.summary_writer)

  def test_raise_in_none_secs_and_steps(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.SummarySaverHook(
          save_secs=None, save_steps=None, summary_writer=self.summary_writer)

  def test_save_steps(self):
    hook = basic_session_run_hooks.SummarySaverHook(
        save_steps=8,
        summary_writer=self.summary_writer,
        summary_op=self.summary_op)

    with self.cached_session() as sess:
      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      for _ in range(30):
        mon_sess.run(self.train_op)
      hook.end(sess)

    # Saves on the first step, then every 8th step afterwards.
    self.summary_writer.assert_summaries(
        test_case=self,
        expected_logdir=self.log_dir,
        expected_summaries={
            1: {
                'my_summary': 1.0
            },
            9: {
                'my_summary': 2.0
            },
            17: {
                'my_summary': 3.0
            },
            25: {
                'my_summary': 4.0
            },
        })

  def test_multiple_summaries(self):
    hook = basic_session_run_hooks.SummarySaverHook(
        save_steps=8,
        summary_writer=self.summary_writer,
        summary_op=[self.summary_op, self.summary_op2])

    with self.cached_session() as sess:
      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      for _ in range(10):
        mon_sess.run(self.train_op)
      hook.end(sess)

    self.summary_writer.assert_summaries(
        test_case=self,
        expected_logdir=self.log_dir,
        expected_summaries={
            1: {
                'my_summary': 1.0,
                'my_summary2': 2.0
            },
            9: {
                'my_summary': 2.0,
                'my_summary2': 4.0
            },
        })

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_save_secs_saving_once_every_step(self, mock_time):
    mock_time.return_value = MOCK_START_TIME
    hook = basic_session_run_hooks.SummarySaverHook(
        save_secs=0.5,
        summary_writer=self.summary_writer,
        summary_op=self.summary_op)

    with self.cached_session() as sess:
      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      for _ in range(4):
        mon_sess.run(self.train_op)
        mock_time.return_value += 0.5
      hook.end(sess)

    self.summary_writer.assert_summaries(
        test_case=self,
        expected_logdir=self.log_dir,
        expected_summaries={
            1: {
                'my_summary': 1.0
            },
            2: {
                'my_summary': 2.0
            },
            3: {
                'my_summary': 3.0
            },
            4: {
                'my_summary': 4.0
            },
        })

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_save_secs_saving_once_every_three_steps(self, mock_time):
    mock_time.return_value = 1484695987.209386
    hook = basic_session_run_hooks.SummarySaverHook(
        save_secs=9.,
        summary_writer=self.summary_writer,
        summary_op=self.summary_op)

    with self.cached_session() as sess:
      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      for _ in range(8):
        mon_sess.run(self.train_op)
        mock_time.return_value += 3.1
      hook.end(sess)

    # 24.8 seconds passed (3.1*8), it saves every 9 seconds starting from first:
    self.summary_writer.assert_summaries(
        test_case=self,
        expected_logdir=self.log_dir,
        expected_summaries={
            1: {
                'my_summary': 1.0
            },
            4: {
                'my_summary': 2.0
            },
            7: {
                'my_summary': 3.0
            },
        })


@test_util.deprecated_graph_mode_only
class GlobalStepWaiterHookTest(tf.test.TestCase):

  def test_not_wait_for_step_zero(self):
    with tf.Graph().as_default():
      tf.compat.v1.train.get_or_create_global_step()
      hook = basic_session_run_hooks.GlobalStepWaiterHook(wait_until_step=0)
      hook.begin()
      with tf.compat.v1.Session() as sess:
        # Before run should return without waiting gstep increment.
        hook.before_run(
            tf.compat.v1.train.SessionRunContext(
                original_args=None, session=sess))

  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  def test_wait_for_step(self, mock_sleep):
    with tf.Graph().as_default():
      gstep = tf.compat.v1.train.get_or_create_global_step()
      hook = basic_session_run_hooks.GlobalStepWaiterHook(wait_until_step=1000)
      hook.begin()

      with tf.compat.v1.Session() as sess:
        # Mock out calls to time.sleep() to update the global step.

        class Context(object):
          counter = 0

        def mock_sleep_side_effect(seconds):
          del seconds  # argument is ignored
          Context.counter += 1
          if Context.counter == 1:
            # The first time sleep() is called, we update the global_step from
            # 0 to 500.
            sess.run(tf.compat.v1.assign(gstep, 500))
          elif Context.counter == 2:
            # The second time sleep() is called, we update the global_step from
            # 500 to 1100.
            sess.run(tf.compat.v1.assign(gstep, 1100))
          else:
            raise AssertionError(
                'Expected before_run() to terminate after the second call to '
                'time.sleep()')

        mock_sleep.side_effect = mock_sleep_side_effect

        # Run the mocked-out interaction with the hook.
        self.evaluate(tf.compat.v1.initializers.global_variables())
        run_context = tf.compat.v1.train.SessionRunContext(
            original_args=None, session=sess)
        hook.before_run(run_context)
        self.assertEqual(Context.counter, 2)


@test_util.deprecated_graph_mode_only
class FinalOpsHookTest(tf.test.TestCase):

  def test_final_ops_is_scalar_tensor(self):
    with tf.Graph().as_default():
      expected_value = 4
      final_ops = tf.constant(expected_value)

      hook = basic_session_run_hooks.FinalOpsHook(final_ops)
      hook.begin()

      with tf.compat.v1.Session() as session:
        hook.end(session)
        self.assertEqual(expected_value, hook.final_ops_values)

  def test_final_ops_is_tensor(self):
    with tf.Graph().as_default():
      expected_values = [1, 6, 3, 5, 2, 4]
      final_ops = tf.constant(expected_values)

      hook = basic_session_run_hooks.FinalOpsHook(final_ops)
      hook.begin()

      with tf.compat.v1.Session() as session:
        hook.end(session)
        self.assertListEqual(expected_values, hook.final_ops_values.tolist())

  def test_final_ops_triggers_out_of_range_error(self):
    with tf.Graph().as_default():
      dataset = tf.compat.v1.data.Dataset.range(1)
      iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
      read_ops = iterator.get_next()
      final_ops = read_ops

      hook = basic_session_run_hooks.FinalOpsHook(final_ops)
      hook.begin()

      with tf.compat.v1.Session() as session:
        session.run(read_ops)
        with tf.compat.v1.test.mock.patch.object(tf_logging,
                                                 'warning') as mock_log:
          with self.assertRaisesRegexp(tf.errors.OutOfRangeError,
                                       'End of sequence'):
            hook.end(session)
          # The hook should warn that the error may come from the input
          # pipeline rather than final_ops itself.
          self.assertRegexpMatches(
              str(mock_log.call_args), 'dependency back to some input source')

  def test_final_ops_with_dictionary(self):
    with tf.Graph().as_default():
      expected_values = [4, -3]
      final_ops = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
      final_ops_feed_dict = {final_ops: expected_values}

      hook = basic_session_run_hooks.FinalOpsHook(final_ops,
                                                  final_ops_feed_dict)
      hook.begin()

      with tf.compat.v1.Session() as session:
        hook.end(session)
        self.assertListEqual(expected_values, hook.final_ops_values.tolist())


@test_util.deprecated_graph_mode_only
class ResourceSummarySaverHookTest(tf.test.TestCase):

  def setUp(self):
    tf.test.TestCase.setUp(self)

    self.log_dir = 'log/dir'
    self.summary_writer = fake_summary_writer.FakeSummaryWriter(self.log_dir)

    var = tf.compat.v1.get_variable('var', initializer=0.0, use_resource=True)
    tensor = tf.compat.v1.assign_add(var, 1.0)
    self.summary_op = tf.compat.v1.summary.scalar('my_summary', tensor)

    with tf.compat.v1.variable_scope('foo', use_resource=True):
      tf.compat.v1.train.create_global_step()
    self.train_op = training_util._increment_global_step(1)

  def test_save_steps(self):
    hook = basic_session_run_hooks.SummarySaverHook(
        save_steps=8,
        summary_writer=self.summary_writer,
        summary_op=self.summary_op)

    with self.cached_session() as sess:
      hook.begin()
      self.evaluate(tf.compat.v1.initializers.global_variables())
      mon_sess = monitored_session._HookedSession(sess, [hook])
      for _ in range(30):
        mon_sess.run(self.train_op)
      hook.end(sess)

    self.summary_writer.assert_summaries(
        test_case=self,
        expected_logdir=self.log_dir,
        expected_summaries={
            1: {
                'my_summary': 1.0
            },
            9: {
                'my_summary': 2.0
            },
            17: {
                'my_summary': 3.0
            },
            25: {
                'my_summary': 4.0
            },
        })


@test_util.deprecated_graph_mode_only
class FeedFnHookTest(tf.test.TestCase):

  def test_feeding_placeholder(self):
    with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
      x = tf.compat.v1.placeholder(dtype=tf.dtypes.float32)
      y = x + 1
      hook = basic_session_run_hooks.FeedFnHook(feed_fn=lambda: {x: 1.0})
      hook.begin()
      mon_sess = monitored_session._HookedSession(sess, [hook])
      self.assertEqual(mon_sess.run(y), 2)


@test_util.deprecated_graph_mode_only
class ProfilerHookTest(tf.test.TestCase):

  def setUp(self):
    super(ProfilerHookTest, self).setUp()
    self.output_dir = tempfile.mkdtemp()
    self.graph = tf.Graph()
    self.filepattern = os.path.join(self.output_dir, 'timeline-*.json')
    with self.graph.as_default():
      self.global_step = tf.compat.v1.train.get_or_create_global_step()
      self.train_op = tf.compat.v1.assign_add(self.global_step, 1)

  def tearDown(self):
    super(ProfilerHookTest, self).tearDown()
    shutil.rmtree(self.output_dir, ignore_errors=True)

  def _count_timeline_files(self):
    # Number of timeline JSON files the hook has written so far.
    return len(tf.compat.v1.gfile.Glob(self.filepattern))

  def test_raise_in_both_secs_and_steps(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.ProfilerHook(save_secs=10, save_steps=20)

  def test_raise_in_none_secs_and_steps(self):
    with self.assertRaises(ValueError):
      basic_session_run_hooks.ProfilerHook(save_secs=None, save_steps=None)

  def test_save_secs_does_not_save_in_first_step(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.ProfilerHook(
          save_secs=2, output_dir=self.output_dir)
      with tf.compat.v1.train.SingularMonitoredSession(hooks=[hook]) as sess:
        sess.run(self.train_op)
        self.assertEqual(0, self._count_timeline_files())

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_save_secs_saves_periodically(self, mock_time):
    # Pick a fixed start time.
    with self.graph.as_default():
      mock_time.return_value = MOCK_START_TIME
      hook = basic_session_run_hooks.ProfilerHook(
          save_secs=2, output_dir=self.output_dir)
      with tf.compat.v1.train.SingularMonitoredSession(hooks=[hook]) as sess:
        sess.run(self.train_op)  # Not saved.
        self.assertEqual(0, self._count_timeline_files())
        # Simulate 2.5 seconds of sleep.
        mock_time.return_value = MOCK_START_TIME + 2.5
        sess.run(self.train_op)  # Saved.
        self.assertEqual(1, self._count_timeline_files())

        # Pretend some small amount of time has passed.
        mock_time.return_value = MOCK_START_TIME + 2.6
        sess.run(self.train_op)  # Not saved.
        # Edge test just before we should save the timeline.
        mock_time.return_value = MOCK_START_TIME + 4.4
        sess.run(self.train_op)  # Not saved.
        self.assertEqual(1, self._count_timeline_files())

        mock_time.return_value = MOCK_START_TIME + 4.5
        sess.run(self.train_op)  # Saved.
        self.assertEqual(2, self._count_timeline_files())

  def test_save_steps_does_not_save_in_first_step(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.ProfilerHook(
          save_steps=1, output_dir=self.output_dir)
      with tf.compat.v1.train.SingularMonitoredSession(hooks=[hook]) as sess:
        sess.run(self.train_op)  # Not saved.
        self.assertEqual(0, self._count_timeline_files())

  def test_save_steps_saves_periodically(self):
    with self.graph.as_default():
      hook = basic_session_run_hooks.ProfilerHook(
          save_steps=2, output_dir=self.output_dir)
      with tf.compat.v1.train.SingularMonitoredSession(hooks=[hook]) as sess:
        self.assertEqual(0, self._count_timeline_files())
        sess.run(self.train_op)  # Not saved.
        self.assertEqual(0, self._count_timeline_files())
        sess.run(self.train_op)  # Saved.
        self.assertEqual(1, self._count_timeline_files())
        sess.run(self.train_op)  # Not saved.
        self.assertEqual(1, self._count_timeline_files())
        sess.run(self.train_op)  # Saved.
        self.assertEqual(2, self._count_timeline_files())
        sess.run(self.train_op)  # Not saved.
        self.assertEqual(2, self._count_timeline_files())

  def test_run_metadata_saves(self):
    tf.compat.v1.summary.FileWriterCache.clear()
    fake_summary_writer.FakeSummaryWriter.install()
    fake_writer = tf.compat.v1.summary.FileWriterCache.get(self.output_dir)
    with self.graph.as_default():
      hook = basic_session_run_hooks.ProfilerHook(
          save_steps=1, output_dir=self.output_dir)
      with tf.compat.v1.train.SingularMonitoredSession(hooks=[hook]) as sess:
        sess.run(self.train_op)  # Not saved.
        sess.run(self.train_op)  # Saved.
        self.assertEqual(
            list(fake_writer._added_run_metadata.keys()), ['step_2'])
    fake_summary_writer.FakeSummaryWriter.uninstall()


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/hooks/fake_summary_writer.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Fake summary writer for unit tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.core.framework import summary_pb2
from tensorflow.python.framework import test_util
from tensorflow.python.summary.writer import writer
from tensorflow.python.summary.writer import writer_cache


# TODO(ptucker): Replace with mock framework.
class FakeSummaryWriter(object):
  """Fake summary writer.

  Records every summary/graph/session-log/run-metadata added to it in plain
  Python containers so tests can assert on them via `assert_summaries`.
  `install`/`uninstall` monkey-patch `writer.FileWriter` (and the writer
  cache) so code under test writes to this fake instead of to disk.
  """

  # Holds the real FileWriter class while the fake is installed; also serves
  # as the "is installed" flag.
  _replaced_summary_writer = None

  @classmethod
  def install(cls):
    # Replace the real FileWriter with this fake; raises if already installed
    # so nested installs don't silently lose the original class.
    if cls._replaced_summary_writer:
      raise ValueError('FakeSummaryWriter already installed.')
    cls._replaced_summary_writer = writer.FileWriter
    writer.FileWriter = FakeSummaryWriter
    writer_cache.FileWriter = FakeSummaryWriter

  @classmethod
  def uninstall(cls):
    # Restore the real FileWriter saved by `install`.
    if not cls._replaced_summary_writer:
      raise ValueError('FakeSummaryWriter not installed.')
    writer.FileWriter = cls._replaced_summary_writer
    writer_cache.FileWriter = cls._replaced_summary_writer
    cls._replaced_summary_writer = None

  def __init__(self, logdir, graph=None):
    self._logdir = logdir
    self._graph = graph
    self._summaries = {}
    self._added_graphs = []
    self._added_meta_graphs = []
    self._added_session_logs = []
    self._added_run_metadata = {}

  @property
  def summaries(self):
    # Maps global step -> list of Summary protos added at that step.
    return self._summaries

  def assert_summaries(self,
                       test_case,
                       expected_logdir=None,
                       expected_graph=None,
                       expected_summaries=None,
                       expected_added_graphs=None,
                       expected_added_meta_graphs=None,
                       expected_session_logs=None):
    """Assert expected items have been added to summary writer."""
    if expected_logdir is not None:
      test_case.assertEqual(expected_logdir, self._logdir)
    if expected_graph is not None:
      # Identity check: the exact same Graph object must have been passed in.
      test_case.assertTrue(expected_graph is self._graph)
    expected_summaries = expected_summaries or {}
    for step in expected_summaries:
      test_case.assertTrue(
          step in self._summaries,
          msg='Missing step %s from %s.' % (step, self._summaries.keys()))
      actual_simple_values = {}
      for step_summary in self._summaries[step]:
        for v in step_summary.value:
          # Ignore global_step/sec since it's written by Supervisor in a
          # separate thread, so it's non-deterministic how many get written.
          if 'global_step/sec' != v.tag:
            actual_simple_values[v.tag] = v.simple_value
      test_case.assertEqual(expected_summaries[step], actual_simple_values)
    if expected_added_graphs is not None:
      test_case.assertEqual(expected_added_graphs, self._added_graphs)
    if expected_added_meta_graphs is not None:
      test_case.assertEqual(
          len(expected_added_meta_graphs), len(self._added_meta_graphs))
      for expected, actual in zip(expected_added_meta_graphs,
                                  self._added_meta_graphs):
        test_util.assert_meta_graph_protos_equal(test_case, expected, actual)
    if expected_session_logs is not None:
      test_case.assertEqual(expected_session_logs, self._added_session_logs)

  def add_summary(self, summ, current_global_step):
    """Add summary."""
    # Accept either a Summary proto or its serialized bytes.
    if isinstance(summ, bytes):
      summary_proto = summary_pb2.Summary()
      summary_proto.ParseFromString(summ)
      summ = summary_proto
    if current_global_step in self._summaries:
      step_summaries = self._summaries[current_global_step]
    else:
      step_summaries = []
      self._summaries[current_global_step] = step_summaries
    step_summaries.append(summ)

  # NOTE: Ignore global_step since its value is non-deterministic.
  def add_graph(self, graph, global_step=None, graph_def=None):
    """Add graph."""
    if (global_step is not None) and (global_step < 0):
      raise ValueError('Invalid global_step %s.' % global_step)
    if graph_def is not None:
      raise ValueError('Unexpected graph_def %s.' % graph_def)
    self._added_graphs.append(graph)

  def add_meta_graph(self, meta_graph_def, global_step=None):
    """Add metagraph."""
    if (global_step is not None) and (global_step < 0):
      raise ValueError('Invalid global_step %s.' % global_step)
    self._added_meta_graphs.append(meta_graph_def)

  # NOTE: Ignore global_step since its value is non-deterministic.
  def add_session_log(self, session_log, global_step=None):  # pylint: disable=unused-argument
    self._added_session_logs.append(session_log)

  def add_run_metadata(self, run_metadata, tag, global_step=None):
    if (global_step is not None) and (global_step < 0):
      raise ValueError('Invalid global_step %s.' % global_step)
    self._added_run_metadata[tag] = run_metadata

  def flush(self):
    pass

  def reopen(self):
    pass

  def close(self):
    pass



================================================
FILE: tensorflow_estimator/python/estimator/hooks/hooks.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Some useful session run hooks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

import tensorflow as tf
from tensorflow.python.training import training_util
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator.estimator_export import estimator_export

# pylint: disable=protected-access


@estimator_export('estimator.experimental.InMemoryEvaluatorHook')
class InMemoryEvaluatorHook(tf.compat.v1.train.SessionRunHook):
  """Hook to run evaluation in training without a checkpoint.

  Example:

  ```python
  def train_input_fn():
    ...
    return train_dataset

  def eval_input_fn():
    ...
    return eval_dataset

  estimator = tf.estimator.DNNClassifier(...)

  evaluator = tf.estimator.experimental.InMemoryEvaluatorHook(
      estimator, eval_input_fn)
  estimator.train(train_input_fn, hooks=[evaluator])
  ```

  Current limitations of this approach are:

  * It doesn't support multi-node distributed mode.
  * It doesn't support saveable objects other than variables (such as boosted
    tree support)
  * It doesn't support custom saver logic (such as ExponentialMovingAverage
    support)

  """

  def __init__(self,
               estimator,
               input_fn,
               steps=None,
               hooks=None,
               name=None,
               every_n_iter=100):
    """Initializes a `InMemoryEvaluatorHook`.

    Args:
      estimator: A `tf.estimator.Estimator` instance to call evaluate.
      input_fn:  Equivalent to the `input_fn` arg to `estimator.evaluate`. A
        function that constructs the input data for evaluation. See [Creating
        input functions](
        https://tensorflow.org/guide/premade_estimators#create_input_functions)
        for more information. The function should construct and return one of
        the following:
          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
            tuple (features, labels) with same constraints as below.
          * A tuple (features, labels): Where `features` is a `Tensor` or a
            dictionary of string feature name to `Tensor` and `labels` is a
            `Tensor` or a dictionary of string label name to `Tensor`. Both
            `features` and `labels` are consumed by `model_fn`. They should
            satisfy the expectation of `model_fn` from inputs.
      steps: Equivalent to the `steps` arg to `estimator.evaluate`.  Number of
        steps for which to evaluate model. If `None`, evaluates until `input_fn`
        raises an end-of-input exception.
      hooks: Equivalent to the `hooks` arg to `estimator.evaluate`. List of
        `SessionRunHook` subclass instances. Used for callbacks inside the
        evaluation call.
      name:  Equivalent to the `name` arg to `estimator.evaluate`. Name of the
        evaluation if user needs to run multiple evaluations on different data
        sets, such as on training data vs test data. Metrics for different
        evaluations are saved in separate folders, and appear separately in
        tensorboard.
      every_n_iter: `int`, runs the evaluator once every N training iteration.

    Raises:
      ValueError: if `every_n_iter` is non-positive or it's not a single machine
        training
    """
    if every_n_iter is None or every_n_iter <= 0:
      raise ValueError('invalid every_n_iter=%s.' % every_n_iter)
    if (estimator.config.num_ps_replicas > 0 or
        estimator.config.num_worker_replicas > 1):
      raise ValueError(
          'InMemoryEvaluator supports only single machine (aka Local) setting.')
    self._estimator = estimator
    self._input_fn = input_fn
    self._steps = steps
    self._name = name
    self._every_n_iter = every_n_iter
    self._eval_dir = os.path.join(self._estimator.model_dir,
                                  'eval' if not name else 'eval_' + name)

    self._graph = None
    self._hooks = estimator_lib._check_hooks_type(hooks)
    self._hooks.extend(self._estimator._convert_eval_steps_to_hooks(steps))
    self._timer = tf.compat.v1.train.SecondOrStepTimer(every_steps=every_n_iter)

  def begin(self):
    """Build eval graph and restoring op."""
    self._timer.reset()
    self._iter_count = 0
    # Evaluation runs in its own graph, separate from the training graph.
    self._graph = tf.Graph()
    with self._graph.as_default():
      (self._scaffold, self._update_op, self._eval_dict,
       self._all_hooks) = self._estimator._evaluate_build_graph(
           self._input_fn, self._hooks, checkpoint_path=None)

      if self._scaffold.saver is not None:
        raise ValueError('InMemoryEvaluator does not support custom saver')
      if self._scaffold.init_fn is not None:
        raise ValueError('InMemoryEvaluator does not support custom init_fn')

      # Placeholders are used later to feed training variable values into the
      # eval graph without going through a checkpoint.
      self._var_name_to_eval_var = {
          v.name: v for v in tf.compat.v1.get_collection(
              tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
      }
      self._var_name_to_placeholder = {
          v.name: tf.compat.v1.placeholder(v.dtype)
          for v in tf.compat.v1.get_collection(
              tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
      }

  def after_create_session(self, session, coord):  # pylint: disable=unused-argument
    """Does first run which shows the eval metrics before training."""
    if tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.SAVEABLE_OBJECTS):
      raise ValueError(
          'InMemoryEvaluator does not support saveables other than global '
          'variables.')
    self._var_name_to_train_var = {
        v.name: v for v in tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
    }
    # Only transfer variables present in both graphs.
    var_names_to_transfer = set(self._var_name_to_placeholder.keys()) & set(
        self._var_name_to_train_var.keys())
    # Filter training var names that are not exist in evaluation
    self._var_name_to_train_var = {
        v_name: self._var_name_to_train_var[v_name]
        for v_name in var_names_to_transfer
    }
    # Filter eval var names that are not exist in training
    self._var_name_to_eval_var = {
        v_name: self._var_name_to_eval_var[v_name]
        for v_name in var_names_to_transfer
    }

    with self._graph.as_default():
      self._var_feed_op = tf.group([
          tf.compat.v1.assign(self._var_name_to_eval_var[v_name],
                              self._var_name_to_placeholder[v_name])
          for v_name in var_names_to_transfer
      ])

    self._evaluate(session)

  def _evaluate(self, train_session):
    # Pull current variable values out of the training session, then feed them
    # into the eval graph via the scaffold's init_fn.
    var_name_to_value = train_session.run(self._var_name_to_train_var)
    placeholder_to_value = {
        self._var_name_to_placeholder[v_name]: var_name_to_value[v_name]
        for v_name in var_name_to_value
    }

    def feed_variables(scaffold, session):
      del scaffold
      session.run(self._var_feed_op, feed_dict=placeholder_to_value)

    scaffold = tf.compat.v1.train.Scaffold(
        init_fn=feed_variables, copy_from_scaffold=self._scaffold)

    with self._graph.as_default():
      self._estimator._evaluate_run(
          checkpoint_path=None,
          scaffold=scaffold,
          update_op=self._update_op,
          eval_dict=self._eval_dict,
          all_hooks=self._all_hooks,
          output_dir=self._eval_dir)

    self._timer.update_last_triggered_step(self._iter_count)

  def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
    """Runs evaluator."""
    self._iter_count += 1
    if self._timer.should_trigger_for_step(self._iter_count):
      self._evaluate(run_context.session)

  def end(self, session):  # pylint: disable=unused-argument
    """Runs evaluator for final model."""
    self._evaluate(session)


class _StopAtCheckpointStepHook(tf.compat.v1.train.SessionRunHook):
  """Hook that requests stop at a specified step based on checkpoint.

  Note: We recommend using 'make_stop_at_checkpoint_step_hook` to get the
  proper hook.
  """

  def __init__(self, model_dir, last_step, wait_after_file_check_secs=30):
    """Initializes a `StopAtCheckpointStepHook`.

    This hook requests stop after a last step has been reached. It checks latest
    checkpoint to verify last step is written on disk or not.

    Args:
      model_dir: Directory to read global step from latest checkpoint.
      last_step: Step after which to stop.
      wait_after_file_check_secs: Reading same file by many workers may create
        I/O issues. To throttle that we will wait given secs after each read of
        the file.

    Raises:
      ValueError: If one of the arguments is invalid.
    """
    if last_step is None:
      raise ValueError('last_step must be specified.')
    if model_dir is None:
      raise ValueError('model_dir must be specified.')

    self._model_dir = model_dir
    self._last_step = last_step
    self._wait_after_file_check_secs = wait_after_file_check_secs

  def begin(self):
    self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
    if self._global_step_tensor is None:
      raise RuntimeError(
          'Global step should be created to use StopAtCheckpointStepHook.')

  def before_run(self, run_context):  # pylint: disable=unused-argument
    return tf.compat.v1.train.SessionRunArgs(self._global_step_tensor)

  def after_run(self, run_context, run_values):
    global_step = run_values.results + 1
    if global_step >= self._last_step:
      # Check latest global step in the checkpoint to ensure that the targeted
      # last step is written on disk.
      step = estimator_lib._load_global_step_from_checkpoint_dir(
          self._model_dir)
      if step >= self._last_step:
        run_context.request_stop()
      else:
        # Throttle checkpoint-file reads across many workers.
        time.sleep(self._wait_after_file_check_secs)


@estimator_export('estimator.experimental.make_stop_at_checkpoint_step_hook')
def make_stop_at_checkpoint_step_hook(estimator,
                                      last_step,
                                      wait_after_file_check_secs=30):
  """Creates a proper StopAtCheckpointStepHook based on chief status."""
  # The chief writes checkpoints itself, so it can stop on the step counter
  # directly; other workers must wait until the checkpoint appears on disk.
  if estimator.config.is_chief:
    return tf.compat.v1.train.StopAtStepHook(last_step=last_step)
  return _StopAtCheckpointStepHook(
      model_dir=estimator.model_dir,
      last_step=last_step,
      wait_after_file_check_secs=wait_after_file_check_secs)


# pylint: enable=protected-access



================================================
FILE: tensorflow_estimator/python/estimator/hooks/hooks_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests for hooks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import glob import json import os import tempfile import time import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator import estimator_lib from tensorflow_estimator.python.estimator import run_config as run_config_lib from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.hooks import hooks as hooks_lib def summary_step_keyword_to_value_mapping(dir_): tf.compat.v1.summary.FileWriterCache.clear() # Get last Event written. event_paths = glob.glob(os.path.join(dir_, 'events*')) step_keyword_to_value = {} for last_event in tf.compat.v1.train.summary_iterator(event_paths[-1]): if last_event.step not in step_keyword_to_value: step_keyword_to_value[last_event.step] = {} if last_event.summary is not None: for value in last_event.summary.value: step_keyword_to_value[last_event.step][value.tag] = value.simple_value return step_keyword_to_value def get_summary_value(dir_, step, keyword): """Get summary value for given step and keyword.""" tf.compat.v1.summary.FileWriterCache.clear() # Get last Event written. 
event_paths = glob.glob(os.path.join(dir_, 'events*')) print('XXX', event_paths) for last_event in tf.compat.v1.train.summary_iterator(event_paths[-1]): if last_event.step == step and last_event.summary is not None: for value in last_event.summary.value: if keyword in value.tag: return value.simple_value return None @test_util.deprecated_graph_mode_only class InMemoryEvaluatorHookTest(tf.test.TestCase): def test_runs_eval_metrics(self): def model_fn(features, labels, mode): _ = labels if estimator_lib.ModeKeys.TRAIN == mode: with tf.control_dependencies([features]): train_op = tf.compat.v1.assign_add( tf.compat.v1.train.get_global_step(), 1) return estimator_lib.EstimatorSpec( mode, loss=tf.constant(3.), train_op=train_op) if estimator_lib.ModeKeys.EVAL == mode: mean = tf_keras.metrics.Mean() mean.update_state(features) return estimator_lib.EstimatorSpec( mode, loss=tf.constant(5.), eval_metric_ops={ 'mean_of_features': mean, }) estimator = estimator_lib.Estimator(model_fn=model_fn) def input_fn(): return tf.compat.v1.data.Dataset.range(10) evaluator = hooks_lib.InMemoryEvaluatorHook( estimator, input_fn, every_n_iter=4) estimator.train(input_fn, hooks=[evaluator]) self.assertTrue(os.path.isdir(estimator.eval_dir())) step_keyword_to_value = summary_step_keyword_to_value_mapping( estimator.eval_dir()) # 4.5 = sum(range(10))/10 # before training self.assertEqual(4.5, step_keyword_to_value[0]['mean_of_features']) # intervals (every_n_iter=4) self.assertEqual(4.5, step_keyword_to_value[4]['mean_of_features']) self.assertEqual(4.5, step_keyword_to_value[8]['mean_of_features']) # end self.assertEqual(4.5, step_keyword_to_value[10]['mean_of_features']) self.assertEqual(set([0, 4, 8, 10]), set(step_keyword_to_value.keys())) def test_uses_latest_variable_value(self): def model_fn(features, labels, mode): _ = labels step = tf.compat.v1.train.get_global_step() w = tf.compat.v1.get_variable( 'w', shape=[], initializer=tf.compat.v1.initializers.zeros(), dtype=tf.dtypes.int64) 
if estimator_lib.ModeKeys.TRAIN == mode: # to consume features, we have control dependency with tf.control_dependencies([features]): step_inc = tf.compat.v1.assign_add( tf.compat.v1.train.get_global_step(), 1) with tf.control_dependencies([step_inc]): assign_w_to_step_plus_2 = w.assign(step + 2) return estimator_lib.EstimatorSpec( mode, loss=tf.constant(3.), train_op=assign_w_to_step_plus_2) if estimator_lib.ModeKeys.EVAL == mode: # to consume features, we have control dependency with tf.control_dependencies([features]): loss = tf.constant(5.) mean = tf_keras.metrics.Mean() mean.update_state(w) return estimator_lib.EstimatorSpec( mode, loss=loss, # w is constant in each step, so the mean. # w = 0 if step==0 else step+2 eval_metric_ops={'mean_of_const': mean}) estimator = estimator_lib.Estimator(model_fn=model_fn) def input_fn(): return tf.compat.v1.data.Dataset.range(10) evaluator = hooks_lib.InMemoryEvaluatorHook( estimator, input_fn, every_n_iter=4) estimator.train(input_fn, hooks=[evaluator]) self.assertTrue(os.path.isdir(estimator.eval_dir())) step_keyword_to_value = summary_step_keyword_to_value_mapping( estimator.eval_dir()) # w = 0 if step==0 else step+2 self.assertEqual(0, step_keyword_to_value[0]['mean_of_const']) self.assertEqual(6, step_keyword_to_value[4]['mean_of_const']) self.assertEqual(12, step_keyword_to_value[10]['mean_of_const']) def test_dnn_classifier(self): embedding = tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_vocabulary_list( 'wire_cast', ['kima', 'omar', 'stringer']), 8) dnn = estimator_lib.DNNClassifier( feature_columns=[embedding], hidden_units=[3, 1]) def train_input_fn(): return tf.compat.v1.data.Dataset.from_tensors(({ 'wire_cast': [['omar'], ['kima']] }, [[0], [1]])).repeat(3) def eval_input_fn(): return tf.compat.v1.data.Dataset.from_tensors(({ 'wire_cast': [['stringer'], ['kima']] }, [[0], [1]])).repeat(2) evaluator = hooks_lib.InMemoryEvaluatorHook( dnn, eval_input_fn, name='in-memory') 
dnn.train(train_input_fn, hooks=[evaluator]) self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory'))) step_keyword_to_value = summary_step_keyword_to_value_mapping( dnn.eval_dir('in-memory')) final_metrics = dnn.evaluate(eval_input_fn) step = final_metrics[tf.compat.v1.GraphKeys.GLOBAL_STEP] for summary_tag in final_metrics: if summary_tag == tf.compat.v1.GraphKeys.GLOBAL_STEP: continue self.assertEqual(final_metrics[summary_tag], step_keyword_to_value[step][summary_tag]) def test_raise_error_with_multi_worker(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': run_config_lib.TaskType.CHIEF, 'index': 0 } } with tf.compat.v1.test.mock.patch.dict( 'os.environ', {'TF_CONFIG': json.dumps(tf_config)}): dnn = estimator_lib.DNNClassifier( feature_columns=[tf.feature_column.numeric_column('x')], hidden_units=[3, 1]) def eval_input_fn(): pass with self.assertRaisesRegexp(ValueError, 'supports only single machine'): hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn) def test_raise_error_with_ps(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1'], }, 'task': { 'type': run_config_lib.TaskType.CHIEF, 'index': 0 } } with tf.compat.v1.test.mock.patch.dict( 'os.environ', {'TF_CONFIG': json.dumps(tf_config)}): dnn = estimator_lib.DNNClassifier( feature_columns=[tf.feature_column.numeric_column('x')], hidden_units=[3, 1]) def eval_input_fn(): pass with self.assertRaisesRegexp(ValueError, 'supports only single machine'): hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn) def test_raise_error_with_custom_saver_in_eval(self): def model_fn(features, labels, mode): _, _ = features, labels mean = tf_keras.metrics.Mean() mean.update_state(tf.constant(2.)) return estimator_lib.EstimatorSpec( mode, loss=tf.constant(3.), scaffold=tf.compat.v1.train.Scaffold( saver=tf.compat.v1.train.Saver()), 
train_op=tf.constant(5.), eval_metric_ops={ 'mean_of_features': mean, }) estimator = estimator_lib.Estimator(model_fn=model_fn) def input_fn(): return tf.compat.v1.data.Dataset.range(10) evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn) with self.assertRaisesRegexp(ValueError, 'does not support custom saver'): evaluator.begin() def test_raise_error_with_custom_init_fn_in_eval(self): def model_fn(features, labels, mode): _, _ = features, labels def init_fn(scaffold, session): _, _ = scaffold, session mean = tf_keras.metrics.Mean() mean.update_state(tf.constant(2.)) return estimator_lib.EstimatorSpec( mode, loss=tf.constant(3.), scaffold=tf.compat.v1.train.Scaffold(init_fn=init_fn), train_op=tf.constant(5.), eval_metric_ops={ 'mean_of_features': mean, }) estimator = estimator_lib.Estimator(model_fn=model_fn) def input_fn(): return tf.compat.v1.data.Dataset.range(10) evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn) with self.assertRaisesRegexp(ValueError, 'does not support custom init_fn'): evaluator.begin() def test_raise_error_with_saveables_other_than_global_variables(self): def model_fn(features, labels, mode): _, _ = features, labels w = tf.compat.v1.Variable( initial_value=[0.], trainable=False, collections=[tf.compat.v1.GraphKeys.SAVEABLE_OBJECTS]) init_op = tf.group( [w.initializer, tf.compat.v1.train.get_global_step().initializer]) mean = tf_keras.metrics.Mean() mean.update_state(tf.constant(2.)) return estimator_lib.EstimatorSpec( mode, loss=tf.constant(3.), scaffold=tf.compat.v1.train.Scaffold(init_op=init_op), train_op=tf.constant(5.), eval_metric_ops={ 'mean_of_features': mean, }) estimator = estimator_lib.Estimator(model_fn=model_fn) def input_fn(): return tf.compat.v1.data.Dataset.range(10) evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn) with self.assertRaisesRegexp(ValueError, 'does not support saveables'): estimator.train(input_fn, hooks=[evaluator]) @test_util.deprecated_graph_mode_only class 
StopAtCheckpointStepHookTest(tf.test.TestCase): def test_do_not_stop_if_checkpoint_is_not_there(self): with tf.Graph().as_default(): step = tf.compat.v1.train.create_global_step() assign_ten = step.assign(10) no_op = tf.no_op() hook = hooks_lib._StopAtCheckpointStepHook( model_dir=tempfile.mkdtemp(), last_step=10) with tf.compat.v1.train.SingularMonitoredSession( hooks=[hook]) as mon_sess: mon_sess.raw_session().run(assign_ten) with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep: mon_sess.run(no_op) self.assertTrue(mock_sleep.called) self.assertFalse(mon_sess.should_stop()) def test_do_not_stop_if_checkpoint_step_is_smaller(self): model_dir = tempfile.mkdtemp() with tf.Graph().as_default(): step = tf.compat.v1.train.create_global_step() assign_nine = step.assign(9) assign_ten = step.assign(10) no_op = tf.no_op() hook = hooks_lib._StopAtCheckpointStepHook( model_dir=model_dir, last_step=10) with tf.compat.v1.Session() as sess: sess.run(assign_nine) tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) with tf.compat.v1.train.SingularMonitoredSession( hooks=[hook]) as mon_sess: mon_sess.raw_session().run(assign_ten) with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep: mon_sess.run(no_op) self.assertTrue(mock_sleep.called) self.assertFalse(mon_sess.should_stop()) def test_stop_if_checkpoint_step_is_laststep(self): model_dir = tempfile.mkdtemp() with tf.Graph().as_default(): step = tf.compat.v1.train.create_global_step() assign_ten = step.assign(10) no_op = tf.no_op() hook = hooks_lib._StopAtCheckpointStepHook( model_dir=model_dir, last_step=10) with tf.compat.v1.Session() as sess: sess.run(assign_ten) tf.compat.v1.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) with tf.compat.v1.train.SingularMonitoredSession( hooks=[hook]) as mon_sess: mon_sess.raw_session().run(assign_ten) with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep: mon_sess.run(no_op) 
self.assertFalse(mock_sleep.called) self.assertTrue(mon_sess.should_stop()) def test_creates_regular_stop_at_step_hook_for_chief(self): # by default an estimator is in chief mode dnn = estimator_lib.DNNClassifier( feature_columns=[tf.feature_column.numeric_column('x')], hidden_units=[3, 1]) hook = hooks_lib.make_stop_at_checkpoint_step_hook(dnn, 300) self.assertIsInstance(hook, tf.compat.v1.train.StopAtStepHook) self.assertEqual(300, hook._last_step) def test_creates_checkpoint_hook_for_workers(self): class FakeWorkerConfig(estimator_lib.RunConfig): @property def is_chief(self): return False dnn = estimator_lib.DNNClassifier( feature_columns=[tf.feature_column.numeric_column('x')], hidden_units=[3, 1], config=FakeWorkerConfig()) hook = hooks_lib.make_stop_at_checkpoint_step_hook(dnn, 300) self.assertIsInstance(hook, hooks_lib._StopAtCheckpointStepHook) self.assertEqual(300, hook._last_step) self.assertEqual(dnn.model_dir, hook._model_dir) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/hooks/session_run_hook.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """A SessionRunHook extends `session.run()` calls for the `MonitoredSession`. 
SessionRunHooks are useful to track training, report progress, request early stopping and more. SessionRunHooks use the observer pattern and notify at the following points: - when a session starts being used - before a call to the `session.run()` - after a call to the `session.run()` - when the session closed A SessionRunHook encapsulates a piece of reusable/composable computation that can piggyback a call to `MonitoredSession.run()`. A hook can add any ops-or-tensor/feeds to the run call, and when the run call finishes with success gets the outputs it requested. Hooks are allowed to add ops to the graph in `hook.begin()`. The graph is finalized after the `begin()` method is called. There are a few pre-defined hooks: - StopAtStepHook: Request stop based on global_step - CheckpointSaverHook: saves checkpoint - LoggingTensorHook: outputs one or more tensor values to log - NanTensorHook: Request stop if given `Tensor` contains Nans. - SummarySaverHook: saves summaries to a summary writer For more specific needs, you can create custom hooks: class ExampleHook(SessionRunHook): def begin(self): # You can add ops to the graph here. print('Starting the session.') self.your_tensor = ... def after_create_session(self, session, coord): # When this is called, the graph is finalized and # ops can no longer be added to the graph. print('Session created.') def before_run(self, run_context): print('Before calling session.run().') return SessionRunArgs(self.your_tensor) def after_run(self, run_context, run_values): print('Done running one step. The value of my tensor: %s', run_values.results) if you-need-to-stop-loop: run_context.request_stop() def end(self, session): print('Done with the session.') To understand how hooks interact with calls to `MonitoredSession.run()`, look at following code: with MonitoredTrainingSession(hooks=your_hooks, ...) 
as sess: while not sess.should_stop(): sess.run(your_fetches) Above user code leads to following execution: call hooks.begin() sess = tf.Session() call hooks.after_create_session() while not stop is requested: call hooks.before_run() try: results = sess.run(merged_fetches, feed_dict=merged_feeds) except (errors.OutOfRangeError, StopIteration): break call hooks.after_run() call hooks.end() sess.close() Note that if sess.run() raises OutOfRangeError or StopIteration then hooks.after_run() will not be called but hooks.end() will still be called. If sess.run() raises any other exception then neither hooks.after_run() nor hooks.end() will be called. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.training.session_run_hook import SessionRunArgs from tensorflow.python.training.session_run_hook import SessionRunContext from tensorflow.python.training.session_run_hook import SessionRunHook from tensorflow.python.training.session_run_hook import SessionRunValues from tensorflow_estimator.python.estimator.estimator_export import estimator_export estimator_export("estimator.SessionRunHook")(SessionRunHook) estimator_export("estimator.SessionRunArgs")(SessionRunArgs) estimator_export("estimator.SessionRunContext")(SessionRunContext) estimator_export("estimator.SessionRunValues")(SessionRunValues) ================================================ FILE: tensorflow_estimator/python/estimator/inputs/__init__.py ================================================ ================================================ FILE: tensorflow_estimator/python/estimator/inputs/inputs.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Utility methods to create simple input_fns.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow_estimator.python.estimator.inputs.numpy_io import numpy_input_fn from tensorflow_estimator.python.estimator.inputs.pandas_io import pandas_input_fn # pylint: enable=unused-import,line-too-long ================================================ FILE: tensorflow_estimator/python/estimator/inputs/numpy_io.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Methods to allow dict of numpy arrays."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import numpy as np
from six import string_types

from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.inputs.queues import feeding_functions

# Key name to pack the target into dict of `features`. See
# `_get_unique_target_key` for details.
_TARGET_KEY = '__target_key__'


def _get_unique_target_key(features):
  """Returns a key that does not already exist in the input dict `features`.

  Caller of `input_fn` usually provides `features` (dict of numpy arrays) and
  `target`, but the underlying feeding module expects a single dict of numpy
  arrays as input. So, the `target` needs to be packed into the `features`
  temporarily and unpacked after calling the feeding function. Toward this
  goal, this function returns a key not existing in the `features` to pack the
  `target`.

  Args:
    features: OrderedDict of numpy arrays

  Returns:
    A unique key that can be used to insert the subsequent target into
      features dict.
  """
  target_key = _TARGET_KEY
  # Keep appending '_n' until the candidate no longer collides with a feature
  # name. Terminates because `features` has finitely many (finite-length) keys.
  while target_key in features:
    target_key += '_n'
  return target_key


def _validate_and_convert_features(x):
  """Type check input data and make a shadow copy as an ordered dict.

  Args:
    x: numpy array object or dict of numpy array objects. If an array, the
      array will be treated as a single feature.

  Returns:
    OrderedDict copy of x.

  Raises:
    ValueError: if x is empty
    TypeError: if x is an unknown type.
  """
  if isinstance(x, dict):
    if not x:
      raise ValueError('x cannot be an empty dict')
    # Make a shadow copy and also ensure the order of iteration is consistent.
    # Sorting by key makes the column order deterministic across runs.
    ordered_dict_data = collections.OrderedDict(
        sorted(x.items(), key=lambda t: t[0]))
  elif isinstance(x, np.ndarray):
    if x.size == 0:
      raise ValueError('x cannot be an empty array')
    # Make a shadow copy and convert to dict to align with dict processing.
    # The sentinel key is only used internally; the array form is restored in
    # `input_fn` before returning features to the caller.
    ordered_dict_data = collections.OrderedDict({'__direct_np_input__': x})
  else:
    x_type = type(x).__name__
    raise TypeError('x must be a dict or array; got {}'.format(x_type))
  return ordered_dict_data


@estimator_export(v1=['estimator.inputs.numpy_input_fn'])
def numpy_input_fn(x,
                   y=None,
                   batch_size=128,
                   num_epochs=1,
                   shuffle=None,
                   queue_capacity=1000,
                   num_threads=1):
  """Returns input function that would feed dict of numpy arrays into the model.

  This returns a function outputting `features` and `targets` based on the dict
  of numpy arrays. The dict `features` has the same keys as the `x`. The dict
  `targets` has the same keys as the `y` if `y` is a dict.

  Example:

  ```python
  age = np.arange(4) * 1.0
  height = np.arange(32, 36)
  x = {'age': age, 'height': height}
  y = np.arange(-32, -28)

  with tf.Session() as session:
    input_fn = numpy_io.numpy_input_fn(
        x, y, batch_size=2, shuffle=False, num_epochs=1)
  ```

  Args:
    x: numpy array object or dict of numpy array objects. If an array, the
      array will be treated as a single feature.
    y: numpy array object or dict of numpy array object. `None` if absent.
    batch_size: Integer, size of batches to return.
    num_epochs: Integer, number of epochs to iterate over data. If `None` will
      run forever.
    shuffle: Boolean, if True shuffles the queue. Avoid shuffle at prediction
      time.
    queue_capacity: Integer, size of queue to accumulate.
    num_threads: Integer, number of threads used for reading and enqueueing. In
      order to have predicted and repeatable order of reading and enqueueing,
      such as in prediction and evaluation mode, `num_threads` should be 1.

  Returns:
    Function, that has signature of ()->(dict of `features`, `targets`)

  Raises:
    ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e.,
      values in `x` have same shape).
    ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
    ValueError: if x or y is an empty dict.
    TypeError: `x` is not a dict or array.
    ValueError: if 'shuffle' is not provided or a bool.
  """
  # `shuffle` deliberately has no usable default: callers must choose
  # explicitly, since a wrong value silently degrades training or evaluation.
  if not isinstance(shuffle, bool):
    raise ValueError('shuffle must be provided and explicitly set as boolean '
                     '(it is recommended to set it as True for training); '
                     'got {}'.format(shuffle))

  def input_fn():
    """Numpy input function."""

    # Note that `x` should not be used after conversion to ordered_dict_data,
    # as type could be either dict or array.
    ordered_dict_data = _validate_and_convert_features(x)

    # Deep copy keys which is a view in python 3
    feature_keys = list(ordered_dict_data.keys())

    # `target_keys` encodes how `y` was supplied:
    #   None            -> no target,
    #   a single string -> `y` was an array, packed under that key,
    #   a list of keys  -> `y` was a dict.
    if y is None:
      target_keys = None
    elif isinstance(y, dict):
      if not y:
        raise ValueError('y cannot be empty dict, use None instead.')

      ordered_dict_y = collections.OrderedDict(
          sorted(y.items(), key=lambda t: t[0]))
      target_keys = list(ordered_dict_y.keys())

      duplicate_keys = set(feature_keys).intersection(set(target_keys))
      if duplicate_keys:
        raise ValueError('{} duplicate keys are found in both x and y: '
                         '{}'.format(len(duplicate_keys), duplicate_keys))

      ordered_dict_data.update(ordered_dict_y)
    else:
      target_keys = _get_unique_target_key(ordered_dict_data)
      ordered_dict_data[target_keys] = y

    # All columns (features and targets alike) must share the same leading
    # dimension, otherwise rows cannot be fed in lockstep.
    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}

      if target_keys is None:
        shape_of_y = None
      elif isinstance(target_keys, string_types):
        shape_of_y = y.shape
      else:
        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}

      raise ValueError('Length of tensors in x and y is mismatched. All '
                       'elements in x and y must have the same length.\n'
                       'Shapes in x: {}\n'
                       'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))

    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
        ordered_dict_data,
        queue_capacity,
        shuffle=shuffle,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)

    # With a finite number of epochs, `dequeue_up_to` allows a final partial
    # batch; with `num_epochs=None` the stream is endless and full batches are
    # always available.
    batch = (
        queue.dequeue_many(batch_size)
        if num_epochs is None else queue.dequeue_up_to(batch_size))

    # Remove the first `Tensor` in `batch`, which is the row number.
    if batch:
      batch.pop(0)

    if isinstance(x, np.ndarray):
      # Return as the same type as original array.
      features = batch[0]
    else:
      # Return as the original dict type
      features = dict(zip(feature_keys, batch[:len(feature_keys)]))
    if target_keys is None:
      # TODO(martinwicke), return consistent result
      return features
    elif isinstance(target_keys, string_types):
      # `y` was a single array: the packed target is the last column.
      target = batch[-1]
      return features, target
    else:
      # `y` was a dict: target columns were appended after the features.
      target = dict(zip(target_keys, batch[-len(target_keys):]))
      return features, target

  return input_fn


================================================
FILE: tensorflow_estimator/python/estimator/inputs/numpy_io_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Tests for numpy_io.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np import tensorflow as tf from tensorflow.python.feature_column.feature_column import _LinearModel from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.inputs import numpy_io @test_util.run_v1_only('Tests v1 only symbols') class NumpyIoTest(tf.test.TestCase): def testNumpyInputFn(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = np.arange(-32, -28) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [0, 1]) self.assertAllEqual(res[0]['b'], [32, 33]) self.assertAllEqual(res[1], [-32, -31]) session.run([features, target]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithVeryLargeBatchSizeAndMultipleEpochs(self): a = np.arange(2) * 1.0 b = np.arange(32, 34) x = {'a': a, 'b': b} y = np.arange(-32, -30) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=128, shuffle=False, num_epochs=2) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [0, 1, 0, 1]) self.assertAllEqual(res[0]['b'], [32, 33, 32, 33]) self.assertAllEqual(res[1], [-32, -31, -32, -31]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads) def 
testNumpyInputFnWithZeroEpochs(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = np.arange(-32, -28) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=0) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self): batch_size = 2 a = np.arange(5) * 1.0 b = np.arange(32, 37) x = {'a': a, 'b': b} y = np.arange(-32, -27) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=batch_size, shuffle=False, num_epochs=1) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [0, 1]) self.assertAllEqual(res[0]['b'], [32, 33]) self.assertAllEqual(res[1], [-32, -31]) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [2, 3]) self.assertAllEqual(res[0]['b'], [34, 35]) self.assertAllEqual(res[1], [-30, -29]) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [4]) self.assertAllEqual(res[0]['b'], [36]) self.assertAllEqual(res[1], [-28]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithBatchSizeNotDividedByDataSizeAndMultipleEpochs(self): batch_size = 2 a = np.arange(3) * 1.0 b = np.arange(32, 35) x = {'a': a, 'b': b} y = np.arange(-32, -29) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=batch_size, shuffle=False, num_epochs=3) features, target = input_fn() coord = tf.train.Coordinator() threads = 
tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [0, 1]) self.assertAllEqual(res[0]['b'], [32, 33]) self.assertAllEqual(res[1], [-32, -31]) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [2, 0]) self.assertAllEqual(res[0]['b'], [34, 32]) self.assertAllEqual(res[1], [-30, -32]) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [1, 2]) self.assertAllEqual(res[0]['b'], [33, 34]) self.assertAllEqual(res[1], [-31, -30]) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [0, 1]) self.assertAllEqual(res[0]['b'], [32, 33]) self.assertAllEqual(res[1], [-32, -31]) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [2]) self.assertAllEqual(res[0]['b'], [34]) self.assertAllEqual(res[1], [-30]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithBatchSizeLargerThanDataSize(self): batch_size = 10 a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = np.arange(-32, -28) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=batch_size, shuffle=False, num_epochs=1) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [0, 1, 2, 3]) self.assertAllEqual(res[0]['b'], [32, 33, 34, 35]) self.assertAllEqual(res[1], [-32, -31, -30, -29]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithDifferentDimensionsOfFeatures(self): a = np.array([[1, 2], [3, 4]]) b = np.array([5, 6]) x = {'a': a, 'b': b} y = np.arange(-32, -30) with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, 
batch_size=2, shuffle=False, num_epochs=1) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['a'], [[1, 2], [3, 4]]) self.assertAllEqual(res[0]['b'], [5, 6]) self.assertAllEqual(res[1], [-32, -31]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithXAsNonDict(self): x = list(range(32, 36)) y = np.arange(4) with self.cached_session(): with self.assertRaisesRegexp(TypeError, 'x must be a dict or array'): failing_input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() def testNumpyInputFnWithXIsEmptyDict(self): x = {} y = np.arange(4) with self.cached_session(): with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() def testNumpyInputFnWithXIsEmptyArray(self): x = np.array([[], []]) y = np.arange(4) with self.cached_session(): with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() def testNumpyInputFnWithYIsNone(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = None with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) feature = session.run(features_tensor) self.assertEqual(len(feature), 2) self.assertAllEqual(feature['a'], [0, 1]) self.assertAllEqual(feature['b'], [32, 33]) session.run([features_tensor]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features_tensor]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithNonBoolShuffle(self): x = np.arange(32, 36) y = np.arange(4) with 
self.cached_session(): with self.assertRaisesRegexp( ValueError, 'shuffle must be provided and explicitly ' 'set as boolean'): # Default shuffle is None. numpy_io.numpy_input_fn(x, y) def testNumpyInputFnWithTargetKeyAlreadyInX(self): array = np.arange(32, 36) x = {'__target_key__': array} y = np.arange(4) with self.cached_session(): input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) input_fn() self.assertAllEqual(x['__target_key__'], array) # The input x should not be mutated. self.assertItemsEqual(x.keys(), ['__target_key__']) def testNumpyInputFnWithMismatchLengthOfInputs(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} x_mismatch_length = {'a': np.arange(1), 'b': b} y_longer_length = np.arange(10) with self.cached_session(): with self.assertRaisesRegexp( ValueError, 'Length of tensors in x and y is mismatched.'): failing_input_fn = numpy_io.numpy_input_fn( x, y_longer_length, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() with self.assertRaisesRegexp( ValueError, 'Length of tensors in x and y is mismatched.'): failing_input_fn = numpy_io.numpy_input_fn( x=x_mismatch_length, y=None, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() def testNumpyInputFnWithYAsDict(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)} with self.cached_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor, targets_tensor = input_fn() coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( session, coord=coord) features, targets = session.run([features_tensor, targets_tensor]) self.assertEqual(len(features), 2) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33]) self.assertEqual(len(targets), 2) self.assertAllEqual(targets['y1'], [-32, -31]) self.assertAllEqual(targets['y2'], [32, 
31]) session.run([features_tensor, targets_tensor]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features_tensor, targets_tensor]) coord.request_stop() coord.join(threads) def testNumpyInputFnWithYIsEmptyDict(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = {} with self.cached_session(): with self.assertRaisesRegexp(ValueError, 'y cannot be empty'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() def testNumpyInputFnWithDuplicateKeysInXAndY(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b} with self.cached_session(): with self.assertRaisesRegexp( ValueError, '2 duplicate keys are found in both x and y'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() def testNumpyInputFnWithXIsArray(self): x = np.arange(4) * 1.0 y = np.arange(-32, -28) input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = input_fn() with tf.compat.v1.train.MonitoredSession() as session: res = session.run([features, target]) self.assertAllEqual(res[0], [0, 1]) self.assertAllEqual(res[1], [-32, -31]) session.run([features, target]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) def testNumpyInputFnWithXIsNDArray(self): x = np.arange(16).reshape(4, 2, 2) * 1.0 y = np.arange(-48, -32).reshape(4, 2, 2) input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = input_fn() with tf.compat.v1.train.MonitoredSession() as session: res = session.run([features, target]) self.assertAllEqual(res[0], [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]) self.assertAllEqual(res[1], [[[-48, -47], [-46, -45]], [[-44, -43], [-42, -41]]]) session.run([features, target]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features, target]) def 
testNumpyInputFnWithXIsArrayYIsDict(self): x = np.arange(4) * 1.0 y = {'y1': np.arange(-32, -28)} input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor, targets_tensor = input_fn() with tf.compat.v1.train.MonitoredSession() as session: features, targets = session.run([features_tensor, targets_tensor]) self.assertEqual(len(features), 2) self.assertAllEqual(features, [0, 1]) self.assertEqual(len(targets), 1) self.assertAllEqual(targets['y1'], [-32, -31]) session.run([features_tensor, targets_tensor]) with self.assertRaises(tf.errors.OutOfRangeError): session.run([features_tensor, targets_tensor]) def testArrayAndDictGiveSameOutput(self): a = np.arange(4) * 1.0 b = np.arange(32, 36) x_arr = np.vstack((a, b)) x_dict = {'feature1': x_arr} y = np.arange(-48, -40).reshape(2, 4) input_fn_arr = numpy_io.numpy_input_fn( x_arr, y, batch_size=2, shuffle=False, num_epochs=1) features_arr, targets_arr = input_fn_arr() input_fn_dict = numpy_io.numpy_input_fn( x_dict, y, batch_size=2, shuffle=False, num_epochs=1) features_dict, targets_dict = input_fn_dict() with tf.compat.v1.train.MonitoredSession() as session: res_arr, res_dict = session.run([(features_arr, targets_arr), (features_dict, targets_dict)]) self.assertAllEqual(res_arr[0], res_dict[0]['feature1']) self.assertAllEqual(res_arr[1], res_dict[1]) @test_util.run_v1_only('Tests v1 only symbols') class FeatureColumnIntegrationTest(tf.test.TestCase): def _initialized_session(self, config=None): sess = tf.compat.v1.Session(config=config) sess.run(tf.compat.v1.initializers.global_variables()) sess.run(tf.compat.v1.initializers.tables_initializer()) return sess def _get_linear_model_bias(self, name='linear_model'): with tf.compat.v1.variable_scope(name, reuse=True): return tf.compat.v1.get_variable('bias_weights') def _get_linear_model_column_var(self, column, name='linear_model'): return tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, name + '/' + 
column.name)[0] def _get_keras_linear_model_predictions(self, features, feature_columns, units=1, sparse_combiner='sum', weight_collections=None, trainable=True, cols_to_vars=None): keras_linear_model = _LinearModel( feature_columns, units, sparse_combiner, weight_collections, trainable, name='linear_model') retval = keras_linear_model(features) # pylint: disable=not-callable if cols_to_vars is not None: cols_to_vars.update(keras_linear_model.cols_to_vars()) return retval def test_linear_model_numpy_input_fn(self): price = tf.feature_column.numeric_column('price') price_buckets = tf.feature_column.bucketized_column( price, boundaries=[0., 10., 100.,]) body_style = tf.feature_column.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) input_fn = numpy_io.numpy_input_fn( x={ 'price': np.array([-1., 2., 13., 104.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = tf.compat.v1.feature_column.linear_model(features, [price_buckets, body_style]) # self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( sess, coord=coord) bias = self._get_linear_model_bias() price_buckets_var = self._get_linear_model_column_var(price_buckets) body_style_var = self._get_linear_model_column_var(body_style) sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net)) coord.request_stop() coord.join(threads) def test_linear_model_impl_numpy_input_fn(self): price = tf.feature_column.numeric_column('price') price_buckets = tf.feature_column.bucketized_column( price, boundaries=[ 0., 10., 100., ]) body_style = tf.feature_column.categorical_column_with_vocabulary_list( 'body-style', 
vocabulary_list=['hardtop', 'wagon', 'sedan']) input_fn = numpy_io.numpy_input_fn( x={ 'price': np.array([-1., 2., 13., 104.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = self._get_keras_linear_model_predictions(features, [price_buckets, body_style]) # self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( sess, coord=coord) bias = self._get_linear_model_bias() price_buckets_var = self._get_linear_model_column_var(price_buckets) body_style_var = self._get_linear_model_column_var(body_style) sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net)) coord.request_stop() coord.join(threads) def test_functional_input_layer_with_numpy_input_fn(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 (6., 7., 8., 9., 10.), # id 1 (11., 12., 13., 14., 15.) # id 2 ) def _initializer(shape, dtype, partition_info): del shape, dtype, partition_info return embedding_values # price has 1 dimension in input_layer price = tf.feature_column.numeric_column('price') body_style = tf.feature_column.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) # one_hot_body_style has 3 dims in input_layer. one_hot_body_style = tf.feature_column.indicator_column(body_style) # embedded_body_style has 5 dims in input_layer. 
embedded_body_style = tf.feature_column.embedding_column( body_style, dimension=5, initializer=_initializer) input_fn = numpy_io.numpy_input_fn( x={ 'price': np.array([11., 12., 13., 14.]), 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), }, batch_size=2, shuffle=False) features = input_fn() net = tf.compat.v1.feature_column.input_layer( features, [price, one_hot_body_style, embedded_body_style]) self.assertEqual(1 + 3 + 5, net.shape[1]) with self._initialized_session() as sess: coord = tf.train.Coordinator() threads = tf.compat.v1.train.queue_runner.start_queue_runners( sess, coord=coord) # Each row is formed by concatenating `embedded_body_style`, # `one_hot_body_style`, and `price` in order. self.assertAllEqual([[11., 12., 13., 14., 15., 0., 0., 1., 11.], [1., 2., 3., 4., 5., 1., 0., 0., 12]], sess.run(net)) coord.request_stop() coord.join(threads) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/inputs/pandas_io.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Methods to allow pandas.DataFrame."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import uuid
import numpy as np
import six

from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.inputs.queues import feeding_functions

try:
  # pylint: disable=g-import-not-at-top
  # pylint: disable=unused-import
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


def _get_unique_target_key(features, target_column_name):
  """Returns a key that does not exist in the input DataFrame `features`.

  If `target_column_name` already names a column of `features`, a random
  UUID suffix is appended so the target can be inserted without clobbering
  an existing feature column.

  Args:
    features: DataFrame
    target_column_name: Name of the target column as a `str`

  Returns:
    A unique key that can be used to insert the target into
      features.
  """
  if target_column_name in features:
    target_column_name += '_' + str(uuid.uuid4())
  return target_column_name


@estimator_export(v1=['estimator.inputs.pandas_input_fn'])
def pandas_input_fn(x,
                    y=None,
                    batch_size=128,
                    num_epochs=1,
                    shuffle=None,
                    queue_capacity=1000,
                    num_threads=1,
                    target_column='target'):
  """Returns input function that would feed Pandas DataFrame into the model.

  Note: `y`'s index must match `x`'s index.

  Args:
    x: pandas `DataFrame` object.
    y: pandas `Series` object or `DataFrame`. `None` if absent.
    batch_size: int, size of batches to return.
    num_epochs: int, number of epochs to iterate over data. If not `None`,
      read attempts that would exceed this value will raise `OutOfRangeError`.
    shuffle: bool, whether to read the records in random order.
    queue_capacity: int, size of the read queue. If `None`, it will be set
      roughly to the size of `x`.
    num_threads: Integer, number of threads used for reading and enqueueing. In
      order to have predicted and repeatable order of reading and enqueueing,
      such as in prediction and evaluation mode, `num_threads` should be 1.
    target_column: str, name to give the target column `y`. This parameter is
      not used when `y` is a `DataFrame`.

  Returns:
    Function, that has signature of ()->(dict of `features`, `target`)

  Raises:
    ValueError: if `x` already contains a column with the same name as `y`, or
      if the indexes of `x` and `y` don't match.
    ValueError: if `shuffle` is not provided or is not a bool.
    TypeError: if pandas is not installed, or if `target_column` is not a
      string.
  """
  if not HAS_PANDAS:
    raise TypeError(
        'pandas_input_fn should not be called without pandas installed')
  if not isinstance(shuffle, bool):
    raise ValueError('shuffle must be provided and explicitly set as boolean '
                     '(it is recommended to set it as True for training); '
                     'got {}'.format(shuffle))
  if not isinstance(target_column, six.string_types):
    raise TypeError('target_column must be a string type')

  # Work on a copy so the caller's DataFrame is never mutated by the target
  # columns inserted below.
  x = x.copy()
  if y is not None:
    if target_column in x:
      raise ValueError(
          'Cannot use name %s for target column: DataFrame already has a '
          'column with that name: %s' % (target_column, x.columns))
    if not np.array_equal(x.index, y.index):
      raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n'
                       'Index for y: %s\n' % (x.index, y.index))
    if isinstance(y, pd.DataFrame):
      # Multi-target case: rename each target column to a key guaranteed not
      # to collide with a feature column, keeping (original, unique) pairs so
      # input_fn below can map the dequeued values back to the caller's names.
      y_columns = [
          (column, _get_unique_target_key(x, column)) for column in list(y)
      ]
      # NOTE: target_column is rebound to a list here; input_fn uses
      # isinstance(target_column, list) to detect the DataFrame-target case.
      target_column = [v for _, v in y_columns]
      x[target_column] = y
    else:
      x[target_column] = y

  # TODO(mdan): These are memory copies. We probably don't need 4x slack space.
  # The sizes below are consistent with what I've seen elsewhere.
  if queue_capacity is None:
    if shuffle:
      queue_capacity = 4 * len(x)
    else:
      queue_capacity = len(x)
  min_after_dequeue = max(queue_capacity / 4, 1)

  def input_fn():
    """Pandas input function."""
    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
        x,
        queue_capacity,
        shuffle=shuffle,
        min_after_dequeue=min_after_dequeue,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)
    # dequeue_up_to allows a smaller final batch when the epoch count is
    # bounded; dequeue_many would block forever on a partial last batch.
    if num_epochs is None:
      features = queue.dequeue_many(batch_size)
    else:
      features = queue.dequeue_up_to(batch_size)
    assert len(features) == len(x.columns) + 1, ('Features should have one '
                                                 'extra element for the index.')
    # The first dequeued tensor is the DataFrame index; drop it.
    features = features[1:]
    features = dict(zip(list(x.columns), features))
    if y is not None:
      if isinstance(target_column, list):
        # y was a DataFrame: pop the uniquified target columns and return them
        # under the caller's original column names.
        keys = [k for k, _ in y_columns]
        values = [features.pop(column) for column in target_column]
        target = {k: v for k, v in zip(keys, values)}
      else:
        target = features.pop(target_column)
      return features, target
    return features

  return input_fn



================================================
FILE: tensorflow_estimator/python/estimator/inputs/pandas_io_test.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for pandas_io."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.inputs import pandas_io

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


@test_util.run_v1_only('Tests v1 only symbols')
class PandasIoTest(tf.test.TestCase):
  """Tests for `pandas_io.pandas_input_fn` (v1 queue-based input)."""

  def makeTestDataFrame(self):
    # Index [100, 104) deliberately differs from positions [0, 4) so tests
    # catch code that confuses label-based and positional indexing.
    index = np.arange(100, 104)
    a = np.arange(4)
    b = np.arange(32, 36)
    x = pd.DataFrame({'a': a, 'b': b}, index=index)
    y = pd.Series(np.arange(-32, -28), index=index)
    return x, y

  def makeTestDataFrameWithYAsDataFrame(self):
    # Variant where y is a two-column DataFrame (multi-target case).
    index = np.arange(100, 104)
    a = np.arange(4)
    b = np.arange(32, 36)
    a_label = np.arange(10, 14)
    b_label = np.arange(50, 54)
    x = pd.DataFrame({'a': a, 'b': b}, index=index)
    y = pd.DataFrame({'a_target': a_label, 'b_target': b_label}, index=index)
    return x, y

  def callInputFnOnce(self, input_fn, session):
    # Runs exactly one session.run of the input_fn's tensors, with the queue
    # runners started and cleanly stopped around it.
    results = input_fn()
    coord = tf.train.Coordinator()
    threads = tf.compat.v1.train.queue_runner.start_queue_runners(
        session, coord=coord)
    result_values = session.run(results)
    coord.request_stop()
    coord.join(threads)
    return result_values

  def testPandasInputFn_IndexMismatch(self):
    if not HAS_PANDAS:
      return
    x, _ = self.makeTestDataFrame()
    y_noindex = pd.Series(np.arange(-32, -28))
    with self.assertRaises(ValueError):
      pandas_io.pandas_input_fn(
          x, y_noindex, batch_size=2, shuffle=False, num_epochs=1)

  def testPandasInputFn_RaisesWhenTargetColumnIsAList(self):
    if not HAS_PANDAS:
      return
    x, y = self.makeTestDataFrame()
    with self.assertRaisesRegexp(TypeError,
                                 'target_column must be a string type'):
      pandas_io.pandas_input_fn(
          x,
          y,
          batch_size=2,
          shuffle=False,
          num_epochs=1,
          target_column=['one', 'two'])

  def testPandasInputFn_NonBoolShuffle(self):
    if not HAS_PANDAS:
      return
    x, _ = self.makeTestDataFrame()
    y_noindex = pd.Series(np.arange(-32, -28))
    with self.assertRaisesRegexp(ValueError,
                                 'shuffle must be provided and explicitly '
                                 'set as boolean'):
      # Default shuffle is None
      pandas_io.pandas_input_fn(x, y_noindex)

  def testPandasInputFn_ProducesExpectedOutputs(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      features, target = self.callInputFnOnce(input_fn, session)

      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      self.assertAllEqual(target, [-32, -31])

  def testPandasInputFnWhenYIsDataFrame_ProducesExpectedOutput(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrameWithYAsDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      features, targets = self.callInputFnOnce(input_fn, session)

      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      # Targets come back as a dict keyed by y's original column names.
      self.assertAllEqual(targets['a_target'], [10, 11])
      self.assertAllEqual(targets['b_target'], [50, 51])

  def testPandasInputFnYIsDataFrame_HandlesOverlappingColumns(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrameWithYAsDataFrame()
      # Target column names collide with feature names; uniquification in
      # pandas_input_fn must keep them apart internally.
      y = y.rename(columns={'a_target': 'a', 'b_target': 'b'})
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      features, targets = self.callInputFnOnce(input_fn, session)

      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      self.assertAllEqual(targets['a'], [10, 11])
      self.assertAllEqual(targets['b'], [50, 51])

  def testPandasInputFnYIsDataFrame_HandlesOverlappingColumnsInTargets(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrameWithYAsDataFrame()
      y = y.rename(columns={'a_target': 'a', 'b_target': 'a_n'})
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      features, targets = self.callInputFnOnce(input_fn, session)

      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      self.assertAllEqual(targets['a'], [10, 11])
      self.assertAllEqual(targets['a_n'], [50, 51])

  def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      # batch_size (128) exceeds 2 epochs of 2 rows, so one run yields all
      # four rows and the next run is out of range.
      index = np.arange(100, 102)
      a = np.arange(2)
      b = np.arange(32, 34)
      x = pd.DataFrame({'a': a, 'b': b}, index=index)
      y = pd.Series(np.arange(-32, -30), index=index)
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=128, shuffle=False, num_epochs=2)

      results = input_fn()

      coord = tf.train.Coordinator()
      threads = tf.compat.v1.train.queue_runner.start_queue_runners(
          session, coord=coord)

      features, target = session.run(results)
      self.assertAllEqual(features['a'], [0, 1, 0, 1])
      self.assertAllEqual(features['b'], [32, 33, 32, 33])
      self.assertAllEqual(target, [-32, -31, -32, -31])

      with self.assertRaises(tf.errors.OutOfRangeError):
        session.run(results)

      coord.request_stop()
      coord.join(threads)

  def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      # 5 rows / batch_size 2 -> two full batches plus a final batch of 1.
      index = np.arange(100, 105)
      a = np.arange(5)
      b = np.arange(32, 37)
      x = pd.DataFrame({'a': a, 'b': b}, index=index)
      y = pd.Series(np.arange(-32, -27), index=index)

      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      results = input_fn()

      coord = tf.train.Coordinator()
      threads = tf.compat.v1.train.queue_runner.start_queue_runners(
          session, coord=coord)

      features, target = session.run(results)
      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      self.assertAllEqual(target, [-32, -31])

      features, target = session.run(results)
      self.assertAllEqual(features['a'], [2, 3])
      self.assertAllEqual(features['b'], [34, 35])
      self.assertAllEqual(target, [-30, -29])

      features, target = session.run(results)
      self.assertAllEqual(features['a'], [4])
      self.assertAllEqual(features['b'], [36])
      self.assertAllEqual(target, [-28])

      with self.assertRaises(tf.errors.OutOfRangeError):
        session.run(results)

      coord.request_stop()
      coord.join(threads)

  def testPandasInputFn_OnlyX(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, _ = self.makeTestDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y=None, batch_size=2, shuffle=False, num_epochs=1)

      # With y=None, input_fn returns features only (no (features, target)
      # tuple).
      features = self.callInputFnOnce(input_fn, session)

      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])

  def testPandasInputFn_ExcludesIndex(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features, _ = self.callInputFnOnce(input_fn, session)

      self.assertFalse('index' in features)

  def assertInputsCallableNTimes(self, input_fn, session, n):
    # Asserts that the input tensors can be run exactly n times before
    # raising OutOfRangeError (i.e. the epoch limit is respected).
    inputs = input_fn()
    coord = tf.train.Coordinator()
    threads = tf.compat.v1.train.queue_runner.start_queue_runners(
        session, coord=coord)
    for _ in range(n):
      session.run(inputs)
    with self.assertRaises(tf.errors.OutOfRangeError):
      session.run(inputs)
    coord.request_stop()
    coord.join(threads)

  def testPandasInputFn_RespectsEpoch_NoShuffle(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=4, shuffle=False, num_epochs=1)

      self.assertInputsCallableNTimes(input_fn, session, 1)

  def testPandasInputFn_RespectsEpoch_WithShuffle(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=4, shuffle=True, num_epochs=1)

      self.assertInputsCallableNTimes(input_fn, session, 1)

  def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self):
    if not HAS_PANDAS:
      return
    with self.cached_session() as session:
      x, y = self.makeTestDataFrame()
      # queue_capacity=None exercises the auto-sizing path (4 * len(x) when
      # shuffling).
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=True, queue_capacity=None, num_epochs=2)

      self.assertInputsCallableNTimes(input_fn, session, 4)

  def testPandasInputFn_RespectsEpochUnevenBatches(self):
    if not HAS_PANDAS:
      return
    x, y = self.makeTestDataFrame()
    with self.cached_session() as session:
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=3, shuffle=False, num_epochs=1)

      # Before the last batch, only one element of the epoch should remain.
      self.assertInputsCallableNTimes(input_fn, session, 2)

  def testPandasInputFn_Idempotent(self):
    if not HAS_PANDAS:
      return
    x, y = self.makeTestDataFrame()
    # Building the input pipeline repeatedly must not mutate x/y or fail.
    for _ in range(2):
      pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)()
    for _ in range(2):
      pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=True, num_epochs=1)()


if __name__ == '__main__':
  tf.test.main()



================================================
FILE: tensorflow_estimator/python/estimator/inputs/queues/__init__.py
================================================



================================================
FILE: tensorflow_estimator/python/estimator/inputs/queues/feeding_functions.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for enqueuing data from arrays and pandas `DataFrame`s."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import random
import types as tp
import numpy as np
import six
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow_estimator.python.estimator.inputs.queues import feeding_queue_runner as fqr

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


def _fill_array(arr, seq, fillvalue=0):
  """Recursively fills padded arr with elements from seq.

  If length of seq is less than arr padded length, fillvalue used.
  Args:
    arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len].
    seq: Non-padded list of data samples of shape
      [batch_size, ..., padded_dim(None)]
    fillvalue: Default fillvalue to use.
  """
  if arr.ndim == 1:
    try:
      len_ = len(seq)
    except TypeError:
      # seq is a scalar/non-sized element: pad the entire row.
      len_ = 0
    arr[:len_] = seq
    arr[len_:] = fillvalue
  else:
    # zip_longest pads missing sub-sequences with () so shorter rows are
    # entirely filled with fillvalue by the recursive call.
    for subarr, subseq in six.moves.zip_longest(arr, seq, fillvalue=()):
      _fill_array(subarr, subseq, fillvalue)


def _pad_if_needed(batch_key_item, fillvalue=0):
  """ Returns padded batch.

  Args:
    batch_key_item: List of data samples of any type with shape
      [batch_size, ..., padded_dim(None)].
    fillvalue: Default fillvalue to use.

  Returns:
    Padded with zeros tensor of same type and shape
      [batch_size, ..., max_padded_dim_len].

  Raises:
    ValueError if data samples have different shapes (except last padded dim).
  """
  # All dims except the last must agree across samples; scalars get -1 as a
  # sentinel shape.
  shapes = [
      seq.shape[:-1] if len(seq.shape) > 0 else -1 for seq in batch_key_item
  ]
  if not all(shapes[0] == x for x in shapes):
    raise ValueError("Array shapes must match.")

  last_length = [
      seq.shape[-1] if len(seq.shape) > 0 else 0 for seq in batch_key_item
  ]
  # Fast path: already uniform in the last dim, no padding needed.
  if all([x == last_length[0] for x in last_length]):
    return batch_key_item

  batch_size = len(batch_key_item)
  max_sequence_length = max(last_length)
  result_batch = np.zeros(
      shape=[batch_size] + list(shapes[0]) + [max_sequence_length],
      dtype=batch_key_item[0].dtype)
  _fill_array(result_batch, batch_key_item, fillvalue)
  return result_batch


def _get_integer_indices_for_next_batch(batch_indices_start, batch_size,
                                        epoch_end, array_length, current_epoch,
                                        total_epochs):
  """Returns the integer indices for next batch.

  If total epochs is not None and current epoch is the final epoch, the end
  index of the next batch should not exceed the `epoch_end` (i.e., the final
  batch might not have size `batch_size` to avoid overshooting the last epoch).

  Args:
    batch_indices_start: Integer, the index to start next batch.
    batch_size: Integer, size of batches to return.
    epoch_end: Integer, the end index of the epoch. The epoch could start from a
      random position, so `epoch_end` provides the end index for that.
    array_length: Integer, the length of the array.
    current_epoch: Integer, the epoch number has been emitted.
    total_epochs: Integer or `None`, the total number of epochs to emit. If
      `None` will run forever.

  Returns:
    A tuple of a list with integer indices for next batch and `current_epoch`
    value after the next batch.

  Raises:
    OutOfRangeError if `current_epoch` is not less than `total_epochs`.

  """
  if total_epochs is not None and current_epoch >= total_epochs:
    raise tf.errors.OutOfRangeError(None, None,
                                    "Already emitted %s epochs." %
                                    current_epoch)

  # Wrap indices modulo the array length so a batch may span the end of one
  # epoch and the start of the next.
  batch_indices_end = batch_indices_start + batch_size
  batch_indices = [
      j % array_length for j in range(batch_indices_start, batch_indices_end)
  ]
  # Every occurrence of epoch_end inside this batch means one epoch finished.
  epoch_end_indices = [i for i, x in enumerate(batch_indices) if x == epoch_end]
  current_epoch += len(epoch_end_indices)

  if total_epochs is None or current_epoch < total_epochs:
    return (batch_indices, current_epoch)

  # Now we might have emitted more data for expected epochs. Need to trim.
  final_epoch_end_inclusive = epoch_end_indices[-(current_epoch - total_epochs +
                                                  1)]
  batch_indices = batch_indices[:final_epoch_end_inclusive + 1]

  return (batch_indices, total_epochs)


class _ArrayFeedFn(object):
  """Creates feed dictionaries from numpy arrays."""

  def __init__(self,
               placeholders,
               array,
               batch_size,
               random_start=False,
               seed=None,
               num_epochs=None):
    if len(placeholders) != 2:
      raise ValueError("_array_feed_fn expects 2 placeholders; got {}.".format(
          len(placeholders)))
    self._placeholders = placeholders
    self._array = array
    self._max = len(array)
    self._batch_size = batch_size
    self._num_epochs = num_epochs
    self._epoch = 0
    random.seed(seed)
    # _trav is the next index to emit; with random_start each thread begins
    # at a random position, and _epoch_end is the index just before it
    # (mod len), i.e. the last index of a full pass.
    self._trav = random.randrange(self._max) if random_start else 0
    self._epoch_end = (self._trav - 1) % self._max

  def __call__(self):
    """Returns a feed dict of {index placeholder, value placeholder}."""
    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
        batch_indices_start=self._trav,
        batch_size=self._batch_size,
        epoch_end=self._epoch_end,
        array_length=self._max,
        current_epoch=self._epoch,
        total_epochs=self._num_epochs)

    self._trav = (integer_indexes[-1] + 1) % self._max
    return {
        self._placeholders[0]: integer_indexes,
        self._placeholders[1]: self._array[integer_indexes]
    }


class _OrderedDictNumpyFeedFn(object):
  """Creates feed dictionaries from `OrderedDict`s of numpy arrays."""

  def __init__(self,
               placeholders,
               ordered_dict_of_arrays,
               batch_size,
               random_start=False,
               seed=None,
               num_epochs=None):
    if len(placeholders) != len(ordered_dict_of_arrays) + 1:
      # NOTE(review): the message reports len(ordered_dict_of_arrays) but the
      # check requires len(ordered_dict_of_arrays) + 1 placeholders (one extra
      # for the row index) — the "Expected" count shown is off by one.
      raise ValueError("Expected {} placeholders; got {}.".format(
          len(ordered_dict_of_arrays), len(placeholders)))
    self._index_placeholder = placeholders[0]
    self._col_placeholders = placeholders[1:]
    self._ordered_dict_of_arrays = ordered_dict_of_arrays
    self._max = len(next(iter(ordered_dict_of_arrays.values())))
    for _, v in ordered_dict_of_arrays.items():
      if len(v) != self._max:
        raise ValueError("Array lengths must match.")
    self._batch_size = batch_size
    self._num_epochs = num_epochs
    self._epoch = 0
    random.seed(seed)
    # See _ArrayFeedFn for the _trav/_epoch_end traversal convention.
    self._trav = random.randrange(self._max) if random_start else 0
    self._epoch_end = (self._trav - 1) % self._max

  def __call__(self):
    """Returns a feed dict mapping index + column placeholders to batches."""
    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
        batch_indices_start=self._trav,
        batch_size=self._batch_size,
        epoch_end=self._epoch_end,
        array_length=self._max,
        current_epoch=self._epoch,
        total_epochs=self._num_epochs)

    self._trav = (integer_indexes[-1] + 1) % self._max
    feed_dict = {self._index_placeholder: integer_indexes}
    cols = [
        column[integer_indexes]
        for column in self._ordered_dict_of_arrays.values()
    ]
    feed_dict.update(dict(zip(self._col_placeholders, cols)))
    return feed_dict


class _PandasFeedFn(object):
  """Creates feed dictionaries from pandas `DataFrames`."""

  def __init__(self,
               placeholders,
               dataframe,
               batch_size,
               random_start=False,
               seed=None,
               num_epochs=None):
    if len(placeholders) != len(dataframe.columns) + 1:
      raise ValueError("Expected {} placeholders; got {}.".format(
          len(dataframe.columns) + 1, len(placeholders)))
    self._index_placeholder = placeholders[0]
    self._col_placeholders = placeholders[1:]
    self._dataframe = dataframe
    self._max = len(dataframe)
    self._batch_size = batch_size
    self._num_epochs = num_epochs
    self._epoch = 0
    random.seed(seed)
    # See _ArrayFeedFn for the _trav/_epoch_end traversal convention.
    self._trav = random.randrange(self._max) if random_start else 0
    self._epoch_end = (self._trav - 1) % self._max

  def __call__(self):
    """Returns a feed dict of the DataFrame index plus one column per key."""
    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
        batch_indices_start=self._trav,
        batch_size=self._batch_size,
        epoch_end=self._epoch_end,
        array_length=self._max,
        current_epoch=self._epoch,
        total_epochs=self._num_epochs)

    self._trav = (integer_indexes[-1] + 1) % self._max
    # iloc is positional, matching the integer traversal indices above.
    result = self._dataframe.iloc[integer_indexes]
    cols = [result[col].values for col in result.columns]
    feed_dict = dict(zip(self._col_placeholders, cols))
    feed_dict[self._index_placeholder] = result.index.values
    return feed_dict


class _GeneratorFeedFn(object):
  """Creates feed dictionaries from `Generator` of `dicts` of numpy arrays."""

  def __init__(self,
               placeholders,
               generator,
               batch_size,
               random_start=False,
               seed=None,
               num_epochs=None,
               pad_value=None):
    # Pull one sample to discover the key set; note this consumes the first
    # element of a fresh generator instance (a new one is created below).
    first_sample = next(generator())
    if len(placeholders) != len(first_sample):
      raise ValueError("Expected {} placeholders; got {}.".format(
          len(first_sample), len(placeholders)))
    # Keys are sorted so placeholder order is deterministic across calls.
    self._keys = sorted(list(first_sample.keys()))
    self._col_placeholders = placeholders
    self._generator_function = generator
    self._iterator = generator()
    self._batch_size = batch_size
    self._num_epochs = num_epochs
    self._epoch = 0
    self._pad_value = pad_value
    random.seed(seed)

  def __call__(self):
    """Returns a feed dict of one batch drawn from the generator."""
    if self._num_epochs and self._epoch >= self._num_epochs:
      raise tf.errors.OutOfRangeError(None, None,
                                      "Already emitted %s epochs." %
                                      self._epoch)

    list_dict = {}
    list_dict_size = 0
    while list_dict_size < self._batch_size:
      try:
        data_row = next(self._iterator)
      except StopIteration:
        # Generator exhausted: one epoch done; restart it so a batch may
        # span the epoch boundary.
        self._epoch += 1
        self._iterator = self._generator_function()
        data_row = next(self._iterator)
      for index, key in enumerate(self._keys):
        if key not in data_row.keys():
          raise KeyError("key mismatch between dicts emitted by GenFun "
                         "Expected {} keys; got {}".format(
                             self._keys, data_row.keys()))
        list_dict.setdefault(self._col_placeholders[index],
                             list()).append(data_row[key])
      list_dict_size += 1

    if self._pad_value is not None:
      feed_dict = {
          key: np.asarray(_pad_if_needed(item, self._pad_value))
          for key, item in list(list_dict.items())
      }
    else:
      feed_dict = {
          key: np.asarray(item) for key, item in list(list_dict.items())
      }
    return feed_dict


def _enqueue_data(data,
                  capacity,
                  shuffle=False,
                  min_after_dequeue=None,
                  num_threads=1,
                  seed=None,
                  name="enqueue_input",
                  enqueue_size=1,
                  num_epochs=None,
                  pad_value=None):
  """Creates a queue filled from a numpy array or pandas `DataFrame`.

  Returns a queue filled with the rows of the given (`OrderedDict` of) array
  or `DataFrame`. In the case of a pandas `DataFrame`, the first enqueued
  `Tensor` corresponds to the index of the `DataFrame`. For (`OrderedDict` of)
  numpy arrays, the first enqueued `Tensor` contains the row number.

  Args:
    data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator
      yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
      into the queue.
    capacity: the capacity of the queue.
    shuffle: whether or not to shuffle the rows of the array.
    min_after_dequeue: minimum number of elements that can remain in the queue
      after a dequeue operation. Only used when `shuffle` is true. If not set,
      defaults to `capacity` / 4.
    num_threads: number of threads used for reading and enqueueing.
    seed: used to seed shuffling and reader starting points.
    name: a scope name identifying the data.
    enqueue_size: the number of rows to enqueue per step.
    num_epochs: limit enqueuing to a specified number of epochs, if provided.
    pad_value: default value for dynamic padding of data samples, if provided.

  Returns:
    A queue filled with the rows of the given (`OrderedDict` of) array or
      `DataFrame`.

  Raises:
    TypeError: `data` is not a Pandas `DataFrame`, an `OrderedDict` of numpy
      arrays, a numpy `ndarray`, or a generator producing these.
    NotImplementedError: padding and shuffling data at the same time.
    NotImplementedError: padding usage with non generator data type.
  """
  with ops.name_scope(name):
    # Determine dtypes/shapes for the queue slots and which feed-fn class
    # drives the data. The leading int64 slot is the row index (not present
    # for the generator case).
    if isinstance(data, np.ndarray):
      types = [tf.dtypes.int64, tf.dtypes.as_dtype(data.dtype)]
      queue_shapes = [(), data.shape[1:]]
      get_feed_fn = _ArrayFeedFn
    elif isinstance(data, collections.OrderedDict):
      types = [tf.dtypes.int64
              ] + [tf.dtypes.as_dtype(col.dtype) for col in data.values()]
      queue_shapes = [()] + [col.shape[1:] for col in data.values()]
      get_feed_fn = _OrderedDictNumpyFeedFn
    elif isinstance(data, tp.FunctionType):
      # Peek at the first element of a fresh generator to learn keys/shapes.
      x_first_el = six.next(data())
      x_first_keys = sorted(x_first_el.keys())
      x_first_values = [x_first_el[key] for key in x_first_keys]
      types = [tf.dtypes.as_dtype(col.dtype) for col in x_first_values]
      queue_shapes = [col.shape for col in x_first_values]
      get_feed_fn = _GeneratorFeedFn
    elif HAS_PANDAS and isinstance(data, pd.DataFrame):
      types = [
          tf.dtypes.as_dtype(dt)
          for dt in [data.index.dtype] + list(data.dtypes)
      ]
      queue_shapes = [() for _ in types]
      get_feed_fn = _PandasFeedFn
    else:
      raise TypeError(
          "data must be either a numpy array or pandas DataFrame if pandas is "
          "installed; got {}".format(type(data).__name__))

    pad_data = pad_value is not None
    if pad_data and get_feed_fn is not _GeneratorFeedFn:
      raise NotImplementedError(
          "padding is only available with generator usage")
    if shuffle and pad_data:
      raise NotImplementedError(
          "padding and shuffling data at the same time is not implemented")

    # TODO(jamieas): TensorBoard warnings for all warnings below once available.

    if num_threads > 1 and num_epochs is not None:
      tf.compat.v1.logging.warn(
          "enqueue_data was called with num_epochs and num_threads > 1. "
          "num_epochs is applied per thread, so this will produce more "
          "epochs than you probably intend. "
          "If you want to limit epochs, use one thread.")
    if shuffle and num_threads > 1 and num_epochs is not None:
      tf.compat.v1.logging.warn(
          "enqueue_data was called with shuffle=True, num_threads > 1, and "
          "num_epochs. This will create multiple threads, all reading the "
          "array/dataframe in order adding to the same shuffling queue; the "
          "results will likely not be sufficiently shuffled.")
    if not shuffle and num_threads > 1:
      tf.compat.v1.logging.warn(
          "enqueue_data was called with shuffle=False and num_threads > 1. "
          "This will create multiple threads, all reading the "
          "array/dataframe in order. If you want examples read in order, use"
          " one thread; if you want multiple threads, enable shuffling.")

    if shuffle:
      min_after_dequeue = int(
          capacity / 4 if min_after_dequeue is None else min_after_dequeue)
      queue = tf.queue.RandomShuffleQueue(
          capacity,
          min_after_dequeue,
          dtypes=types,
          shapes=queue_shapes,
          seed=seed)
    elif pad_data:
      min_after_dequeue = 0  # just for the summary text
      # Replace the last (ragged) dim with None so PaddingFIFOQueue pads it.
      queue_shapes = list(
          map(lambda x: tuple(list(x[:-1]) + [None]) if len(x) > 0 else x,
              queue_shapes))
      queue = tf.queue.PaddingFIFOQueue(
          capacity, dtypes=types, shapes=queue_shapes)
    else:
      min_after_dequeue = 0  # just for the summary text
      queue = tf.queue.FIFOQueue(capacity, dtypes=types, shapes=queue_shapes)

    enqueue_ops = []
    feed_fns = []

    for i in range(num_threads):
      # Note the placeholders have no shapes, so they will accept any
      # enqueue_size.  enqueue_many below will break them up.
      placeholders = [tf.compat.v1.placeholder(t) for t in types]

      enqueue_ops.append(queue.enqueue_many(placeholders))
      # Give each thread a distinct, deterministic seed derived from `seed`.
      seed_i = None if seed is None else (i + 1) * seed

      if not pad_data:
        feed_fns.append(
            get_feed_fn(
                placeholders,
                data,
                enqueue_size,
                random_start=shuffle,
                seed=seed_i,
                num_epochs=num_epochs))
      else:
        feed_fns.append(
            get_feed_fn(
                placeholders,
                data,
                enqueue_size,
                random_start=shuffle,
                seed=seed_i,
                num_epochs=num_epochs,
                pad_value=pad_value))

    runner = fqr._FeedingQueueRunner(  # pylint: disable=protected-access
        queue=queue, enqueue_ops=enqueue_ops, feed_fns=feed_fns)
    tf.compat.v1.train.queue_runner.add_queue_runner(runner)

    # Scalar summary of queue fullness above the min_after_dequeue floor.
    full = (
        tf.cast(
            tf.math.maximum(0, queue.size() - min_after_dequeue),
            tf.dtypes.float32) * (1. / (capacity - min_after_dequeue)))
    # Note that name contains a '/' at the end so we intentionally do not place
    # a '/' after %s below.
    summary_name = ("queue/%sfraction_over_%d_of_%d_full" %
                    (queue.name, min_after_dequeue,
                     capacity - min_after_dequeue))
    tf.compat.v1.summary.scalar(summary_name, full)
    return queue



================================================
FILE: tensorflow_estimator/python/estimator/inputs/queues/feeding_functions_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests feeding functions using arrays and `DataFrames`."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import numpy as np
import tensorflow as tf
from tensorflow_estimator.python.estimator.inputs.queues import feeding_functions as ff

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


def vals_to_list(a):
  """Converts ndarray values of a feed dict to plain lists for assertEqual."""
  return {
      key: val.tolist() if isinstance(val, np.ndarray) else val
      for key, val in a.items()
  }


class _FeedingFunctionsTestCase(tf.test.TestCase):
  """Tests for feeding functions."""

  def testArrayFeedFnBatchOne(self):
    array = np.arange(32).reshape([16, 2])
    placeholders = ["index_placeholder", "value_placeholder"]
    aff = ff._ArrayFeedFn(placeholders, array, 1)

    # cycle around a couple times
    for x in range(0, 100):
      i = x % 16
      expected = {
          "index_placeholder": [i],
          "value_placeholder": [[2 * i, 2 * i + 1]]
      }
      actual = aff()
      self.assertEqual(expected, vals_to_list(actual))

  def testArrayFeedFnBatchFive(self):
    array = np.arange(32).reshape([16, 2])
    placeholders = ["index_placeholder", "value_placeholder"]
    aff = ff._ArrayFeedFn(placeholders, array, 5)

    # cycle around a couple times
    for _ in range(0, 101, 2):
      aff()

    # Next batch wraps from the last row back to the start.
    expected = {
        "index_placeholder": [15, 0, 1, 2, 3],
        "value_placeholder": [[30, 31], [0, 1], [2, 3], [4, 5], [6, 7]]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testArrayFeedFnBatchTwoWithOneEpoch(self):
    array = np.arange(5) + 10
    placeholders = ["index_placeholder", "value_placeholder"]
    aff = ff._ArrayFeedFn(placeholders, array, batch_size=2, num_epochs=1)

    expected = {"index_placeholder": [0, 1], "value_placeholder": [10, 11]}
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

    expected = {"index_placeholder": [2, 3], "value_placeholder": [12, 13]}
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

    # Final batch is short (one element) because the epoch ends.
    expected = {"index_placeholder": [4], "value_placeholder": [14]}
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testArrayFeedFnBatchOneHundred(self):
    array = np.arange(32).reshape([16, 2])
    placeholders = ["index_placeholder", "value_placeholder"]
    aff = ff._ArrayFeedFn(placeholders, array, 100)

    # Batch larger than the array: rows repeat cyclically to fill it.
    expected = {
        "index_placeholder":
            list(range(0, 16)) * 6 + list(range(0, 4)),
        "value_placeholder":
            np.arange(32).reshape([16, 2]).tolist() * 6 +
            [[0, 1], [2, 3], [4, 5], [6, 7]]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testArrayFeedFnBatchOneHundredWithSmallerArrayAndMultipleEpochs(self):
    array = np.arange(2) + 10
    placeholders = ["index_placeholder", "value_placeholder"]
    aff = ff._ArrayFeedFn(placeholders, array, batch_size=100, num_epochs=2)

    # num_epochs caps the batch at 2 epochs' worth of data.
    expected = {
        "index_placeholder": [0, 1, 0, 1],
        "value_placeholder": [10, 11, 10, 11],
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testPandasFeedFnBatchOne(self):
    if not HAS_PANDAS:
      return
    array1 = np.arange(32, 64)
    array2 = np.arange(64, 96)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._PandasFeedFn(placeholders, df, 1)

    # cycle around a couple times
    for x in range(0, 100):
      i = x % 32
      expected = {
          "index_placeholder": [i + 96],
          "a_placeholder": [32 + i],
          "b_placeholder": [64 + i]
      }
      actual = aff()
      self.assertEqual(expected, vals_to_list(actual))

  def testPandasFeedFnBatchFive(self):
    if not HAS_PANDAS:
      return
    array1 = np.arange(32, 64)
    array2 = np.arange(64, 96)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._PandasFeedFn(placeholders, df, 5)

    # cycle around a couple times
    for _ in range(0, 101, 2):
      aff()

    # Next batch wraps from the last row back to the start.
    expected = {
        "index_placeholder": [127, 96, 97, 98, 99],
        "a_placeholder": [63, 32, 33, 34, 35],
        "b_placeholder": [95, 64, 65, 66, 67]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testPandasFeedFnBatchTwoWithOneEpoch(self):
    if not HAS_PANDAS:
      return
    array1 = np.arange(32, 37)
    array2 = np.arange(64, 69)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 101))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._PandasFeedFn(placeholders, df, batch_size=2, num_epochs=1)

    expected = {
        "index_placeholder": [96, 97],
        "a_placeholder": [32, 33],
        "b_placeholder": [64, 65]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

    expected = {
        "index_placeholder": [98, 99],
        "a_placeholder": [34, 35],
        "b_placeholder": [66, 67]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

    # Final short batch at the end of the single epoch.
    expected = {
        "index_placeholder": [100],
        "a_placeholder": [36],
        "b_placeholder": [68]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testPandasFeedFnBatchOneHundred(self):
    if not HAS_PANDAS:
      return
    array1 = np.arange(32, 64)
    array2 = np.arange(64, 96)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._PandasFeedFn(placeholders, df, 100)

    # Batch larger than the frame: rows repeat cyclically to fill it.
    expected = {
        "index_placeholder":
            list(range(96, 128)) * 3 + list(range(96, 100)),
        "a_placeholder":
            list(range(32, 64)) * 3 + list(range(32, 36)),
        "b_placeholder":
            list(range(64, 96)) * 3 + list(range(64, 68))
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testPandasFeedFnBatchOneHundredWithSmallDataArrayAndMultipleEpochs(self):
    if not HAS_PANDAS:
      return
    array1 = np.arange(32, 34)
    array2 = np.arange(64, 66)
    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 98))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._PandasFeedFn(placeholders, df, batch_size=100, num_epochs=2)

    # num_epochs caps the batch at 2 epochs' worth of data.
    expected = {
        "index_placeholder": [96, 97, 96, 97],
        "a_placeholder": [32, 33, 32, 33],
        "b_placeholder": [64, 65, 64, 65]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testOrderedDictNumpyFeedFnBatchTwoWithOneEpoch(self):
    a = np.arange(32, 37)
    b = np.arange(64, 69)
    x = {"a": a, "b": b}
    ordered_dict_x = collections.OrderedDict(
        sorted(x.items(), key=lambda t: t[0]))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._OrderedDictNumpyFeedFn(
        placeholders, ordered_dict_x, batch_size=2, num_epochs=1)

    expected = {
        "index_placeholder": [0, 1],
        "a_placeholder": [32, 33],
        "b_placeholder": [64, 65]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

    expected = {
        "index_placeholder": [2, 3],
        "a_placeholder": [34, 35],
        "b_placeholder": [66, 67]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

    # Final short batch at the end of the single epoch.
    expected = {
        "index_placeholder": [4],
        "a_placeholder": [36],
        "b_placeholder": [68]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testOrderedDictNumpyFeedFnLargeBatchWithSmallArrayAndMultipleEpochs(self):
    a = np.arange(32, 34)
    b = np.arange(64, 66)
    x = {"a": a, "b": b}
    ordered_dict_x = collections.OrderedDict(
        sorted(x.items(), key=lambda t: t[0]))
    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
    aff = ff._OrderedDictNumpyFeedFn(
        placeholders, ordered_dict_x, batch_size=100, num_epochs=2)

    # num_epochs caps the batch at 2 epochs' worth of data.
    expected = {
        "index_placeholder": [0, 1, 0, 1],
        "a_placeholder": [32, 33, 32, 33],
        "b_placeholder": [64, 65, 64, 65]
    }
    actual = aff()
    self.assertEqual(expected, vals_to_list(actual))

  def testFillArraySmall(self):
    # Narrower rows are copied in and the remainder is zero-filled.
    a = (
        np.ones(shape=[32, 32], dtype=np.int32).tolist() +
        np.ones(shape=[32, 36], dtype=np.int32).tolist())
    actual = np.ones(shape=[64, 36], dtype=np.int32)
    ff._fill_array(actual, a)
    expected = np.ones(shape=[64, 36], dtype=np.int32)
    expected[:32, 32:] = 0
    self.assertEqual(expected.tolist(), actual.tolist())

  def testFillArrayLarge(self):
    a = (
        np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
        np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
    actual = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
    ff._fill_array(actual, a)
    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
    expected[:8, ..., 32:] = 0
    self.assertEqual(expected.tolist(), actual.tolist())

  def testFillArraySmallWithSpecifiedValue(self):
    # Same as above, but pad positions take the explicit fill_value.
    fill_value = 8
    a = (
        np.ones(shape=[32, 32], dtype=np.int32).tolist() +
        np.ones(shape=[32, 36], dtype=np.int32).tolist())
    actual = np.ones(shape=[64, 36], dtype=np.int32)
    ff._fill_array(actual, a, fill_value)
    expected = np.ones(shape=[64, 36], dtype=np.int32)
    expected[:32, 32:] = fill_value
    self.assertEqual(expected.tolist(), actual.tolist())

  def testFillArrayLargeWithSpecifiedValue(self):
    fill_value = 8
    a = (
        np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
        np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
    actual = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
    ff._fill_array(actual, a, fill_value)
    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
    expected[:8, ..., 32:] = fill_value
    self.assertEqual(expected.tolist(), actual.tolist())

  def testPadIfNeededSmall(self):
    # Ragged samples are padded out to the widest sample with zeros.
    a = (
        np.ones(shape=[32, 32], dtype=np.int32).tolist() +
        np.ones(shape=[32, 36], dtype=np.int32).tolist())
    a = list(map(np.array, a))
    actual = ff._pad_if_needed(a)
    expected = np.ones(shape=[64, 36], dtype=np.int32)
    expected[:32, 32:] = 0
    self.assertEqual(expected.tolist(), actual.tolist())

  def testPadIfNeededLarge(self):
    a = (
        np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
        np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
    a = list(map(np.array, a))
    actual = ff._pad_if_needed(a)
    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
    expected[:8, ..., 32:] = 0
    self.assertEqual(expected.tolist(), actual.tolist())

  def testPadIfNeededSmallWithSpecifiedValue(self):
    fill_value = 8
    a = (
        np.ones(shape=[32, 32], dtype=np.int32).tolist() +
        np.ones(shape=[32, 36], dtype=np.int32).tolist())
    a = list(map(np.array, a))
    actual = ff._pad_if_needed(a, fill_value)
    expected = np.ones(shape=[64, 36], dtype=np.int32)
    expected[:32, 32:] = fill_value
    self.assertEqual(expected.tolist(), actual.tolist())

  def testPadIfNeededLargeWithSpecifiedValue(self):
    fill_value = 8
    a = (
        np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
        np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
    a = list(map(np.array, a))
    actual = ff._pad_if_needed(a, fill_value)
    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
    expected[:8, ..., 32:] = fill_value
    self.assertEqual(expected.tolist(), actual.tolist())

  def testPadIfNeededSmallWithSpecifiedNonNumericValue(self):
    # Padding also works with non-numeric (bool) fill values.
    fill_value = False
    a = (
        np.ones(shape=[32, 32], dtype=bool).tolist() +
        np.ones(shape=[32, 36], dtype=bool).tolist())
    a = list(map(np.array, a))
    actual = ff._pad_if_needed(a, fill_value)
    expected = np.ones(shape=[64, 36], dtype=bool)
    expected[:32, 32:] = fill_value
    self.assertEqual(expected.tolist(), actual.tolist())

  def testPadIfNeededLargeWithSpecifiedNonNumericValue(self):
    fill_value = False
    a = (
        np.ones(shape=[8, 8, 8, 8, 32], dtype=bool).tolist() +
        np.ones(shape=[8, 8, 8, 8, 36], dtype=bool).tolist())
    a = list(map(np.array, a))
    actual = ff._pad_if_needed(a, fill_value)
    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=bool)
    expected[:8, ..., 32:] = fill_value
    self.assertEqual(expected.tolist(), actual.tolist())


if __name__ == "__main__":
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/inputs/queues/feeding_queue_runner.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A `QueueRunner` that takes a feed function as an argument."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import threading

import tensorflow as tf


class _FeedingQueueRunner(tf.compat.v1.train.queue_runner.QueueRunner):
  """A queue runner that allows the feeding of values such as numpy arrays."""

  def __init__(self,
               queue=None,
               enqueue_ops=None,
               close_op=None,
               cancel_op=None,
               feed_fns=None,
               queue_closed_exception_types=None):
    """Initialize the queue runner.

    For further documentation, see `queue_runner.py`. Note that
    `FeedingQueueRunner` does not support construction from protobuffer nor
    serialization to protobuffer.

    Args:
      queue: A `Queue`.
      enqueue_ops: List of enqueue ops to run in threads later.
      close_op: Op to close the queue. Pending enqueue ops are preserved.
      cancel_op: Op to close the queue and cancel pending enqueue ops.
      feed_fns: a list of functions that return a dictionary mapping fed
        `Tensor`s to values. Must be the same length as `enqueue_ops`.
      queue_closed_exception_types: Optional tuple of Exception types that
        indicate that the queue has been closed when raised during an enqueue
        operation.  Defaults to
        `(tf.errors.OutOfRangeError, tf.errors.CancelledError)`.

    Raises:
      ValueError: `feed_fns` is not `None` and has different length than
        `enqueue_ops`.
    """
    if queue_closed_exception_types is None:
      queue_closed_exception_types = (tf.errors.OutOfRangeError,
                                      tf.errors.CancelledError)
    super(_FeedingQueueRunner, self).__init__(
        queue,
        enqueue_ops,
        close_op,
        cancel_op,
        queue_closed_exception_types=queue_closed_exception_types)
    if feed_fns is None:
      # No feed functions: each enqueue op runs with feed_dict=None.
      self._feed_fns = [None for _ in enqueue_ops]
    else:
      if len(feed_fns) != len(enqueue_ops):
        raise ValueError(
            "If feed_fns is not None, it must have the same length as "
            "enqueue_ops.")
      self._feed_fns = feed_fns

  # pylint: disable=broad-except
  def _run(self, sess, enqueue_op, feed_fn, coord=None):
    """Execute the enqueue op in a loop, close the queue in case of error.

    Args:
      sess: A `Session`.
      enqueue_op: The `Operation` to run.
      feed_fn: the feed function to pass to `sess.run`.
      coord: Optional `Coordinator` object for reporting errors and checking
        for stop conditions.
    """
    # TODO(jamieas): Reduce code duplication with `QueueRunner`.
    if coord:
      coord.register_thread(threading.current_thread())
    # Tracks whether this thread has already decremented its run count, so
    # the finally block decrements exactly once per thread.
    decremented = False
    try:
      while True:
        if coord and coord.should_stop():
          break
        try:
          # feed_fn produces a fresh feed_dict per enqueue step (or None).
          feed_dict = None if feed_fn is None else feed_fn()
          sess.run(enqueue_op, feed_dict=feed_dict)
        except (tf.errors.OutOfRangeError, tf.errors.CancelledError):
          # This exception indicates that a queue was closed.
          with self._lock:
            self._runs_per_session[sess] -= 1
            decremented = True
            if self._runs_per_session[sess] == 0:
              # Last thread for this session closes the queue.
              try:
                sess.run(self._close_op)
              except Exception as e:
                # Intentionally ignore errors from close_op.
                tf.compat.v1.logging.vlog(1, "Ignored exception: %s", str(e))
            return
    except Exception as e:
      # This catches all other exceptions.
      if coord:
        coord.request_stop(e)
      else:
        tf.compat.v1.logging.error("Exception in QueueRunner: %s", str(e))
        with self._lock:
          self._exceptions_raised.append(e)
        raise
    finally:
      # Make sure we account for all terminations: normal or errors.
      if not decremented:
        with self._lock:
          self._runs_per_session[sess] -= 1

  def create_threads(self, sess, coord=None, daemon=False, start=False):
    """Create threads to run the enqueue ops for the given session.

    This method requires a session in which the graph was launched.  It creates
    a list of threads, optionally starting them.  There is one thread for each
    op passed in `enqueue_ops`.

    The `coord` argument is an optional coordinator, that the threads will use
    to terminate together and report exceptions.  If a coordinator is given,
    this method starts an additional thread to close the queue when the
    coordinator requests a stop.

    If previously created threads for the given session are still running, no
    new threads will be created.

    Args:
      sess: A `Session`.
      coord: Optional `Coordinator` object for reporting errors and checking
        stop conditions.
      daemon: Boolean.  If `True` make the threads daemon threads.
      start: Boolean.  If `True` starts the threads.  If `False` the
        caller must call the `start()` method of the returned threads.

    Returns:
      A list of threads.
    """
    with self._lock:
      try:
        if self._runs_per_session[sess] > 0:
          # Already started: no new threads to return.
          return []
      except KeyError:
        # We haven't seen this session yet.
        pass
      self._runs_per_session[sess] = len(self._enqueue_ops)
      self._exceptions_raised = []

    # One enqueue thread per (op, feed_fn) pair.
    ret_threads = [
        threading.Thread(target=self._run, args=(sess, op, feed_fn, coord))
        for op, feed_fn in zip(self._enqueue_ops, self._feed_fns)
    ]
    if coord:
      # Extra thread cancels pending enqueues when the coordinator stops.
      ret_threads.append(
          threading.Thread(
              target=self._close_on_stop,
              args=(sess, self._cancel_op, coord)))
    for t in ret_threads:
      if daemon:
        t.daemon = True
      if start:
        t.start()
    return ret_threads

  def _init_from_proto(self, queue_runner_def):
    # Proto round-tripping is unsupported because feed_fns are Python
    # callables and cannot be serialized.
    raise NotImplementedError(
        "{} does not support initialization from proto.".format(
            type(self).__name__))

  def to_proto(self):
    raise NotImplementedError(
        "{} does not support serialization to proto.".format(
            type(self).__name__))


================================================
FILE: tensorflow_estimator/python/estimator/inputs/queues/feeding_queue_runner_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests `FeedingQueueRunner` using arrays and `DataFrames`."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator.inputs.queues import feeding_functions as ff

try:
  # pylint: disable=g-import-not-at-top
  import pandas as pd
  HAS_PANDAS = True
except IOError:
  # Pandas writes a temporary file during import. If it fails, don't use pandas.
  HAS_PANDAS = False
except ImportError:
  HAS_PANDAS = False


def get_rows(array, row_indices):
  """Stacks the selected rows of `array` into a single 2-D ndarray."""
  rows = [array[i] for i in row_indices]
  return np.vstack(rows)


@test_util.deprecated_graph_mode_only
class FeedingQueueRunnerTestCase(tf.test.TestCase):
  """Tests for `FeedingQueueRunner`."""

  def testArrayFeeding(self):
    with tf.Graph().as_default():
      array = np.arange(32).reshape([16, 2])
      q = ff._enqueue_data(array, capacity=100)
      batch_size = 3
      dq_op = q.dequeue_many(batch_size)
      with tf.compat.v1.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.queue_runner.start_queue_runners(
            sess=sess, coord=coord)
        for i in range(100):
          # Single-threaded FIFO: rows arrive in order, wrapping cyclically.
          indices = [
              j % array.shape[0]
              for j in range(batch_size * i, batch_size * (i + 1))
          ]
          expected_dq = get_rows(array, indices)
          dq = sess.run(dq_op)
          np.testing.assert_array_equal(indices, dq[0])
          np.testing.assert_array_equal(expected_dq, dq[1])
        coord.request_stop()
        coord.join(threads)

  def testArrayFeedingMultiThread(self):
    with tf.Graph().as_default():
      array = np.arange(256).reshape([128, 2])
      q = ff._enqueue_data(array, capacity=128, num_threads=8, shuffle=True)
      batch_size = 3
      dq_op = q.dequeue_many(batch_size)
      with tf.compat.v1.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.queue_runner.start_queue_runners(
            sess=sess, coord=coord)
        for _ in range(100):
          # Shuffled multi-thread feeding: order is unknown, so recover the
          # indices from the dequeued index column and check row contents.
          dq = sess.run(dq_op)
          indices = dq[0]
          expected_dq = get_rows(array, indices)
          np.testing.assert_array_equal(expected_dq, dq[1])
        coord.request_stop()
        coord.join(threads)

  def testPandasFeeding(self):
    if not HAS_PANDAS:
      return
    with tf.Graph().as_default():
      array1 = np.arange(32)
      array2 = np.arange(32, 64)
      df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
      q = ff._enqueue_data(df, capacity=100)
      batch_size = 5
      # NOTE(review): literal 5 rather than batch_size — identical value here.
      dq_op = q.dequeue_many(5)
      with tf.compat.v1.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.queue_runner.start_queue_runners(
            sess=sess, coord=coord)
        for i in range(100):
          indices = [
              j % array1.shape[0]
              for j in range(batch_size * i, batch_size * (i + 1))
          ]
          expected_df_indices = df.index[indices]
          expected_rows = df.iloc[indices]
          dq = sess.run(dq_op)
          # First dequeued tensor is the DataFrame index; the rest are columns.
          np.testing.assert_array_equal(expected_df_indices, dq[0])
          for col_num, col in enumerate(df.columns):
            np.testing.assert_array_equal(expected_rows[col].values,
                                          dq[col_num + 1])
        coord.request_stop()
        coord.join(threads)

  def testPandasFeedingMultiThread(self):
    if not HAS_PANDAS:
      return
    with tf.Graph().as_default():
      array1 = np.arange(128, 256)
      array2 = 2 * array1
      df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(128))
      q = ff._enqueue_data(df, capacity=128, num_threads=8, shuffle=True)
      batch_size = 5
      dq_op = q.dequeue_many(batch_size)
      with tf.compat.v1.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.queue_runner.start_queue_runners(
            sess=sess, coord=coord)
        for _ in range(100):
          # Shuffled: recover indices from the dequeued index column.
          dq = sess.run(dq_op)
          indices = dq[0]
          expected_rows = df.iloc[indices]
          for col_num, col in enumerate(df.columns):
            np.testing.assert_array_equal(expected_rows[col].values,
                                          dq[col_num + 1])
        coord.request_stop()
        coord.join(threads)


if __name__ == "__main__":
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/keras_distribute_strategy_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Keras model-to-estimator using tf.distribute.Strategy."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.distribute import strategy_combinations
from tensorflow.python.eager import test
from tensorflow.python.ops.parsing_ops import gen_parsing_ops
from tensorflow_estimator.python.estimator import keras_lib
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator import run_config as run_config_lib

_RANDOM_SEED = 1337
_TRAIN_SIZE = 200
_INPUT_SIZE = (10,)
_NUM_CLASS = 2


def simple_sequential_model():
  """Builds a small uncompiled Sequential classifier for the synthetic data."""
  model = tf_keras.models.Sequential()
  model.add(
      tf_keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE))
  model.add(tf_keras.layers.Dropout(0.1))
  model.add(tf_keras.layers.Dense(_NUM_CLASS, activation='softmax'))
  return model


def simple_functional_model():
  """Builds the functional-API equivalent of `simple_sequential_model`."""
  a = tf_keras.layers.Input(shape=_INPUT_SIZE)
  b = tf_keras.layers.Dense(16, activation='relu')(a)
  b = tf_keras.layers.Dropout(0.1)(b)
  b = tf_keras.layers.Dense(_NUM_CLASS, activation='softmax')(b)
  model = tf_keras.models.Model(inputs=[a], outputs=[b])
  return model


def multi_inputs_multi_outputs_model():
  """Builds a compiled 3-input / 2-output model (one input is string-typed)."""
  input_a = tf_keras.layers.Input(shape=(16,), name='input_a')
  input_b = tf_keras.layers.Input(shape=(16,), name='input_b')
  input_m = tf_keras.layers.Input(shape=(8,), dtype='string', name='input_m')
  # dense_1 is shared between input_a and input_b.
  dense = tf_keras.layers.Dense(8, name='dense_1')

  interm_a = dense(input_a)
  # Read m
  interm_m = tf_keras.layers.Lambda(gen_parsing_ops.string_to_number)(input_m)
  interm_s = tf_keras.layers.Lambda(lambda k: k[0] * k[1])([interm_m, interm_a])
  interm_b = dense(input_b)
  merged = tf_keras.layers.concatenate([interm_s, interm_b], name='merge')
  output_c = tf_keras.layers.Dense(3, activation='softmax', name='dense_2')(
      merged)
  output_d = tf_keras.layers.Dense(2, activation='softmax', name='dense_3')(
      merged)
  model = tf_keras.models.Model(
      inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
  model.compile(
      loss='categorical_crossentropy',
      optimizer=tf_keras.optimizers.legacy.SGD(learning_rate=0.001),
      metrics={
          'dense_2': 'categorical_accuracy',
          'dense_3': 'categorical_accuracy'
      })
  return model


def get_ds_train_input_fn():
  """Returns a batched Dataset over the synthetic training split."""
  np.random.seed(_RANDOM_SEED)
  (x_train, y_train), _ = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=_INPUT_SIZE,
      num_classes=_NUM_CLASS)
  y_train = tf_keras.utils.to_categorical(y_train)

  dataset = tf.compat.v1.data.Dataset.from_tensor_slices((x_train, y_train))
  dataset = dataset.batch(32)
  return dataset


def get_ds_test_input_fn():
  """Returns a batched Dataset over the synthetic test split."""
  np.random.seed(_RANDOM_SEED)
  _, (x_test, y_test) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=_INPUT_SIZE,
      num_classes=_NUM_CLASS)
  y_test = tf_keras.utils.to_categorical(y_test)

  dataset = tf.compat.v1.data.Dataset.from_tensor_slices((x_test, y_test))
  dataset = dataset.batch(32)
  return dataset


def get_multi_inputs_multi_outputs_data():
  """Generates train/test dicts matching `multi_inputs_multi_outputs_model`."""
  (a_train, c_train), (a_test, c_test) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=(16,),
      num_classes=3,
      random_seed=_RANDOM_SEED)
  (b_train, d_train), (b_test, d_test) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=(16,),
      num_classes=2,
      random_seed=_RANDOM_SEED)
  (m_train, _), (m_test, _) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=(8,),
      num_classes=2,
      random_seed=_RANDOM_SEED)

  c_train = tf_keras.utils.to_categorical(c_train)
  c_test = tf_keras.utils.to_categorical(c_test)
  d_train = tf_keras.utils.to_categorical(d_train)
  d_test = tf_keras.utils.to_categorical(d_test)

  train_data = {
      'input_a': a_train,
      'input_b': b_train,
      'input_m': m_train,
      'output_c': c_train,
      'output_d': d_train
  }
  test_data = {
      'input_a': a_test,
      'input_b': b_test,
      'input_m': m_test,
      'output_c': c_test,
      'output_d': d_test
  }

  return (train_data, test_data)


class TestEstimatorDistributionStrategy(tf.test.TestCase,
                                        parameterized.TestCase):
  """Trains Keras-derived estimators under MirroredStrategy and checks loss."""

  def setUp(self):
    super(TestEstimatorDistributionStrategy, self).setUp()
    strategy_combinations.set_virtual_cpus_to_at_least(3)
    self._base_dir = os.path.join(self.get_temp_dir(),
                                  'keras_to_estimator_strategy_test')
    tf.compat.v1.gfile.MakeDirs(self._base_dir)
    self._config = run_config_lib.RunConfig(
        tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)

  def tearDown(self):
    super(TestEstimatorDistributionStrategy, self).tearDown()
    tf.compat.v1.summary.FileWriterCache.clear()
    if os.path.isdir(self._base_dir):
      tf.compat.v1.gfile.DeleteRecursively(self._base_dir)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          distribution=[strategy_combinations.mirrored_strategy_with_two_cpus],
          mode=['graph'],
          cloning=[True, False],
      )
  )
  def test_train_functional_with_distribution_strategy(self, distribution,
                                                       cloning):
    keras_model = simple_functional_model()
    keras_model.compile(
        loss='categorical_crossentropy',
        metrics=[tf_keras.metrics.CategoricalAccuracy()],
        optimizer=tf_keras.optimizers.legacy.RMSprop(learning_rate=0.01),
        cloning=cloning)
    config = run_config_lib.RunConfig(
        tf_random_seed=_RANDOM_SEED,
        model_dir=self._base_dir,
        train_distribute=distribution,
        eval_distribute=distribution)
    with self.cached_session():
      est_keras = keras_lib.model_to_estimator(
          keras_model=keras_model, config=config)
      before_eval_results = est_keras.evaluate(
          input_fn=get_ds_test_input_fn, steps=1)
      # NOTE(review): steps is a float here (_TRAIN_SIZE / 16) — confirm the
      # estimator accepts non-integer steps.
      est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
      after_eval_results = est_keras.evaluate(
          input_fn=get_ds_test_input_fn, steps=1)
      # Training should reduce the evaluation loss.
      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])

    tf.compat.v1.summary.FileWriterCache.clear()
    tf.compat.v1.gfile.DeleteRecursively(self._config.model_dir)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          distribution=[strategy_combinations.mirrored_strategy_with_two_cpus],
          mode=['graph'],
          cloning=[True, False],
      )
  )
  def test_train_sequential_with_distribution_strategy(self, distribution,
                                                       cloning):
    keras_model = simple_sequential_model()
    keras_model.compile(
        loss='categorical_crossentropy',
        metrics=[tf_keras.metrics.CategoricalAccuracy()],
        optimizer=tf_keras.optimizers.legacy.RMSprop(learning_rate=0.01),
        cloning=cloning)
    config = run_config_lib.RunConfig(
        tf_random_seed=_RANDOM_SEED,
        model_dir=self._base_dir,
        train_distribute=distribution)
    with self.cached_session():
      est_keras = keras_lib.model_to_estimator(
          keras_model=keras_model, config=config)
      before_eval_results = est_keras.evaluate(
          input_fn=get_ds_test_input_fn, steps=1)
      est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
      after_eval_results = est_keras.evaluate(
          input_fn=get_ds_test_input_fn, steps=1)
      # Training should reduce the evaluation loss.
      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])

    tf.compat.v1.summary.FileWriterCache.clear()
    tf.compat.v1.gfile.DeleteRecursively(self._config.model_dir)

  @tf.compat.v2.__internal__.distribute.combinations.generate(
      tf.compat.v2.__internal__.test.combinations.combine(
          distribution=[strategy_combinations.mirrored_strategy_with_two_cpus],
          mode=['graph'],
      )
  )
  def test_multi_inputs_multi_outputs_with_input_fn_as_dict(self, distribution):
    train_data, test_data = get_multi_inputs_multi_outputs_data()

    def train_input_fn():
      input_dict = {
          'input_a': train_data['input_a'],
          'input_b': train_data['input_b'],
          # input_m is a string tensor; the model parses it back to numbers.
          'input_m': train_data['input_m'].astype(str)
      }
      output_dict = {
          'dense_2': train_data['output_c'],
          'dense_3': train_data['output_d']
      }
      return tf.compat.v1.data.Dataset.from_tensor_slices(
          (input_dict, output_dict)).batch(16)

    def eval_input_fn():
      input_dict = {
          'input_a': test_data['input_a'],
          'input_b': test_data['input_b'],
          'input_m': test_data['input_m'].astype(str)
      }
      output_dict = {
          'dense_2': test_data['output_c'],
          'dense_3': test_data['output_d']
      }
      return tf.compat.v1.data.Dataset.from_tensor_slices(
          (input_dict, output_dict)).batch(16)

    self.do_test_multi_inputs_multi_outputs_with_input_fn(
        distribution, train_input_fn, eval_input_fn)

  def do_test_multi_inputs_multi_outputs_with_input_fn(self, distribution,
                                                       train_input_fn,
                                                       eval_input_fn):
    config = run_config_lib.RunConfig(
        tf_random_seed=_RANDOM_SEED,
        model_dir=self._base_dir,
        train_distribute=distribution)
    with self.cached_session():
      model = multi_inputs_multi_outputs_model()
      est_keras = keras_lib.model_to_estimator(keras_model=model, config=config)
      baseline_eval_results = est_keras.evaluate(
          input_fn=eval_input_fn, steps=1)
      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
      eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
      # Training should reduce the evaluation loss.
      self.assertLess(eval_results['loss'], baseline_eval_results['loss'])


def get_test_data(train_samples,
                  test_samples,
                  input_shape,
                  num_classes,
                  random_seed=None):
  """Generates a linearly-separable-ish synthetic classification dataset.

  Each class has a random template; samples are the template plus unit
  Gaussian noise. Returns ((x_train, y_train), (x_test, y_test)).
  """
  if random_seed is not None:
    np.random.seed(random_seed)
  num_sample = train_samples + test_samples
  templates = 2 * num_classes * np.random.random((num_classes,) + input_shape)
  y = np.random.randint(0, num_classes, size=(num_sample,))
  x = np.zeros((num_sample,) + input_shape, dtype=np.float32)
  for i in range(num_sample):
    x[i] = templates[y[i]] + np.random.normal(loc=0, scale=1., size=input_shape)
  return ((x[:train_samples], y[:train_samples]),
          (x[train_samples:], y[train_samples:]))


if __name__ == '__main__':
  test.main()


================================================
FILE: tensorflow_estimator/python/estimator/keras_lib.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=protected-access
"""Home of estimator related functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os
import re

from absl import logging
import tensorflow as tf
from tensorflow.python.checkpoint import checkpoint as trackable_util
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.export import export_lib
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys
from tensorflow_estimator.python.estimator.util import tf_keras_v2
from tensorflow_estimator.python.estimator.util import tf_keras_v1

_DEFAULT_SERVING_KEY = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY


class FormattedKeyError(KeyError):
  """KeyError with formatted error message.

  Python's `KeyError` has special casing around formatting (see
  https://bugs.python.org/issue2651).
class FormattedKeyError(KeyError):
  """KeyError with formatted error message.

  Python's `KeyError` has special casing around formatting (see
  https://bugs.python.org/issue2651). Use this class when the error message
  has newlines and other special format characters.

  Needed by https://github.com/tensorflow/tensorflow/issues/36857.
  """

  def __init__(self, message):
    # Keep the raw message; KeyError.__str__ would repr() it and escape
    # newlines, so __str__ is overridden below to return it verbatim.
    self.message = message

  def __str__(self):
    return self.message


def _cast_tensor_to_floatx(x):
  """Cast tensor to keras's floatx dtype if it is not already the same dtype."""
  if x.dtype == tf_keras.backend.floatx():
    return x
  else:
    return tf.cast(x, tf_keras.backend.floatx())


def _convert_tensor(x):
  """Create or cast tensor if needed."""
  if not tf.is_tensor(x):
    # x is a numpy array
    x = tf.compat.v1.convert_to_tensor_or_sparse_tensor(x)
  return x


def _any_weight_initialized(keras_model):
  """Check if any weights has been initialized in the Keras model.

  Args:
    keras_model: An instance of compiled keras model.

  Returns:
    boolean, True if at least one weight has been initialized, else False.
    Currently keras initialize all weights at get_session().
  """
  if keras_model is None:
    return False
  # Outside-functions eager execution implies weights were created eagerly.
  if tf.compat.v1.executing_eagerly_outside_functions():
    return True
  for layer in keras_model.layers:
    for weight in layer.weights:
      # `_keras_initialized` is stamped on a variable once Keras runs its
      # initializer in get_session().
      if hasattr(weight, '_keras_initialized'):
        return True
  return False


def _convert_estimator_io_to_keras(keras_model, features, labels):
  """Converts estimator features and labels to keras input and target tensors.

  Args:
    keras_model: a compiled `tf_keras.Model` instance, used to determine the
      order of the returned lists.
    features: Dict of tensors or `None`.
    labels: Dict of tensors, a single tensor, or `None`.

  Returns:
    Tuple of (
      list of input tensors or `None`,
      list of target tensors or `None`,
      list of sample weight tensors or `None`)
    The order of tensors is determined by the order set in the keras model.
  """

  def _to_ordered_tensor_list(obj, key_order, obj_name, order_name):
    """Convert obj to an ordered list of tensors.

    Args:
      obj: List, dict, or single tensor. May be `None`.
      key_order: List of strings with the order to return (used if obj is a
        dict).
      obj_name: String name of object (e.g. "features" or "labels")
      order_name: String name of the key order (e.g. "inputs" or "outputs")

    Returns:
      List of tensors, or `None`

    Raises:
      KeyError: If obj has invalid keys.
    """
    if obj is None:
      return None
    elif isinstance(obj, (list, tuple)):
      return [_convert_tensor(x) for x in obj]
    elif isinstance(obj, dict):
      # Ensure that keys in key_order are contained in obj keys.
      # One can provide more data keys described in obj, as long as the keys
      # requested by model are provided.
      different_keys = set(key_order) - set(obj.keys())

      if different_keys:
        raise FormattedKeyError(
            'The dictionary passed into {obj_name} does not cover requested '
            '{order_name} keys defined in the keras model.'
            '\n\tExpected keys: {order_keys}'
            '\n\t{obj_name} keys: {obj_keys}'
            '\n\tMissed keys: {different_keys}'.format(
                order_name=order_name,
                order_keys=set(key_order),
                obj_name=obj_name,
                obj_keys=set(obj.keys()),
                different_keys=different_keys))
      return [_convert_tensor(obj[key]) for key in key_order]
    else:  # Assume obj is a tensor.
      return [_convert_tensor(obj)]

  # Sample weights may be smuggled inside `features` (see the
  # 'features'/'sample_weights' dict convention in model_to_estimator docs).
  features, sample_weight_tensors = _extract_sample_weight_tensors(features)
  input_names = None
  output_names = None
  if isinstance(features, dict):
    # Graph networks know their canonical input names; subclassed models get
    # positional 'input_1', 'input_2', ... fallbacks.
    input_names = (
        keras_model.input_names if keras_model._is_graph_network else
        ['input_%d' % i for i in range(1, len(features) + 1)])
  if isinstance(labels, dict):
    output_names = (
        keras_model.output_names if keras_model._is_graph_network else
        ['output_%d' % i for i in range(1, len(labels) + 1)])

  if isinstance(keras_model.inputs, dict):
    # Keep input tensors as a dict if keras_model is built with dict input.
    input_tensors = {
        k: _convert_tensor(features[k])
        for (k, v) in keras_model.inputs.items()
    }
  elif keras_model.inputs is None and isinstance(features, dict):
    # Keep input tensors as a dict if keras_model input structure is unknown.
    input_tensors = {k: _convert_tensor(v) for (k, v) in features.items()}
  else:
    # converting input tensors into sorted list.
    input_tensors = _to_ordered_tensor_list(features, input_names, 'features',
                                            'inputs')
  target_tensors = _to_ordered_tensor_list(labels, output_names, 'labels',
                                           'outputs')

  return input_tensors, target_tensors, sample_weight_tensors


def _extract_sample_weight_tensors(features):
  """Splits a {'features', 'sample_weights'} dict into its two parts.

  Any other `features` value (tensor, or a dict with different keys) is
  passed through unchanged with `None` sample weights.
  """
  if isinstance(features, dict) and set(
      features.keys()) == {'features', 'sample_weights'}:
    feature_tensor = features['features']
    sample_weight_tensors = features['sample_weights']
  else:
    feature_tensor = features
    sample_weight_tensors = None
  return feature_tensor, sample_weight_tensors
def _clone_and_build_model(mode,
                           keras_model,
                           custom_objects,
                           features=None,
                           labels=None,
                           optimizer_config=None):
  """Clone and build the given keras_model.

  Args:
    mode: training mode.
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    features: Dict of tensors.
    labels: Dict of tensors, or single tensor instance.
    optimizer_config: Optimizer config dictionary, returned by
      `optimizer.get_config()`. This is used when cloning a model with an
      optimizer. Since `_clone_and_build_model` is called in a different graph
      and session from the model, `optimizer.get_config()` may raise an error
      during the attempt to serialize the optimizer hyperparameter values.

  Returns:
    The newly built model.
  """
  # Set to True during training, False for inference or testing.
  tf_keras.backend.set_learning_phase(mode == ModeKeys.TRAIN)
  input_tensors, target_tensors, sample_weight_tensors = (
      _convert_estimator_io_to_keras(keras_model, features, labels))

  compile_clone = (mode != ModeKeys.PREDICT)

  global_step = None
  if compile_clone:
    # Set iterations to the global step created by tf.train.create_global_step()
    # which is automatically run in the estimator framework.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    tf_keras_v2.__internal__.backend.track_variable(global_step)

  clone = tf_keras_v2.__internal__.models.clone_and_build_model(
      keras_model,
      input_tensors,
      target_tensors,
      custom_objects,
      compile_clone=compile_clone,
      # Subclassed models cannot be cloned structurally; reset them in place.
      in_place_reset=(not keras_model._is_graph_network),
      optimizer_iterations=global_step,
      optimizer_config=optimizer_config)

  if sample_weight_tensors is not None:
    sample_weight_tensors = standardize_sample_weights(
        sample_weight_tensors, clone.output_names)
    # Update calculated loss (model.total_loss) to include sample weights.
    clone._compile_weights_loss_and_weighted_metrics(sample_weight_tensors)
  return clone


def _convert_keras_metrics_to_estimator(model, metric_names_map=None):
  """Convert metrics from a Keras model to ops used by the Estimator framework.

  Args:
    model: A `tf_keras.Model` object.
    metric_names_map: Optional dictionary mapping Keras model output metric
      names to custom names.

  Returns:
    Dictionary mapping metric names to tuples of (value, update) ops. May
    return `None` if the model does not contain any metrics.
  """
  if not getattr(model, '_compile_metrics', None):
    return None

  # We are not using model.metrics here because we want to exclude the metrics
  # added using `add_metric` API.
  compiled_metrics = model._compile_metric_functions
  if metric_names_map:
    custom_map_keys = set(metric_names_map.keys())
    expected_keys = {m.name for m in compiled_metrics}
    # The map must cover every compiled metric name...
    unknown = expected_keys.difference(custom_map_keys)
    if unknown:
      raise ValueError(
          'Invalid `metric_names_map`. '
          'The following keras model metric names:"{}" do not exist in '
          'the `metric_names_map` dictionary'.format(list(unknown)))
    # ...and must not contain names the model does not produce.
    extra = custom_map_keys.difference(expected_keys)
    if extra:
      raise ValueError('Invalid `metric_names_map`. '
                       'There are unexpected keys in the `metric_names_map` '
                       'dictionary. Expected keys: {}, Received: {}'.format(
                           list(expected_keys), list(extra)))
    return {metric_names_map[m.name]: m for m in compiled_metrics}
  else:
    return {m.name: m for m in compiled_metrics}
def _create_keras_model_fn(keras_model,
                           custom_objects=None,
                           save_object_ckpt=False,
                           metric_names_map=None,
                           export_outputs=None):
  """Creates model_fn for keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    save_object_ckpt: Whether to save an object-based checkpoint.
    metric_names_map: Optional dictionary mapping Keras model output metric
      names to custom names.
    export_outputs: Optional dictionary mapping custom names to a subclass of
      `tf.estimator.export.ExportOutput`.

  Returns:
    The model_fn for a keras Estimator.
  """
  if isinstance(keras_model.optimizer,
                tf_keras.optimizers.experimental.Optimizer):
    # Experimental optimizer cannot work with estimator, so we convert it to
    # legacy optimizer.
    if tf.executing_eagerly():
      logging.warning(
          'You are using `tf_keras.optimizers.experimental.Optimizer` in TF '
          'estimator, which only supports '
          '`tf_keras.optimizers.legacy.Optimizer`. Automatically converting '
          'your optimizer to `tf_keras.optimizers.legacy.Optimizer`.')
      opt = tf_keras.__internal__.optimizers.convert_to_legacy_optimizer(
          keras_model.optimizer)
      keras_model.optimizer = opt
    else:
      # Conversion is only implemented eagerly; in graph mode the user must
      # supply a legacy optimizer themselves.
      raise ValueError('Please set your optimizer as an instance of '
                       '`tf_keras.optimizers.legacy.Optimizer`, e.g., '
                       '`tf_keras.optimizers.legacy.Adam`. Received optimizer '
                       f'type: {type(keras_model.optimizer)}.')

  # Get optimizer config in the current context (since model_fn is called in the
  # estimator graph and session). OptimizerV2 objects serialize variable/tensor
  # hyperparameters in their configs, resulting to wrong-session errors during
  # model cloning.
  try:
    if isinstance(keras_model.optimizer, (tuple, list)):
      optimizer_config = [opt.get_config() for opt in keras_model.optimizer]
    else:
      optimizer_config = keras_model.optimizer.get_config()
  except (NotImplementedError, AttributeError):
    # TFOptimizers and other custom optimizers do not have a config.
    optimizer_config = None

  def model_fn(features, labels, mode):
    """model_fn for keras Estimator."""
    # Re-clone the Keras model inside the estimator graph for this mode.
    model = _clone_and_build_model(
        mode=mode,
        keras_model=keras_model,
        custom_objects=custom_objects,
        features=features,
        labels=labels,
        optimizer_config=optimizer_config)
    model_output_names = []
    # We need to make sure that the output names of the last layer in the model
    # is the same for each of the cloned models. This is required for mirrored
    # strategy when we call regroup.
    if tf.distribute.has_strategy():
      for name in model.output_names:
        # Strip the replica suffix ('_1', '_2', ...) Keras appends to clones.
        name = re.compile(r'_\d$').sub('', name)
        model_output_names.append(name)
    else:
      model_output_names = model.output_names

    # Get inputs to EstimatorSpec
    predictions = dict(zip(model_output_names, model.outputs))

    loss = None
    train_op = None
    eval_metric_ops = None

    # Set loss and metric only during train and evaluate.
    if mode is not ModeKeys.PREDICT:
      if mode is ModeKeys.TRAIN:
        model._make_train_function()  # pylint: disable=protected-access
      else:
        model._make_test_function()  # pylint: disable=protected-access
      loss = model.total_loss

      eval_metric_ops = _convert_keras_metrics_to_estimator(
          model, metric_names_map)

    # Set train_op only during train.
    if mode is ModeKeys.TRAIN:
      train_op = model.train_function.updates_op

    if (not model._is_graph_network and
        hasattr(keras_model, '_original_attributes_cache') and
        keras_model._original_attributes_cache is not None):
      # To avoid `model_fn` being destructive for the initial model argument.
      (tf_keras_v2.__internal__.models.
       in_place_subclassed_model_state_restoration(keras_model))

    scaffold = None
    if save_object_ckpt:
      model._track_trackable(tf.compat.v1.train.get_global_step(),
                             'estimator_global_step')
      # Create saver that maps variable names to object-checkpoint keys.
      object_graph = tf.compat.v2.__internal__.tracking.ObjectGraphView(model)
      var_list = object_graph.frozen_saveable_objects()
      saver = tf.compat.v1.train.Saver(var_list=var_list, sharded=True)
      saver._object_restore_saver = trackable_util.frozen_saver(model)
      scaffold = tf.compat.v1.train.Scaffold(saver=saver)

    final_export_outputs = {
        _DEFAULT_SERVING_KEY: export_lib.PredictOutput(predictions)
    }
    if export_outputs is not None:
      # Every custom export key must correspond to a model output name.
      different_keys = set(export_outputs.keys()) - set(model.output_names)
      if different_keys:
        # NOTE(review): the kwargs below look swapped relative to the message
        # template (order_name gets the export_outputs dict, obj_name the
        # output-name list), producing a garbled message. Preserved as-is;
        # verify intent before changing.
        raise FormattedKeyError(
            'The list passed into {obj_name} does not cover requested '
            '{order_name} keys defined in the keras model.'
            '\n\tExpected keys: {order_keys}'
            '\n\t{obj_name} keys: {obj_keys}'
            '\n\tMissed keys: {different_keys}'.format(
                order_name=export_outputs,
                order_keys=set(export_outputs.keys()),
                obj_name=model.output_names,
                obj_keys=set(model.output_names),
                different_keys=different_keys))
      for key, export_output_cls in export_outputs.items():
        final_export_outputs[key] = export_output_cls(predictions[key])

    return model_fn_lib.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=final_export_outputs,
        scaffold=scaffold)

  return model_fn
""" # save checkpoint into subdirectory to allow warm start keras_model_dir = os.path.join(config.model_dir, 'keras') # Load weights and save to checkpoint if there is no checkpoint latest_path = tf.train.latest_checkpoint(keras_model_dir) if not latest_path: keras_weights = None if _any_weight_initialized(keras_model): keras_weights = keras_model.get_weights() if not tf.compat.v1.gfile.IsDirectory(keras_model_dir): tf.compat.v1.gfile.MakeDirs(keras_model_dir) with tf.Graph().as_default(): tf.compat.v1.random.set_random_seed(config.tf_random_seed) tf.compat.v1.train.create_global_step() model = _clone_and_build_model(ModeKeys.TRAIN, keras_model, custom_objects) # Init the train_function outside of the context of session. This is due # to the fact that train function will update the graph by adding backprop # parts. This will potentially trying to update the node in forward graph # which will fail if it is done within same session. # Always create the train_function here since the model is just cloned. # See https://github.com/tensorflow/tensorflow/issues/27750 for details. model._make_train_function() # pylint: disable=protected-access # save to checkpoint with tf.compat.v1.Session(config=config.session_config) as sess: if keras_weights: model.set_weights(keras_weights) # model._make_train_function() will potentially create the optimizer # variable, which will require another variable initialization. 
tf_keras_v2.__internal__.backend.initialize_variables(sess) if save_object_ckpt: model._track_trackable( # pylint: disable=protected-access tf.compat.v1.train.get_global_step(), 'estimator_global_step') latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt') model.save_weights(latest_path) else: saver = tf.compat.v1.train.Saver() latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt') saver.save(sess, latest_path) return latest_path def _get_file_from_google_storage(keras_model_path, model_dir): """Get file from google storage and download to local file. Args: keras_model_path: a google storage path for compiled keras model. model_dir: the directory from estimator config. Returns: The path where keras model is saved. Raises: ValueError: if storage object name does not end with .h5. """ try: from google.cloud import storage # pylint:disable=g-import-not-at-top except ImportError: raise TypeError('Could not save model to Google cloud storage; please ' 'install `google-cloud-storage` via ' '`pip install google-cloud-storage`.') storage_client = storage.Client() path, blob_name = os.path.split(keras_model_path) _, bucket_name = os.path.split(path) keras_model_dir = os.path.join(model_dir, 'keras') if not tf.compat.v1.gfile.Exists(keras_model_dir): tf.compat.v1.gfile.MakeDirs(keras_model_dir) file_name = os.path.join(keras_model_dir, 'keras_model.h5') try: blob = storage_client.get_bucket(bucket_name).blob(blob_name) blob.download_to_filename(file_name) except: raise ValueError('Failed to download keras model, please check ' 'environment variable GOOGLE_APPLICATION_CREDENTIALS ' 'and model path storage.googleapis.com/{bucket}/{object}.') tf.compat.v1.logging.info('Saving model to {}'.format(file_name)) del storage_client return file_name def model_to_estimator(keras_model=None, keras_model_path=None, custom_objects=None, model_dir=None, config=None, checkpoint_format=None, use_v2_estimator=False, metric_names_map=None, export_outputs=None): """Constructs 
def model_to_estimator(keras_model=None,
                       keras_model_path=None,
                       custom_objects=None,
                       model_dir=None,
                       config=None,
                       checkpoint_format=None,
                       use_v2_estimator=False,
                       metric_names_map=None,
                       export_outputs=None):
  """Constructs an `Estimator` instance from given keras model.

  If you use infrastructure or other tooling that relies on Estimators, you
  can still build a Keras model and use model_to_estimator to convert the
  Keras model to an Estimator for use with downstream systems. For usage
  example, please see: [Creating estimators from Keras Models](
  https://www.tensorflow.org/guide/estimator#create_an_estimator_from_a_keras_model).

  Sample Weights:
  Estimators returned by `model_to_estimator` are configured so that they can
  handle sample weights (similar to `keras_model.fit(x, y, sample_weights)`).

  To pass sample weights when training or evaluating the Estimator, the first
  item returned by the input function should be a dictionary with keys
  `features` and `sample_weights`. Example below:

  ```python
  keras_model = tf_keras.Model(...)
  keras_model.compile(...)

  estimator = tf_keras.estimator.model_to_estimator(keras_model)

  def input_fn():
    return dataset_ops.Dataset.from_tensors(
        ({'features': features, 'sample_weights': sample_weights},
         targets))

  estimator.train(input_fn, steps=1)
  ```

  Example with customized export signature:
  ```python
  inputs = {'a': tf_keras.Input(..., name='a'),
            'b': tf_keras.Input(..., name='b')}
  outputs = {'c': tf_keras.layers.Dense(..., name='c')(inputs['a']),
             'd': tf_keras.layers.Dense(..., name='d')(inputs['b'])}
  keras_model = tf_keras.Model(inputs, outputs)
  keras_model.compile(...)
  export_outputs = {'c': tf.estimator.export.RegressionOutput,
                    'd': tf.estimator.export.ClassificationOutput}

  estimator = tf_keras.estimator.model_to_estimator(
      keras_model, export_outputs=export_outputs)

  def input_fn():
    return dataset_ops.Dataset.from_tensors(
        ({'features': features, 'sample_weights': sample_weights},
         targets))

  estimator.train(input_fn, steps=1)
  ```

  Note: We do not support creating weighted metrics in Keras and converting
  them to weighted metrics in the Estimator API using `model_to_estimator`.
  You will have to create these metrics directly on the estimator spec using
  the `add_metrics` function.

  Args:
    keras_model: A compiled Keras model object. This argument is mutually
      exclusive with `keras_model_path`. Estimator's `model_fn` uses the
      structure of the model to clone the model. Defaults to `None`.
    keras_model_path: Path to a compiled Keras model saved on disk, in HDF5
      format, which can be generated with the `save()` method of a Keras
      model. This argument is mutually exclusive with `keras_model`. Defaults
      to `None`.
    custom_objects: Dictionary for cloning customized objects. This is used
      with classes that is not part of this pip package. For example, if user
      maintains a `relu6` class that inherits from `tf_keras.layers.Layer`,
      then pass `custom_objects={'relu6': relu6}`. Defaults to `None`.
    model_dir: Directory to save `Estimator` model parameters, graph, summary
      files for TensorBoard, etc. If unset a directory will be created with
      `tempfile.mkdtemp`
    config: `RunConfig` to config `Estimator`. Allows setting up things in
      `model_fn` based on configuration such as `num_ps_replicas`, or
      `model_dir`. Defaults to `None`. If both `config.model_dir` and the
      `model_dir` argument (above) are specified the `model_dir` **argument**
      takes precedence.
    checkpoint_format: Sets the format of the checkpoint saved by the
      estimator when training. May be `saver` or `checkpoint`, depending on
      whether to save checkpoints from `tf.compat.v1.train.Saver` or
      `tf.train.Checkpoint`. The default is `checkpoint`. Estimators use
      name-based `tf.train.Saver` checkpoints, while Keras models use
      object-based checkpoints from `tf.train.Checkpoint`. Currently, saving
      object-based checkpoints from `model_to_estimator` is only supported by
      Functional and Sequential models.
    use_v2_estimator: Whether to convert the model to a V2 Estimator or V1
      Estimator. Defaults to `False`.
    metric_names_map: Optional dictionary mapping Keras model output metric
      names to custom names. This can be used to override the default Keras
      model output metrics names in a multi IO model use case and provide
      custom names for the `eval_metric_ops` in Estimator. The Keras model
      metric names can be obtained using `model.metrics_names` excluding any
      loss metrics such as total loss and output losses. For example, if your
      Keras model has two outputs `out_1` and `out_2`, with `mse` loss and
      `acc` metric, then `model.metrics_names` will be `['loss',
      'out_1_loss', 'out_2_loss', 'out_1_acc', 'out_2_acc']`. The model
      metric names excluding the loss metrics will be `['out_1_acc',
      'out_2_acc']`.
    export_outputs: Optional dictionary. This can be used to override the
      default Keras model output exports in a multi IO model use case and
      provide custom names for the `export_outputs` in
      `tf.estimator.EstimatorSpec`. Default is None, which is equivalent to
      {'serving_default': `tf.estimator.export.PredictOutput`}. A dict
      `{name: output}` where:
      * name: An arbitrary name for this output. This becomes the signature
        name in the SavedModel.
      * output: an `ExportOutput` object such as `ClassificationOutput`,
        `RegressionOutput`, or `PredictOutput`. Single-headed models only need
        to specify one entry in this dictionary. Multi-headed models should
        specify one entry for each head, one of which must be named using
        `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`.
        If no entry is provided, a default `PredictOutput` mapping to
        `predictions` will be created.

  Returns:
    An Estimator from given keras model.

  Raises:
    ValueError: If neither keras_model nor keras_model_path was given.
    ValueError: If both keras_model and keras_model_path was given.
    ValueError: If the keras_model_path is a GCS URI.
    ValueError: If keras_model has not been compiled.
    ValueError: If an invalid checkpoint_format was given.
  """
  if not (keras_model or keras_model_path):
    raise ValueError(
        'Either `keras_model` or `keras_model_path` needs to be provided.')
  if keras_model and keras_model_path:
    # BUG FIX: the error message previously read 'Please specity'.
    raise ValueError(
        'Please specify either `keras_model` or `keras_model_path`, '
        'but not both.')

  if keras_model:
    _assert_valid_model(keras_model, custom_objects)

  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(
      config, model_dir)
  if not keras_model:
    if keras_model_path.startswith(
        'gs://') or 'storage.googleapis.com' in keras_model_path:
      # Download the model from GCS into the estimator's model directory.
      keras_model_path = _get_file_from_google_storage(keras_model_path,
                                                       config.model_dir)
    tf.compat.v1.logging.info('Loading models from %s', keras_model_path)
    keras_model = tf_keras.models.load_model(keras_model_path)
  else:
    # Removed a no-op `keras_model = keras_model` self-assignment here.
    tf.compat.v1.logging.info('Using the Keras model provided.')

  if checkpoint_format is None or checkpoint_format == 'checkpoint':
    if not (keras_model._is_graph_network or
            isinstance(keras_model, tf_keras.models.Sequential)):
      raise ValueError('Object-based checkpoints are currently not supported '
                       'with subclassed models.')
    save_object_ckpt = True
  elif checkpoint_format == 'saver':
    save_object_ckpt = False
  else:
    raise ValueError(
        'Checkpoint format must be one of "checkpoint" or "saver". Got {}'
        .format(checkpoint_format))

  if not hasattr(keras_model, 'optimizer') or not keras_model.optimizer:
    raise ValueError('The given keras model has not been compiled yet. '
                     'Please compile the model with `model.compile()` '
                     'before calling `model_to_estimator()`.')

  keras_model_fn = _create_keras_model_fn(keras_model, custom_objects,
                                          save_object_ckpt, metric_names_map,
                                          export_outputs)
  if _any_weight_initialized(keras_model):
    # Warn if config passed to estimator tries to update GPUOptions. If a
    # session has already been created, the GPUOptions passed to the first
    # session sticks.
    if config.session_config.HasField('gpu_options'):
      tf.compat.v1.logging.warn(
          'The Keras backend session has already been set. '
          'The _session_config passed to model_to_estimator will not be used.')
  else:
    # Pass the config into keras backend's default session.
    sess = tf.compat.v1.Session(config=config.session_config)
    tf_keras_v1.backend.set_session(sess)

  warm_start_path = None
  if keras_model._is_graph_network and config.is_chief:
    # Persist the Keras weights once so the estimator can warm-start from
    # them; only the chief writes to avoid races in distributed setups.
    warm_start_path = _save_first_checkpoint(keras_model, custom_objects,
                                             config, save_object_ckpt)
  elif keras_model.built:
    tf.compat.v1.logging.warn(
        'You are creating an Estimator from a Keras model manually '
        'subclassed from `Model`, that was already called on some '
        'inputs (and thus already had weights). We are currently '
        'unable to preserve the model\'s state (its weights) as '
        'part of the estimator in this case. Be warned that the '
        'estimator has been created using a freshly initialized '
        'version of your model.\n'
        'Note that this doesn\'t affect the state of the model '
        'instance you passed as `keras_model` argument.')
  if use_v2_estimator:
    estimator_cls = estimator_lib.EstimatorV2
  else:
    estimator_cls = estimator_lib.Estimator
  estimator = estimator_cls(
      keras_model_fn, config=config, warm_start_from=warm_start_path)
  return estimator


def _assert_valid_model(model, custom_objects=None):
  """Raises ValueError for subclassed models that cannot round-trip via config."""
  is_subclass = (not model._is_graph_network and
                 not isinstance(model, tf_keras.models.Sequential))
  if is_subclass:
    try:
      custom_objects = custom_objects or {}
      with tf_keras.utils.CustomObjectScope(custom_objects):
        model.__class__.from_config(model.get_config())
    except NotImplementedError:
      raise ValueError(
          'Subclassed `Model`s passed to `model_to_estimator` must '
          'implement `Model.get_config` and `Model.from_config`.')
def standardize_sample_weights(x_weight, output_names):
  """Maps `sample_weight` or `class_weight` to model outputs.

  Args:
    x_weight: User-provided `sample_weight` or `class_weight` argument.
    output_names: List of output names (strings) in the model.

  Returns:
    A list of `sample_weight` or `class_weight` where there are exactly one
      element per model output.

  Raises:
    ValueError: In case of invalid user-provided argument.
  """
  if x_weight is None or (isinstance(x_weight, (list, tuple)) and
                          len(x_weight) == 0):  # pylint: disable=g-explicit-length-test
    return [None for _ in output_names]
  if len(output_names) == 1:
    if isinstance(x_weight, (list, tuple)) and len(x_weight) == 1:
      return x_weight
    if isinstance(x_weight, dict) and output_names[0] in x_weight:
      return [x_weight[output_names[0]]]
    else:
      return [x_weight]
  if isinstance(x_weight, (list, tuple)):
    if len(x_weight) != len(output_names):
      # BUG FIX: the two implicitly concatenated literals below were missing
      # a separating space ("...`sample_weights`array per...").
      raise ValueError('Provided `sample_weights` was a list of ' +
                       str(len(x_weight)) + ' elements, but the model has ' +
                       str(len(output_names)) + ' outputs. '
                       'You should provide one `sample_weights` '
                       'array per model output.')
    return x_weight
  if isinstance(x_weight, collections.abc.Mapping):
    # Dict weights: every key must name a real output; missing outputs get
    # None so the result always has one entry per output.
    unknown = set(x_weight.keys()).difference(output_names)
    if unknown:
      raise ValueError('Unknown entries in sample_weights dictionary: {}. '
                       'Only expected following keys: {}'.format(
                           list(unknown), output_names))
    x_weights = []
    for name in output_names:
      x_weights.append(x_weight.get(name))
    return x_weights
  else:
    raise TypeError('The model has multiple outputs, so `sample_weights` '
                    'should be either a list or a dict. '
                    'Provided `sample_weights` type not understood: ' +
                    str(x_weight))
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for keras premade model in model_to_estimator routines."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import numpy as np
import tensorflow as tf
from tensorflow_estimator.python.estimator import keras_lib
from tensorflow_estimator.python.estimator import run_config as run_config_lib
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.inputs import numpy_io

# Fixed seed so estimator/keras training comparisons are deterministic.
_RANDOM_SEED = 1337


def gen_input_fn(x, y=None, batch_size=32, num_epochs=10, shuffle=False):
  """Returns an input_fn yielding repeated, batched `(x, y)` (or just `x`)."""

  def input_fn():
    ds = tf.compat.v1.data.Dataset.from_tensor_slices((
        x, y) if y is not None else x)
    if shuffle:
      ds = ds.shuffle(1000)
    return ds.repeat(num_epochs).batch(batch_size)

  return input_fn


def get_resource_for_simple_model():
  """Builds a small 2-feature linear-regression dataset plus input_fns.

  Returns:
    A tuple `((x_train, y_train), (x_test, y_test), train_input_fn,
    evaluate_input_fn)` where the labels are a fixed linear combination of
    the two input features.
  """
  input_name = 'input_1'
  output_name = 'output_1'
  np.random.seed(_RANDOM_SEED)
  x_train = np.random.uniform(low=-5, high=5, size=(64, 2)).astype('f')
  y_train = .3 * x_train[:, 0] + .2 * x_train[:, 1]
  x_test = np.random.uniform(low=-5, high=5, size=(64, 2)).astype('f')
  y_test = .3 * x_test[:, 0] + .2 * x_test[:, 1]
  train_input_fn = gen_input_fn(
      x=x_train, y=y_train, num_epochs=None, shuffle=False)
  evaluate_input_fn = gen_input_fn(
      x=randomize_io_type(x_test, input_name),
      y=randomize_io_type(y_test, output_name),
      num_epochs=1,
      shuffle=False)
  return (x_train, y_train), (x_test, y_test), train_input_fn, evaluate_input_fn


def randomize_io_type(array, name):
  # Randomly exercise both supported input forms: a bare array, or a
  # {name: array} dict.
  switch = np.random.random()
  if switch > 0.5:
    return array
  else:
    return {name: array}


class KerasPremadeModelTest(tf.test.TestCase):
  """Tests converting premade Keras models (Linear/WideDeep) to estimators."""

  def setUp(self):
    self._base_dir = os.path.join(self.get_temp_dir(), 'keras_estimator_test')
    tf.compat.v1.gfile.MakeDirs(self._base_dir)
    self._config = run_config_lib.RunConfig(
        tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
    super(KerasPremadeModelTest, self).setUp()

  def tearDown(self):
    # Make sure nothing is stuck in limbo.
    tf.compat.v1.summary.FileWriterCache.clear()
    if os.path.isdir(self._base_dir):
      tf.compat.v1.gfile.DeleteRecursively(self._base_dir)
    tf_keras.backend.clear_session()
    super(KerasPremadeModelTest, self).tearDown()

  def test_train_premade_linear_model_with_dense_features(self):
    # Synthetic data: each vocab entry maps to a target value plus small noise.
    vocab_list = ['alpha', 'beta', 'gamma']
    vocab_val = [0.4, 0.6, 0.9]
    data = np.random.choice(vocab_list, size=256)
    y = np.zeros_like(data, dtype=np.float32)
    for vocab, val in zip(vocab_list, vocab_val):
      indices = np.where(data == vocab)
      y[indices] = val + np.random.uniform(
          low=-0.01, high=0.01, size=indices[0].shape)

    cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key='symbol', vocabulary_list=vocab_list)
    ind_column = tf.feature_column.indicator_column(cat_column)
    keras_input = tf_keras.layers.Input(
        name='symbol', shape=3, dtype=tf.dtypes.string)
    feature_layer = tf_keras_v1.layers.DenseFeatures([ind_column])
    h = feature_layer({'symbol': keras_input})
    linear_model = tf_keras.experimental.LinearModel(units=1)
    h = linear_model(h)

    model = tf_keras.models.Model(inputs=keras_input, outputs=h)
    opt = tf_keras.optimizers.legacy.SGD(0.1)
    model.compile(opt, 'mse', ['mse'])
    train_input_fn = numpy_io.numpy_input_fn(
        x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
    est = keras_lib.model_to_estimator(
        keras_model=model, config=self._config, checkpoint_format='saver')
    before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
    est.train(input_fn=train_input_fn, steps=30)
    after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
    # Training should reduce the loss to near the label noise floor.
    self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
    self.assertLess(after_eval_results['loss'], 0.05)

  def test_train_premade_linear_model(self):
    (x_train, y_train), _, train_inp_fn, eval_inp_fn = get_resource_for_simple_model(
    )

    linear_model = tf_keras.experimental.LinearModel(units=1)
    opt = tf_keras.optimizers.legacy.SGD(0.1)
    linear_model.compile(opt, 'mse', ['mse'])
    # Pre-fit in Keras; the estimator should warm-start from these weights.
    linear_model.fit(x_train, y_train, epochs=10)

    est = keras_lib.model_to_estimator(
        keras_model=linear_model,
        config=self._config,
        checkpoint_format='saver')
    before_eval_results = est.evaluate(input_fn=eval_inp_fn, steps=1)
    est.train(input_fn=train_inp_fn, steps=500)
    after_eval_results = est.evaluate(input_fn=eval_inp_fn, steps=1)
    self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
    self.assertLess(after_eval_results['loss'], 0.1)

  def test_train_premade_widedeep_model_with_feature_layers(self):
    vocab_list = ['alpha', 'beta', 'gamma']
    vocab_val = [0.4, 0.6, 0.9]
    data = np.random.choice(vocab_list, size=256)
    y = np.zeros_like(data, dtype=np.float32)
    for vocab, val in zip(vocab_list, vocab_val):
      indices = np.where(data == vocab)
      y[indices] = val + np.random.uniform(
          low=-0.01, high=0.01, size=indices[0].shape)

    cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key='symbol', vocabulary_list=vocab_list)
    ind_column = tf.feature_column.indicator_column(cat_column)
    # TODO(tanzheny): use emb column for dense part once b/139667019 is fixed.
    # emb_column = feature_column.embedding_column(cat_column, dimension=5)
    keras_input = tf_keras.layers.Input(
        name='symbol', shape=3, dtype=tf.dtypes.string)

    # build linear part with feature layer.
    linear_feature_layer = tf_keras_v1.layers.DenseFeatures([ind_column])
    linear_model = tf_keras.experimental.LinearModel(
        units=1, name='Linear', kernel_initializer='zeros')
    combined_linear = tf_keras.models.Sequential(
        [linear_feature_layer, linear_model])

    # build dnn part with feature layer.
    dnn_feature_layer = tf_keras_v1.layers.DenseFeatures([ind_column])
    dense_layer = tf_keras.layers.Dense(
        units=1, name='DNNDense', kernel_initializer='zeros')
    combined_dnn = tf_keras.models.Sequential([dnn_feature_layer, dense_layer])

    # build and compile wide deep.
    wide_deep_model = tf_keras.experimental.WideDeepModel(
        combined_linear, combined_dnn)
    wide_deep_model._set_inputs({'symbol': keras_input})
    sgd_opt = tf_keras.optimizers.legacy.SGD(0.1)
    adam_opt = tf_keras.optimizers.legacy.Adam(0.1)
    # One optimizer per sub-model: SGD for the linear part, Adam for the DNN.
    wide_deep_model.compile([sgd_opt, adam_opt], 'mse', ['mse'])

    # build estimator.
    train_input_fn = numpy_io.numpy_input_fn(
        x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'symbol': data}, y=y, num_epochs=20, shuffle=False)
    est = keras_lib.model_to_estimator(
        keras_model=wide_deep_model,
        config=self._config,
        checkpoint_format='saver')

    before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
    est.train(input_fn=train_input_fn, steps=20)
    after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
    self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
    self.assertLess(after_eval_results['loss'], 0.1)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/keras_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for training routines."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import math
import os
import tempfile

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.ops.parsing_ops import gen_parsing_ops
from tensorflow.python.saved_model import path_helpers
from tensorflow.python.saved_model.model_utils import export_output
from tensorflow.python.training import saver as saver_lib
from tensorflow_estimator.python.estimator import keras_lib
from tensorflow_estimator.python.estimator import run_config as run_config_lib
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.export import export_lib
from tensorflow_estimator.python.estimator.inputs import numpy_io
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys
from tensorflow_estimator.python.estimator.util import tf_keras_v2

# h5py is optional; tests that save/load HDF5 models skip themselves when it
# is unavailable.
try:
  import h5py  # pylint:disable=g-import-not-at-top
except ImportError:
  h5py = None

_RANDOM_SEED = 1337
_TRAIN_SIZE = 200
_INPUT_SIZE = (10,)
_NUM_CLASS = 2

_TMP_DIR = '/tmp'


def simple_sequential_model():
  """Sequential 10 -> 16 -> 2-class softmax classifier used across tests."""
  model = tf_keras.models.Sequential()
  model.add(
      tf_keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE))
  model.add(tf_keras.layers.Dropout(0.1))
  model.add(tf_keras.layers.Dense(_NUM_CLASS, activation='softmax'))
  return model


def simple_functional_model(activation='relu'):
  """Functional-API equivalent of `simple_sequential_model`."""
  a = tf_keras.layers.Input(shape=_INPUT_SIZE, name='input_layer')
  b = tf_keras.layers.Dense(16, activation=activation)(a)
  b = tf_keras.layers.Dropout(0.1)(b)
  b = tf_keras.layers.Dense(_NUM_CLASS, activation='softmax')(b)
  model = tf_keras.models.Model(inputs=[a], outputs=[b])
  return model


def simple_subclassed_model():
  """Subclassed-`Model` equivalent of `simple_sequential_model`."""

  class SimpleModel(tf_keras.models.Model):

    def __init__(self):
      super(SimpleModel, self).__init__()
      self.dense1 = tf_keras.layers.Dense(16, activation='relu')
      self.dp = tf_keras.layers.Dropout(0.1)
      self.dense2 = tf_keras.layers.Dense(_NUM_CLASS, activation='softmax')

    def call(self, inputs):
      x = self.dense1(inputs)
      x = self.dp(x)
      return self.dense2(x)

    def get_config(self):
      # Stateless config: `from_config` can rebuild this model from an empty
      # dict, which `model_to_estimator` requires for subclassed models.
      return {}

    @classmethod
    def from_config(cls, config):
      return cls()

  return SimpleModel()


def gen_input_fn(x, y=None, batch_size=128, num_epochs=1, shuffle=False):
  """Returns an input_fn yielding repeated, batched `(x, y)` (or just `x`)."""

  def input_fn():
    ds = tf.compat.v1.data.Dataset.from_tensor_slices((
        x, y) if y is not None else x)
    if shuffle:
      ds = ds.shuffle(1000)
    return ds.repeat(num_epochs).batch(batch_size)

  return input_fn


def get_multi_inputs_multi_outputs_data():
  """Builds train/test dicts for the 3-input, 2-output test model."""
  # NOTE(review): `get_test_data` is referenced here but its import is not
  # visible in this chunk -- confirm it is imported elsewhere in the file.
  (a_train, c_train), (a_test, c_test) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=(16,),
      num_classes=3,
      random_seed=_RANDOM_SEED)
  (b_train, d_train), (b_test, d_test) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=(16,),
      num_classes=2,
      random_seed=_RANDOM_SEED)
  (m_train, _), (m_test, _) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=(8,),
      num_classes=2,
      random_seed=_RANDOM_SEED)

  c_train = tf_keras.utils.to_categorical(c_train)
  c_test = tf_keras.utils.to_categorical(c_test)
  d_train = tf_keras.utils.to_categorical(d_train)
  d_test = tf_keras.utils.to_categorical(d_test)

  train_data = {
      'input_a': a_train,
      'input_b': b_train,
      'input_m': m_train,
      'output_c': c_train,
      'output_d': d_train
  }
  test_data = {
      'input_a': a_test,
      'input_b': b_test,
      'input_m': m_test,
      'output_c': c_test,
      'output_d': d_test
  }

  return (train_data, test_data)


def get_resource_for_simple_model(
    model_type='sequential',
    is_evaluate=False,
):
  """Returns a simple model plus data and input_fns for train/eval/predict.

  Args:
    model_type: One of 'sequential', 'functional' or 'subclass'.
    is_evaluate: If True the returned inference input_fn includes labels
      (for `evaluate`); otherwise it is features-only (for `predict`).

  Returns:
    A tuple `(model, (x_train, y_train), (x_test, y_test), train_input_fn,
    inference_input_fn)`.
  """
  if model_type == 'sequential':
    model = simple_sequential_model()
    model.build()
  elif model_type == 'subclass':
    model = simple_subclassed_model()
  else:
    assert model_type == 'functional'
    model = simple_functional_model()

  if model_type == 'subclass':
    # Subclassed models have no static input/output names before being
    # called, so use the default generated names.
    input_name = 'input_1'
    output_name = 'output_1'
  else:
    input_name = model.input_names[0]
    output_name = model.output_names[0]

  np.random.seed(_RANDOM_SEED)
  # NOTE(review): `get_test_data` import not visible in this chunk -- see
  # note in get_multi_inputs_multi_outputs_data above.
  (x_train, y_train), (x_test, y_test) = get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=_INPUT_SIZE,
      num_classes=_NUM_CLASS)
  y_train = tf_keras.utils.to_categorical(y_train)
  y_test = tf_keras.utils.to_categorical(y_test)

  train_input_fn = gen_input_fn(
      x=randomize_io_type(x_train, input_name),
      y=randomize_io_type(y_train, output_name),
      shuffle=False,
      num_epochs=None,
      batch_size=16)

  evaluate_input_fn = gen_input_fn(
      x=randomize_io_type(x_test, input_name),
      y=randomize_io_type(y_test, output_name),
      num_epochs=1,
      shuffle=False)

  predict_input_fn = gen_input_fn(
      x=randomize_io_type(x_test, input_name), num_epochs=1, shuffle=False)

  inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn

  return model, (x_train, y_train), (x_test, y_test), train_input_fn, inference_input_fn


def randomize_io_type(array, name):
  # Randomly exercise both supported input forms: a bare array, or a
  # {name: array} dict.
  switch = np.random.random()
  if switch > 0.5:
    return array
  else:
    return {name: array}


def multi_inputs_multi_outputs_model():
  """Builds a 3-input (one string), 2-output functional model."""
  input_a = tf_keras.layers.Input(shape=(16,), name='input_a')
  input_b = tf_keras.layers.Input(shape=(16,), name='input_b')
  input_m = tf_keras.layers.Input(shape=(8,), dtype='string', name='input_m')
  dense = tf_keras.layers.Dense(8, name='dense_1')
  interm_a = dense(input_a)
  # Read m
  interm_m = tf_keras.layers.Lambda(gen_parsing_ops.string_to_number)(input_m)
  interm_s = tf_keras.layers.Lambda(lambda k: k[0] * k[1])([interm_m, interm_a])
  interm_b = dense(input_b)
  merged = tf_keras.layers.concatenate([interm_s, interm_b], name='merge')
  output_c = tf_keras.layers.Dense(3, activation='softmax', name='dense_2')(
      merged)
  output_d = tf_keras.layers.Dense(2, activation='softmax', name='dense_3')(
      merged)
  model = tf_keras.models.Model(
      inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
  model.compile(
      loss='categorical_crossentropy',
      optimizer='rmsprop',
      metrics={
          'dense_2': 'categorical_accuracy',
          'dense_3': 'categorical_accuracy'
      })
  return model


class MyHook(tf.compat.v1.train.SessionRunHook):
  """Hook that creates a variable in `begin`, to exercise hook plumbing."""

  def begin(self):
    _ = tf.compat.v1.get_variable('temp', [1])


class TestKerasEstimator(tf.test.TestCase, parameterized.TestCase):
  """Tests for `keras_lib.model_to_estimator` train/evaluate/predict."""

  def setUp(self):
    self._base_dir = os.path.join(self.get_temp_dir(), 'keras_estimator_test')
    tf.compat.v1.gfile.MakeDirs(self._base_dir)
    self._config = run_config_lib.RunConfig(
        tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
    super(TestKerasEstimator, self).setUp()

  def tearDown(self):
    # Make sure nothing is stuck in limbo.
    tf.compat.v1.summary.FileWriterCache.clear()
    if os.path.isdir(self._base_dir):
      tf.compat.v1.gfile.DeleteRecursively(self._base_dir)
    tf_keras.backend.clear_session()
    super(TestKerasEstimator, self).tearDown()

  @parameterized.named_parameters(
      dict(
          testcase_name='functional',
          model_type='functional',
          checkpoint_format='saver'),
      dict(
          testcase_name='sequential',
          model_type='sequential',
          checkpoint_format='saver'),
      dict(
          testcase_name='subclass',
          model_type='subclass',
          optimizer='tf_rmsprop',
          checkpoint_format='saver'),
      dict(
          testcase_name='functional_object_ckpt',
          model_type='functional',
          checkpoint_format='checkpoint'),
      dict(
          testcase_name='sequential_object_ckpt_w_fit',
          model_type='sequential',
          checkpoint_format='checkpoint',
          fit_before_export=True,
          optimizer='tf_rmsprop'),
      dict(
          testcase_name='functional_w_fit',
          model_type='functional',
          fit_before_export=True,
          optimizer='tf_rmsprop',
          checkpoint_format='saver'),
      dict(
          testcase_name='subclass_w_fit',
          model_type='subclass',
          fit_before_export=True,
          optimizer='tf_rmsprop',
          checkpoint_format='saver'),
      # b/109935364
      dict(
          testcase_name='hooks',
          model_type='subclass',
          hook=MyHook,
          optimizer='tf_rmsprop',
          checkpoint_format='saver'),
      dict(
          testcase_name='hooks_and_fit',
          model_type='subclass',
          hook=MyHook,
          fit_before_export=True,
          optimizer='tf_rmsprop',
          checkpoint_format='saver'),
      dict(
          testcase_name='tf_optimizer',
          model_type='subclass',
          hook=MyHook,
          optimizer='tf_rmsprop',
          fit_before_export=True,
          checkpoint_format='saver'))
  def test_train_keras_estimator(self,
                                 model_type,
                                 checkpoint_format=None,
                                 fit_before_export=False,
                                 optimizer='rmsprop',
                                 hook=None):
    hooks = [hook()] if hook else None
    tf_optimizer = False
    if optimizer == 'tf_rmsprop':
      # Swap the string marker for a real TF1 optimizer instance.
      tf_optimizer = True
      optimizer = tf.compat.v1.train.RMSPropOptimizer(1e-3)

    keras_model, (x_train, y_train), (_, _), train_input_fn, eval_input_fn = (
        get_resource_for_simple_model(model_type=model_type, is_evaluate=True))
    keras_model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    if fit_before_export:
      keras_model.fit(x_train, y_train, epochs=1)

    est_keras = keras_lib.model_to_estimator(
        keras_model=keras_model,
        config=self._config,
        checkpoint_format=checkpoint_format)

    est_keras.train(
        input_fn=train_input_fn, steps=_TRAIN_SIZE / 16, hooks=hooks)
    before_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
    est_keras.train(
        input_fn=train_input_fn, steps=_TRAIN_SIZE / 16, hooks=hooks)
    after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
    self.assertLess(after_eval_results['loss'], before_eval_results['loss'])

    if checkpoint_format == 'object' and tf_optimizer:
      # Object-based checkpoints should be loadable back into the Keras model.
      latest_checkpoint = tf.train.latest_checkpoint(est_keras.model_dir)
      keras_model.load_weights(latest_checkpoint)

  def test_train_with_dense_features(self):
    feature_dict = {
        'sex': np.int64([1, 1, 1, 1, 0]),
        'cp': np.int64([0, 3, 3, 2, 1]),
        'slope': np.int64([3, 2, 0, 3, 1]),
    }
    label = np.int64([0, 1, 0, 0, 0])
    train_input_fn = numpy_io.numpy_input_fn(
        x=feature_dict, y=label, num_epochs=1, shuffle=False)
    feature_columns = list()
    input_features = dict()
    for feature_name, data_array in feature_dict.items():
      feature_columns.append(
          tf.feature_column.indicator_column(
              tf.feature_column.categorical_column_with_identity(
                  key=feature_name,
                  num_buckets=np.size(np.unique(data_array)))))
      input_features[feature_name] = tf_keras.layers.Input(
          name=feature_name,
          shape=(np.size(np.unique(data_array)),),
          dtype=tf.dtypes.int64)

    x = tf_keras_v1.layers.DenseFeatures(feature_columns)(input_features)
    x = tf_keras.layers.Dense(16, activation='relu')(x)
    logits = tf_keras.layers.Dense(1, activation='linear')(x)
    model = tf_keras.models.Model(inputs=input_features, outputs=logits)
    model.compile(
        optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    estimator_model = keras_lib.model_to_estimator(keras_model=model)
    estimator_model.train(input_fn=train_input_fn, steps=5)

  # TODO(b/139845232): Enable after TF2 nightly's start.
  def DISABLED_test_train_with_dense_features_embedding(self):
    feature_dict = {
        'sex': np.int64([1, 1, 1, 1, 0]),
        'cp': np.int64([0, 3, 3, 2, 1]),
        'slope': np.int64([3, 2, 0, 3, 1]),
    }
    label = np.int64([0, 1, 0, 0, 0])
    train_input_fn = numpy_io.numpy_input_fn(
        x=feature_dict, y=label, num_epochs=1, shuffle=False)
    feature_columns = list()
    input_features = dict()
    for feature_name, data_array in feature_dict.items():
      feature_columns.append(
          tf.feature_column.embedding_column(
              tf.feature_column.categorical_column_with_identity(
                  key=feature_name,
                  num_buckets=np.size(np.unique(data_array))),
              dimension=3))
      input_features[feature_name] = tf_keras.layers.Input(
          name=feature_name,
          shape=(np.size(np.unique(data_array)),),
          dtype=tf.dtypes.int64)

    df = tf_keras_v1.layers.DenseFeatures(feature_columns)
    x = df(input_features)
    x = tf_keras.layers.Dense(16, activation='relu')(x)
    logits = tf_keras.layers.Dense(1, activation='linear')(x)
    model = tf_keras.models.Model(inputs=input_features, outputs=logits)
    model.compile(
        optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    estimator_model = keras_lib.model_to_estimator(keras_model=model)
    estimator_model.train(input_fn=train_input_fn, steps=5)
    # We assert that we find the embedding_weights variables in the dependencies
    # for the DenseFeatures layer.
    dependency_names = list(df._trackable_children())
    self.assertNotIn('embedding_weights', dependency_names)
    self.assertIn('cp_embedding/embedding_weights', dependency_names)
    self.assertIn('sex_embedding/embedding_weights', dependency_names)
    self.assertIn('slope_embedding/embedding_weights', dependency_names)

  # TODO(b/139845232): Enable after TF2 nightly's start.
  def DISABLED_test_train_with_dense_features_v2(self):
    feature_dict = {
        'sex': np.int64([1, 1, 1, 1, 0]),
        'cp': np.int64([0, 3, 3, 2, 1]),
        'slope': np.int64([3, 2, 0, 3, 1]),
    }
    label = np.int64([0, 1, 0, 0, 0])
    train_input_fn = numpy_io.numpy_input_fn(
        x=feature_dict, y=label, num_epochs=1, shuffle=False)
    feature_columns = list()
    input_features = dict()
    for feature_name, data_array in feature_dict.items():
      feature_columns.append(
          tf.feature_column.embedding_column(
              tf.feature_column.categorical_column_with_identity(
                  key=feature_name,
                  num_buckets=np.size(np.unique(data_array))),
              dimension=3))
      input_features[feature_name] = tf_keras.layers.Input(
          name=feature_name,
          shape=(np.size(np.unique(data_array)),),
          dtype=tf.dtypes.int64)

    # Same as the test above, but with the v2 DenseFeatures layer.
    df = tf_keras_v2.layers.DenseFeatures(feature_columns)
    x = df(input_features)
    x = tf_keras.layers.Dense(16, activation='relu')(x)
    logits = tf_keras.layers.Dense(1, activation='linear')(x)
    model = tf_keras.models.Model(inputs=input_features, outputs=logits)
    model.compile(
        optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    estimator_model = keras_lib.model_to_estimator(keras_model=model)
    estimator_model.train(input_fn=train_input_fn, steps=5)
    # We assert that we find the embedding_weights variables in the dependencies
    # for the DenseFeatures layer.
    dependency_names = list(df._trackable_children())
    self.assertNotIn('embedding_weights', dependency_names)
    self.assertIn('cp_embedding/embedding_weights', dependency_names)
    self.assertIn('sex_embedding/embedding_weights', dependency_names)
    self.assertIn('slope_embedding/embedding_weights', dependency_names)

  def test_evaluate(self):
    keras_model, (x_train, y_train), (x_test, y_test), _, eval_input_fn = get_resource_for_simple_model(
        model_type='functional', is_evaluate=True)

    metrics = [
        'binary_accuracy', 'binary_crossentropy', 'categorical_accuracy',
        'categorical_crossentropy', 'cosine_proximity', 'hinge',
        'kullback_leibler_divergence', 'mean_absolute_error',
        'mean_absolute_percentage_error', 'mean_squared_error',
        'mean_squared_logarithmic_error', 'poisson', 'squared_hinge',
        'top_k_categorical_accuracy'
    ]
    keras_model.compile(
        loss='categorical_crossentropy', optimizer='adam', metrics=metrics)
    keras_model.fit(x_train, y_train, epochs=1)
    keras_eval = keras_model.evaluate(x_test, y_test, batch_size=32)

    keras_est = keras_lib.model_to_estimator(
        keras_model=keras_model, config=self._config)
    est_eval = keras_est.evaluate(input_fn=eval_input_fn)

    metrics = ['loss'] + metrics

    # Check loss and all metrics match between keras and estimator.
    def shift(val):
      # Normalize magnitudes so assertAlmostEqual compares significant digits
      # rather than absolute values.
      if val == 0:
        return 0
      else:
        return val / 10**int(math.log10(abs(val)))

    for i, metric_name in enumerate(metrics):
      if i == 0:
        continue
      # TODO(b/148461691): Investigate 1% diff in loss.
      self.assertAlmostEqual(
          shift(keras_eval[i]),
          shift(est_eval[metric_name]),
          places=4,
          msg='%s mismatch, keras model: %s, estimator: %s' %
          (metric_name, keras_eval[i], est_eval[metric_name]))

  def test_evaluate_multi_io_model(self):
    input_a = tf_keras.layers.Input(shape=(16,), name='input_a')
    input_b = tf_keras.layers.Input(shape=(16,), name='input_b')
    dense = tf_keras.layers.Dense(8, name='dense_1')
    interm_a = dense(input_a)
    interm_b = dense(input_b)
    merged = tf_keras.layers.concatenate([interm_a, interm_b], name='merge')
    output_a = tf_keras.layers.Dense(
        3, activation='softmax', name='dense_2')(
            merged)
    output_b = tf_keras.layers.Dense(
        2, activation='softmax', name='dense_3')(
            merged)
    keras_model = tf_keras.models.Model(
        inputs=[input_a, input_b], outputs=[output_a, output_b])
    keras_model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics={
            'dense_2': 'categorical_accuracy',
            'dense_3': 'categorical_accuracy'
        })

    np.random.seed(_RANDOM_SEED)
    (x_train_1, y_train_1), (x_test_1, y_test_1) = get_test_data(
        train_samples=_TRAIN_SIZE,
        test_samples=50,
        input_shape=(16,),
        num_classes=3)
    (x_train_2, y_train_2), (x_test_2, y_test_2) = get_test_data(
        train_samples=_TRAIN_SIZE,
        test_samples=50,
        input_shape=(16,),
        num_classes=2)
    y_train_1 = tf_keras.utils.to_categorical(y_train_1)
    y_test_1 = tf_keras.utils.to_categorical(y_test_1)
    y_train_2 = tf_keras.utils.to_categorical(y_train_2)
    y_test_2 = tf_keras.utils.to_categorical(y_test_2)

    keras_model.fit((x_train_1, x_train_2), (y_train_1, y_train_2), epochs=1)
    keras_eval = keras_model.evaluate((x_test_1, x_test_2),
                                      (y_test_1, y_test_2),
                                      batch_size=32)

    def input_fn():
      ds = tf.compat.v1.data.Dataset.from_tensor_slices(
          ((x_test_1, x_test_2), (y_test_1, y_test_2)))
      return ds.batch(128)

    keras_est = keras_lib.model_to_estimator(
        keras_model=keras_model, config=self._config)
    est_eval = keras_est.evaluate(input_fn=input_fn)

    def verify_correctness(metric_names):
      # Compare per-metric values between Keras and the estimator; the first
      # three entries (the losses) are skipped.
      for i, metric_name in enumerate(metric_names):
        if i < 3:
          # TODO(b/148461691): Investigate 1% diff in loss.
          continue
        self.assertAlmostEqual(
            keras_eval[i],
            est_eval[metric_name],
            places=4,
            msg='%s mismatch, keras model: %s, estimator: %s' %
            (metric_name, keras_eval[i], est_eval[metric_name]))

    verify_correctness([
        'loss', 'dense_2_loss', 'dense_3_loss', 'dense_2_categorical_accuracy',
        'dense_3_categorical_accuracy'
    ])

    # Re-run with user-supplied metric display names.
    metric_names_map = {
        'dense_2_categorical_accuracy': 'acc_1',
        'dense_3_categorical_accuracy': 'acc_2',
    }
    keras_est = keras_lib.model_to_estimator(
        keras_model=keras_model,
        config=self._config,
        metric_names_map=metric_names_map)
    est_eval = keras_est.evaluate(input_fn=input_fn)
    verify_correctness(
        ['loss', 'dense_2_loss', 'dense_3_loss', 'acc_1', 'acc_2'])

  def test_invalid_metric_names_map(self):
    keras_model, (_, _), (_, _), _, eval_input_fn = get_resource_for_simple_model(
        model_type='functional', is_evaluate=True)
    keras_model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['binary_accuracy'])
    # Map references a metric name that does not exist on the model.
    keras_est = keras_lib.model_to_estimator(
        keras_model=keras_model,
        config=self._config,
        metric_names_map={'binary_acc': ''})
    with self.assertRaisesRegexp(ValueError,
                                 r'Invalid `metric_names_map`.*do not exist'):
      keras_est.evaluate(input_fn=eval_input_fn)

    # Map contains an extra, unexpected key alongside a valid one.
    keras_est = keras_lib.model_to_estimator(
        keras_model=keras_model,
        config=self._config,
        metric_names_map={
            'binary_accuracy': 'acc',
            'abcde': ''
        })
    with self.assertRaisesRegexp(
        ValueError, r'Invalid `metric_names_map`.*unexpected keys'):
      keras_est.evaluate(input_fn=eval_input_fn)

  def test_predict(self):
    # Check that predict on a pretrained model yield the same result.
    keras_model, (x_train, y_train), (x_test, _), _, pred_input_fn = get_resource_for_simple_model(
        model_type='sequential', is_evaluate=False)
    keras_model.compile(
        loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # NOTE(review): this test body appears truncated in this extraction -- it
    # compiles the model but the fit/predict comparison (using x_train,
    # y_train, x_test and pred_input_fn) is missing. Verify against upstream.

  def test_multi_inputs_multi_outputs_with_input_fn_as_dict(self):
    train_data, test_data = get_multi_inputs_multi_outputs_data()

    def train_input_fn():
      input_dict = {
          'input_a': train_data['input_a'],
          'input_b': train_data['input_b'],
          'input_m': train_data['input_m'].astype(str)
      }
      output_dict = {
          'dense_2': train_data['output_c'],
          'dense_3': train_data['output_d']
      }
      return input_dict, output_dict

    def eval_input_fn():
      input_dict = {
          'input_a': test_data['input_a'],
          'input_b': test_data['input_b'],
          'input_m': test_data['input_m'].astype(str)
      }
      output_dict = {
          'dense_2': test_data['output_c'],
          'dense_3': test_data['output_d']
      }
      return input_dict, output_dict

    def pred_input_fn():
      input_dict = {
          'input_a': test_data['input_a'],
          'input_b': test_data['input_b'],
          'input_m': test_data['input_m'].astype(str)
      }
      return input_dict

    self.do_test_multi_inputs_multi_outputs_with_input_fn(
        train_input_fn, eval_input_fn, pred_input_fn)

  def test_multi_inputs_multi_outputs_with_input_fn_as_list(self):
    train_data, test_data = get_multi_inputs_multi_outputs_data()

    def train_input_fn():
      input_list = [
          train_data['input_a'], train_data['input_b'],
          train_data['input_m'].astype(str)
      ]
      output_list = [train_data['output_c'], train_data['output_d']]
      return input_list, output_list

    def eval_input_fn():
      input_list = [
          test_data['input_a'], test_data['input_b'],
          test_data['input_m'].astype(str)
      ]
      output_list = [test_data['output_c'], test_data['output_d']]
      return input_list, output_list

    def pred_input_fn():
      input_list = [
          test_data['input_a'], test_data['input_b'],
          test_data['input_m'].astype(str)
      ]
      return input_list

    self.do_test_multi_inputs_multi_outputs_with_input_fn(
        train_input_fn, eval_input_fn, pred_input_fn)

  def do_test_multi_inputs_multi_outputs_with_input_fn(self, train_input_fn,
                                                       eval_input_fn,
                                                       pred_input_fn):
    # Shared driver: train must improve on the untrained baseline, and
    # predict must run without error.
    model = multi_inputs_multi_outputs_model()
    est_keras = keras_lib.model_to_estimator(
        keras_model=model, config=self._config)
    baseline_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
    est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
    eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
    self.assertLess(eval_results['loss'], baseline_eval_results['loss'])
    est_keras.predict(input_fn=pred_input_fn)

  def test_init_from_file(self):
    if h5py is None:
      return  # Skip test if models cannot be saved.

    keras_model, (x_train, y_train), (x_test, _), _, pred_input_fn = get_resource_for_simple_model(
        model_type='functional', is_evaluate=False)
    keras_model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['categorical_accuracy'])

    keras_model.fit(x_train, y_train, epochs=1)
    keras_pred = [np.argmax(y) for y in keras_model.predict(x_test)]
    fname = os.path.join(self._base_dir, 'keras_model.h5')
    tf_keras.models.save_model(keras_model, fname)

    # An estimator built from the saved HDF5 file must predict identically.
    keras_est = keras_lib.model_to_estimator(
        keras_model_path=fname, config=self._config)
    est_pred = [
        np.argmax(y[keras_model.output_names[0]])
        for y in keras_est.predict(input_fn=pred_input_fn)
    ]
    self.assertAllEqual(est_pred, keras_pred)

  def test_keras_model_init_error(self):
    # Neither model nor path given.
    with self.assertRaisesRegexp(ValueError, 'Either'):
      keras_lib.model_to_estimator()

    # Both model and path given.
    keras_model = simple_sequential_model()
    with self.assertRaisesRegexp(ValueError, 'not both'):
      keras_lib.model_to_estimator(
          keras_model=keras_model,
          keras_model_path=tempfile.mkdtemp(dir=self._base_dir))

    # Model given but never compiled.
    keras_model = simple_sequential_model()
    with self.assertRaisesRegexp(ValueError, 'compiled'):
      keras_lib.model_to_estimator(keras_model=keras_model)

  def test_invalid_ionames_error(self):
    (x_train, y_train), (_, _) = get_test_data(
        train_samples=_TRAIN_SIZE,
        test_samples=100,
        input_shape=(10,),
        num_classes=2)
    y_train = tf_keras.utils.to_categorical(y_train)

    def invald_input_name_input_fn():
      input_dict = {'invalid_input_name': x_train}
      return input_dict, y_train

    def invald_output_name_input_fn():
      input_dict = {'input_layer': x_train}
      output_dict = {'invalid_output_name': y_train}
      return input_dict, output_dict

    model = simple_functional_model()
    model.compile(
        loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    est_keras = keras_lib.model_to_estimator(
        keras_model=model, config=self._config)

    # The error message must list both the offending and the expected keys.
    regexp_pattern = r'{} keys:(\s|.)*{}(\s|.)*Missed keys:(\s|.)*{}'
    with self.assertRaisesRegexp(
        keras_lib.FormattedKeyError,
        regexp_pattern.format('features', 'invalid_input_name', 'input_layer')):
      est_keras.train(input_fn=invald_input_name_input_fn, steps=100)

    with self.assertRaisesRegexp(
        keras_lib.FormattedKeyError,
        regexp_pattern.format('labels', 'invalid_output_name', 'dense_1')):
      est_keras.train(input_fn=invald_output_name_input_fn, steps=100)

  def test_custom_objects(self):

    def custom_relu(x):
      return tf_keras.backend.relu(x, max_value=6)

    keras_model = simple_functional_model(activation=custom_relu)
    keras_model.compile(loss='categorical_crossentropy', optimizer='adam')
    custom_objects = {'custom_relu': custom_relu}

    (x_train, y_train), _ = get_test_data(
        train_samples=_TRAIN_SIZE,
        test_samples=50,
        input_shape=(10,),
        num_classes=2)
    y_train = tf_keras.utils.to_categorical(y_train, 2)
    input_name = keras_model.input_names[0]
    output_name = keras_model.output_names[0]
    train_input_fn = gen_input_fn(
        x=randomize_io_type(x_train, input_name),
        y=randomize_io_type(y_train, output_name),
        shuffle=False,
        num_epochs=None,
        batch_size=16)
    # Without custom_objects the custom activation cannot be deserialized.
    with self.assertRaisesRegex(Exception, 'custom_relu'):
      # Could be either a TypeError or ValueError
      est = keras_lib.model_to_estimator(
          keras_model=keras_model,
          model_dir=tempfile.mkdtemp(dir=self._base_dir))
      est.train(input_fn=train_input_fn, steps=1)

    # With custom_objects the conversion and training succeed.
    est = keras_lib.model_to_estimator(
        keras_model=keras_model,
        model_dir=tempfile.mkdtemp(dir=self._base_dir),
        custom_objects=custom_objects)
    est.train(input_fn=train_input_fn, steps=1)

  def test_tf_config(self):
    keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
    keras_model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['mse', tf_keras.metrics.CategoricalAccuracy()])

    # Conversion should succeed under a distributed TF_CONFIG environment.
    tf_config = json.dumps({
        'cluster': {
            run_config_lib.TaskType.PS: ['localhost:1234'],
            run_config_lib.TaskType.WORKER: ['localhost:1236'],
            run_config_lib.TaskType.MASTER: ['localhost:1238']
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 0
        }
    })
    with tf.compat.v1.test.mock.patch.dict('os.environ',
                                           {'TF_CONFIG': tf_config}):
      keras_lib.model_to_estimator(
          keras_model=keras_model,
          model_dir=tempfile.mkdtemp(dir=self._base_dir))

  def test_gpu_config(self):
    with tf.Graph().as_default():
      keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
      keras_model.compile(
          loss='categorical_crossentropy',
          optimizer='rmsprop',
          metrics=['mse', tf_keras.metrics.CategoricalAccuracy()])

      # The GPU options from the RunConfig must propagate into the Keras
      # backend session created during conversion.
      gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.3)
      sess_config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
      self._config._session_config = sess_config
      keras_lib.model_to_estimator(keras_model=keras_model, config=self._config)
      self.assertEqual(
          tf_keras_v1.backend.get_session(
          )._config.gpu_options.per_process_gpu_memory_fraction,
          gpu_options.per_process_gpu_memory_fraction)

  def test_with_empty_config(self):
    keras_model, _, _, _, _ = get_resource_for_simple_model(
        model_type='sequential', is_evaluate=True)
    keras_model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['mse', tf_keras.metrics.CategoricalAccuracy()])

    # A default RunConfig should inherit the default session config and the
    # explicitly passed model_dir.
    est_keras = keras_lib.model_to_estimator(
        keras_model=keras_model,
        model_dir=self._base_dir,
        config=run_config_lib.RunConfig())
    self.assertEqual(run_config_lib.get_default_session_config(),
                     est_keras._session_config)
    self.assertEqual(est_keras._session_config,
                     est_keras._config.session_config)
self.assertEqual(self._base_dir, est_keras._config.model_dir) self.assertEqual(self._base_dir, est_keras._model_dir) est_keras = keras_lib.model_to_estimator( keras_model=keras_model, model_dir=self._base_dir, config=None) self.assertEqual(run_config_lib.get_default_session_config(), est_keras._session_config) self.assertEqual(est_keras._session_config, est_keras._config.session_config) self.assertEqual(self._base_dir, est_keras._config.model_dir) self.assertEqual(self._base_dir, est_keras._model_dir) def test_with_empty_config_and_empty_model_dir(self): keras_model, _, _, _, _ = get_resource_for_simple_model( model_type='sequential', is_evaluate=True) keras_model.compile( loss='categorical_crossentropy', optimizer='rmsprop', metrics=['mse', tf_keras.metrics.CategoricalAccuracy()]) with tf.compat.v1.test.mock.patch.object( tempfile, 'mkdtemp', return_value=_TMP_DIR): est_keras = keras_lib.model_to_estimator( keras_model=keras_model, config=run_config_lib.RunConfig()) self.assertEqual(est_keras._model_dir, _TMP_DIR) def test_with_conflicting_model_dir_and_config(self): keras_model, _, _, _, _ = get_resource_for_simple_model( model_type='sequential', is_evaluate=True) keras_model.compile( loss='categorical_crossentropy', optimizer='rmsprop', metrics=['mse', tf_keras.metrics.CategoricalAccuracy()]) with self.assertRaisesRegexp( ValueError, '`model_dir` are set both in ' 'constructor and `RunConfig`'): keras_lib.model_to_estimator( keras_model=keras_model, model_dir=self._base_dir, config=run_config_lib.RunConfig(model_dir=_TMP_DIR)) def test_pretrained_weights(self): keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() keras_model.compile( loss='categorical_crossentropy', optimizer=tf.compat.v1.train.RMSPropOptimizer(1e-3), metrics=['mse', tf_keras.metrics.CategoricalAccuracy()]) keras_model.train_on_batch( np.random.random((10,) + _INPUT_SIZE), np.random.random( (10, _NUM_CLASS))) weights = keras_model.get_weights() keras_model, (_, _), (_, _), _, _ = 
get_resource_for_simple_model() keras_model.set_weights(weights) keras_model.compile( loss='categorical_crossentropy', optimizer='sgd', metrics=['mse', tf_keras.metrics.CategoricalAccuracy()]) keras_lib.model_to_estimator(keras_model=keras_model, config=self._config) def assert_increasing_global_step(self, optimizer): keras_model, _, _, train_input_fn, _ = get_resource_for_simple_model( model_type='sequential', is_evaluate=True) keras_model.compile( loss='categorical_crossentropy', optimizer=optimizer, metrics=['mse', tf_keras.metrics.CategoricalAccuracy()]) with self.cached_session() as sess: keras_model_fn = keras_lib._create_keras_model_fn(keras_model) global_step = tf.compat.v1.train.create_global_step() features, labels = train_input_fn().make_one_shot_iterator().get_next() spec = keras_model_fn(features, labels, mode=ModeKeys.TRAIN) sess.run(tf.compat.v1.initializers.global_variables()) sess.run(tf.compat.v1.initializers.local_variables()) self.assertEqual(global_step.eval(), 0) # Sanity check sess.run(spec.train_op) self.assertEqual(global_step.eval(), 1) @test_util.run_v1_only('training_util.create_global_step is v1 only.') def test_model_fn_increments_global_step_tf_optimizer(self): self.assert_increasing_global_step( tf.compat.v1.train.RMSPropOptimizer(1e-3)) @test_util.run_v1_only('training_util.create_global_step is v1 only.') def test_model_fn_increments_global_step_keras_optimizer(self): self.assert_increasing_global_step('rmsprop') @parameterized.named_parameters( dict(testcase_name='object_ckpt', checkpoint_format='checkpoint'), dict(testcase_name='name_ckpt', checkpoint_format='saver')) def test_export_keras_estimator(self, checkpoint_format): keras_model, (x_train, y_train), ( _, _), train_input_fn, _ = get_resource_for_simple_model( model_type='sequential', is_evaluate=False) keras_model.compile( loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) keras_model.fit(x_train, y_train, epochs=1) bias_value = 
tf_keras.backend.get_value(keras_model.layers[0].bias) est_keras = keras_lib.model_to_estimator( keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir), checkpoint_format=checkpoint_format) def serving_input_receiver_fn(): feature_spec = { 'dense_input': tf.io.FixedLenFeature([1], dtype=tf.dtypes.float32) } return export_lib.build_parsing_serving_input_receiver_fn(feature_spec) # Try immediately exporting, testing that (1) exported values are the same, # and (2) estimator can be exported without saving a checkpoint into the # model directory. saved_model_dir = est_keras.export_saved_model( tempfile.mkdtemp(dir=self._base_dir), serving_input_receiver_fn()) variables_path = path_helpers.get_variables_path(saved_model_dir) variable_name = 'dense/bias' if checkpoint_format == 'checkpoint': names_to_keys = saver_lib.object_graph_key_mapping(variables_path) variable_name = names_to_keys[variable_name] self.assertAllClose(bias_value, tf.train.load_variable(variables_path, variable_name)) # Export the estimator after training a bit. 
est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) saved_model_dir = est_keras.export_saved_model( tempfile.mkdtemp(dir=self._base_dir), serving_input_receiver_fn()) variables_path = path_helpers.get_variables_path(saved_model_dir) self.assertNotAllClose( bias_value, tf.train.load_variable(variables_path, variable_name)) @parameterized.named_parameters( dict(testcase_name='object_ckpt', checkpoint_format='checkpoint'), dict(testcase_name='name_ckpt', checkpoint_format='saver')) def test_export_keras_estimator_custom_signatures(self, checkpoint_format): inputs_a = np.random.random((320, 1)) inputs_b = np.random.random((320, 1)) outputs_c = np.random.random((320, 1)) outputs_d = np.random.random((320, 1)) dataset = tf.data.Dataset.from_tensor_slices(( {'a': inputs_a, 'b': inputs_b}, {'c': outputs_c, 'd': outputs_d})).batch(32) keras_inputs_a = tf_keras.Input(shape=(1,), dtype=tf.float32, name='a') keras_inputs_b = tf_keras.Input(shape=(1,), dtype=tf.float32, name='b') keras_outputs_c = tf_keras.layers.Dense(units=1, name='c')(keras_inputs_a) keras_outputs_d = tf_keras.layers.Dense( units=1, name='d', activation='sigmoid')(keras_inputs_b) keras_model = tf_keras.Model( inputs={'a': keras_inputs_a, 'b': keras_inputs_b}, outputs={'c': keras_outputs_c, 'd': keras_outputs_d}) keras_model.compile('sgd', {'c': 'mse', 'd': 'binary_crossentropy'}, []) keras_model.fit(dataset) est_keras = keras_lib.model_to_estimator( keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir), checkpoint_format=checkpoint_format, export_outputs={'c': export_output.RegressionOutput, 'd': export_output.ClassificationOutput}) def serving_input_receiver_fn(): feature_spec = { 'a': tf.io.FixedLenFeature([1], dtype=tf.dtypes.float32), 'b': tf.io.FixedLenFeature([1], dtype=tf.dtypes.float32), } return export_lib.build_parsing_serving_input_receiver_fn(feature_spec) # Try immediately exporting, testing exported signatures saved_model_dir = est_keras.export_saved_model( 
tempfile.mkdtemp(dir=self._base_dir), serving_input_receiver_fn()) imported_est = tf.saved_model.load(saved_model_dir) imported_signatures = imported_est.signatures assert 'c' in imported_signatures assert 'd' in imported_signatures assert 'serving_default' in imported_signatures @parameterized.named_parameters( dict(testcase_name='object_ckpt', checkpoint_format='checkpoint'), dict(testcase_name='name_ckpt', checkpoint_format='saver')) def test_export_keras_estimator_unknown_signatures(self, checkpoint_format): inputs_a = np.random.random((320, 1)) inputs_b = np.random.random((320, 1)) outputs_c = np.random.random((320, 1)) outputs_d = np.random.random((320, 1)) dataset = tf.data.Dataset.from_tensor_slices(( {'a': inputs_a, 'b': inputs_b}, {'c': outputs_c, 'd': outputs_d})).batch(32) keras_inputs_a = tf_keras.Input(shape=(1,), dtype=tf.float32, name='a') keras_inputs_b = tf_keras.Input(shape=(1,), dtype=tf.float32, name='b') keras_outputs_c = tf_keras.layers.Dense(units=1, name='c')(keras_inputs_a) keras_outputs_d = tf_keras.layers.Dense( units=1, name='d', activation='sigmoid')(keras_inputs_b) keras_model = tf_keras.Model( inputs={'a': keras_inputs_a, 'b': keras_inputs_b}, outputs={'c': keras_outputs_c, 'd': keras_outputs_d}) keras_model.compile('sgd', {'c': 'mse', 'd': 'binary_crossentropy'}, []) keras_model.fit(dataset) with self.assertRaisesRegex( keras_lib.FormattedKeyError, r'Missed keys'): est_keras = keras_lib.model_to_estimator( keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir), checkpoint_format=checkpoint_format, export_outputs={'c': export_output.RegressionOutput, 'p': export_output.ClassificationOutput}) def serving_input_receiver_fn(): feature_spec = { 'a': tf.io.FixedLenFeature([1], dtype=tf.dtypes.float32), 'b': tf.io.FixedLenFeature([1], dtype=tf.dtypes.float32), } return export_lib.build_parsing_serving_input_receiver_fn(feature_spec) # Try immediately exporting, testing exported signatures _ = est_keras.export_saved_model( 
tempfile.mkdtemp(dir=self._base_dir), serving_input_receiver_fn()) def test_export_subclassed_model_retains_model_state(self): keras_model, (x_train, y_train), ( _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model( model_type='subclass', is_evaluate=True) keras_model.compile( optimizer=tf.compat.v1.train.RMSPropOptimizer(1e-3), loss='categorical_crossentropy', metrics=['accuracy']) keras_model.fit(x_train, y_train, epochs=1) iterations = tf_keras.backend.get_value(keras_model.optimizer.iterations) optimizer = keras_model.optimizer est_keras = keras_lib.model_to_estimator( keras_model=keras_model, config=self._config, checkpoint_format='saver') est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) # Subclassed models resets the model object. Assert that attributes are # properly restored. iterations_after = tf_keras.backend.get_value( keras_model.optimizer.iterations) self.assertEqual(optimizer, keras_model.optimizer) self.assertEqual(iterations, iterations_after) # TODO(b/132839451): model.fit results in an error after model_to_estimator. 
# keras_model.fit(x_train, y_train, epochs=1) def test_warm_start_from_keras_ckpt(self): keras_model, (x_train, y_train), ( _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model( model_type='functional', is_evaluate=True) keras_model.compile( optimizer=tf.compat.v1.train.RMSPropOptimizer(1e-3), loss='categorical_crossentropy', metrics=['accuracy']) keras_model.fit(x_train, y_train, epochs=1) warm_start_path = os.path.join(self._config.model_dir, 'keras', 'warm_start.ckpt') keras_model.save_weights(warm_start_path) est_keras = keras_lib.model_to_estimator( keras_model=keras_model, config=self._config, checkpoint_format='saver') self.assertEqual(warm_start_path, est_keras._warm_start_settings.ckpt_to_initialize_from) before_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) self.assertLess(after_eval_results['loss'], before_eval_results['loss']) def test_sample_weights(self): # Create simple pass-through model input_layer = tf_keras.layers.Input(shape=1, name='input_layer') keras_model = tf_keras.models.Model(inputs=input_layer, outputs=input_layer) keras_model.compile(loss='mean_absolute_error', optimizer='adam') features = [[0.], [0], [1], [1]] sample_weights = [0, .4, 1, 1] targets = [[0], [1], [0], [1]] expected_loss = keras_model.test_on_batch( tf.constant(features), tf.constant(targets), tf.constant(sample_weights)) def input_fn(): dataset = tf.compat.v1.data.Dataset.from_tensors(({ 'features': features, 'sample_weights': sample_weights }, targets)) return dataset est_keras = keras_lib.model_to_estimator( keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir)) eval_results = est_keras.evaluate(input_fn, steps=1) self.assertAllClose(expected_loss, eval_results['loss']) # Test multiple with outputs and sample weights. 
keras_model = tf_keras.models.Model( inputs=input_layer, outputs=[input_layer, input_layer]) keras_model.compile(loss='mean_absolute_error', optimizer='adam') expected_loss = keras_model.test_on_batch( tf.constant(features), [tf.constant(targets), tf.constant(targets)], [tf.constant(sample_weights), tf.constant(sample_weights)])[0] def input_fn_multiple_targets(): dataset = tf.compat.v1.data.Dataset.from_tensors( (features, sample_weights, targets)) dataset = dataset.map(lambda x, y, z: ({ 'features': x, 'sample_weights': (y, y) }, (z, z))) return dataset est_keras = keras_lib.model_to_estimator( keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir)) eval_results = est_keras.evaluate(input_fn_multiple_targets, steps=1) self.assertAllClose(expected_loss, eval_results['loss']) @parameterized.parameters([tf_keras_v2.layers.LSTM, tf_keras_v2.layers.GRU]) def test_model_to_estimator_with_rnn(self, layer): # See https://github.com/tensorflow/tensorflow/issues/27750 for details. 
timestep = 10 rnn_cell_size = 8 layers = [ tf_keras.layers.Reshape([timestep, 1], input_shape=[ timestep, ]), layer(rnn_cell_size, return_sequences=True), layer(rnn_cell_size), tf_keras.layers.Dense(1) ] model = tf_keras.models.Sequential(layers) model.compile(loss='mse', optimizer='sgd') keras_lib.model_to_estimator( keras_model=model, checkpoint_format='checkpoint', model_dir=tempfile.mkdtemp(dir=self._base_dir)) def get_test_data(train_samples, test_samples, input_shape, num_classes, random_seed=None): if random_seed is not None: np.random.seed(random_seed) num_sample = train_samples + test_samples templates = 2 * num_classes * np.random.random((num_classes,) + input_shape) y = np.random.randint(0, num_classes, size=(num_sample,)) x = np.zeros((num_sample,) + input_shape, dtype=np.float32) for i in range(num_sample): x[i] = templates[y[i]] + np.random.normal(loc=0, scale=1., size=input_shape) return ((x[:train_samples], y[:train_samples]), (x[train_samples:], y[train_samples:])) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/mode_keys.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Exporting ModeKeys to tf.estimator namespace.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.saved_model.model_utils.mode_keys import EstimatorModeKeys as ModeKeys from tensorflow_estimator.python.estimator.estimator_export import estimator_export estimator_export('estimator.ModeKeys')(ModeKeys) ================================================ FILE: tensorflow_estimator/python/estimator/model_fn.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Classes and methods related to model_fn."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import six
import tensorflow as tf
from tensorflow.python.saved_model import model_utils as export_utils
from tensorflow.python.tpu import tensor_tracer
from tensorflow.python.util import function_utils
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys

LOSS_METRIC_KEY = 'loss'
AVERAGE_LOSS_METRIC_KEY = 'average_loss'


@estimator_export('estimator.EstimatorSpec')
class EstimatorSpec(
    collections.namedtuple('EstimatorSpec', [
        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
        'evaluation_hooks', 'prediction_hooks'
    ])):
  """Ops and objects returned from a `model_fn` and passed to an `Estimator`.

  `EstimatorSpec` fully defines the model to be run by an `Estimator`.
  """

  def __new__(cls,
              mode,
              predictions=None,
              loss=None,
              train_op=None,
              eval_metric_ops=None,
              export_outputs=None,
              training_chief_hooks=None,
              training_hooks=None,
              scaffold=None,
              evaluation_hooks=None,
              prediction_hooks=None):
    """Creates a validated `EstimatorSpec` instance.

    Depending on the value of `mode`, different arguments are required. Namely

    * For `mode == ModeKeys.TRAIN`: required fields are `loss` and `train_op`.
    * For `mode == ModeKeys.EVAL`: required field is `loss`.
    * For `mode == ModeKeys.PREDICT`: required fields are `predictions`.

    model_fn can populate all arguments independent of mode. In this case, some
    arguments will be ignored by an `Estimator`. E.g. `train_op` will be
    ignored in eval and infer modes. Example:

    ```python
    def my_model_fn(features, labels, mode):
      predictions = ...
      loss = ...
      train_op = ...
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op)
    ```

    Alternatively, model_fn can just populate the arguments appropriate to the
    given mode. Example:

    ```python
    def my_model_fn(features, labels, mode):
      if (mode == tf.estimator.ModeKeys.TRAIN or
          mode == tf.estimator.ModeKeys.EVAL):
        loss = ...
      else:
        loss = None
      if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = ...
      else:
        train_op = None
      if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = ...
      else:
        predictions = None

      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op)
    ```

    Args:
      mode: A `ModeKeys`. Specifies if this is training, evaluation or
        prediction.
      predictions: Predictions `Tensor` or dict of `Tensor`.
      loss: Training loss `Tensor`. Must be either scalar, or with shape `[1]`.
      train_op: Op for the training step.
      eval_metric_ops: Dict of metric results keyed by name. The values of the
        dict can be one of the following: (1) instance of `Metric` class. (2)
        Results of calling a metric function, namely a `(metric_tensor,
        update_op)` tuple. `metric_tensor` should be evaluated without any
        impact on state (typically is a pure computation results based on
        variables.). For example, it should not trigger the `update_op` or
        requires any input fetching.
      export_outputs: Describes the output signatures to be exported to
        `SavedModel` and used during serving. A dict `{name: output}` where:
        * name: An arbitrary name for this output.
        * output: an `ExportOutput` object such as `ClassificationOutput`,
          `RegressionOutput`, or `PredictOutput`. Single-headed models only
          need to specify one entry in this dictionary. Multi-headed models
          should specify one entry for each head, one of which must be named
          using
          `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`.
          If no entry is provided, a default `PredictOutput` mapping to
          `predictions` will be created.
      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run on
        all workers during training.
      scaffold: A `tf.train.Scaffold` object that can be used to set
        initialization, saver, and more to be used in training.
      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to run
        during evaluation.
      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to run
        during predictions.

    Returns:
      A validated `EstimatorSpec` object.

    Raises:
      ValueError: If validation fails.
      TypeError: If any of the arguments is not the expected type.
    """
    # Each helper validates and possibly normalizes its field; mode-dependent
    # requirements (e.g. loss required in TRAIN/EVAL) are enforced there.
    train_op = _validate_estimator_spec_train_op(train_op, mode)
    loss = _validate_estimator_spec_loss(loss, mode)
    predictions = _validate_estimator_spec_predictions(predictions, mode)
    export_outputs = _validate_estimator_spec_export_outputs(
        export_outputs, predictions, mode)
    training_hooks = _validate_estimator_spec_hooks(training_hooks)
    evaluation_hooks = _validate_estimator_spec_hooks(evaluation_hooks)
    prediction_hooks = _validate_estimator_spec_hooks(prediction_hooks)
    training_chief_hooks = _validate_estimator_spec_hooks(training_chief_hooks)
    eval_metric_ops = _validate_eval_metric_ops(eval_metric_ops)
    scaffold = _validate_scaffold(scaffold)

    # By default, Tensor Tracer is not enabled and the block below is an no-op.
    if tensor_tracer.TensorTracer.is_enabled() and train_op is not None:
      # If Tensor Tracer is enabled via environment flags, loss and train_op
      # will be used to determine the execution path that will be traced. A
      # `tf.identity` of loss that enforces the execution of tracing ops will
      # be returned.
      tt = tensor_tracer.TensorTracer()
      loss = tt.trace_cpu(tf.compat.v1.get_default_graph(), loss, train_op)

    return super(EstimatorSpec, cls).__new__(
        cls,
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        training_chief_hooks=training_chief_hooks,
        training_hooks=training_hooks,
        scaffold=scaffold,
        evaluation_hooks=evaluation_hooks,
        prediction_hooks=prediction_hooks)

  def _replace(self, **kwds):
    """Return a new EstimatorSpec replacing specified fields with new values."""
    # `mode` is immutable once set; all other fields may be replaced.
    if 'mode' in kwds:
      if self.mode != kwds['mode']:
        raise ValueError('mode of EstimatorSpec cannot be changed.')
    new_fields = map(kwds.pop, self._fields, list(self))
    return EstimatorSpec(*new_fields)


class _TPUEstimatorSpec(
    collections.namedtuple('TPUEstimatorSpec', [
        'mode', 'predictions', 'loss', 'train_op', 'eval_metrics',
        'export_outputs', 'scaffold_fn', 'host_call', 'training_hooks',
        'evaluation_hooks', 'prediction_hooks'
    ])):
  """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.

  This is a simplified implementation of `tf.contrib.tpu.EstimatorSpec`. See
  tensorflow/contrib/tpu/python/tpu/tpu_estimator.py for more detailed
  documentation.
  """

  def __new__(cls,
              mode,
              predictions=None,
              loss=None,
              train_op=None,
              eval_metrics=None,
              export_outputs=None,
              scaffold_fn=None,
              host_call=None,
              training_hooks=None,
              evaluation_hooks=None,
              prediction_hooks=None):
    """Creates a `_TPUEstimatorSpec` instance."""
    # Shares the same per-field validators as EstimatorSpec; eval_metrics,
    # scaffold_fn and host_call are TPU-specific and passed through unchecked.
    train_op = _validate_estimator_spec_train_op(train_op, mode)
    loss = _validate_estimator_spec_loss(loss, mode)
    predictions = _validate_estimator_spec_predictions(predictions, mode)
    export_outputs = _validate_estimator_spec_export_outputs(
        export_outputs, predictions, mode)
    training_hooks = _validate_estimator_spec_hooks(training_hooks)
    evaluation_hooks = _validate_estimator_spec_hooks(evaluation_hooks)
    prediction_hooks = _validate_estimator_spec_hooks(prediction_hooks)
    return super(_TPUEstimatorSpec, cls).__new__(
        cls,
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metrics=eval_metrics,
        export_outputs=export_outputs,
        scaffold_fn=scaffold_fn,
        host_call=host_call,
        training_hooks=training_hooks,
        evaluation_hooks=evaluation_hooks,
        prediction_hooks=prediction_hooks)

  def as_estimator_spec(self):
    """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
    if not self.eval_metrics:
      eval_metric_ops = None
    else:
      # eval_metrics is a (metric_fn, tensors) pair; materialize the metric
      # ops by applying the fn to the tensors.
      metric_fn, tensors = self.eval_metrics
      eval_metric_ops = metric_fn(**tensors)
    return EstimatorSpec(
        mode=self.mode,
        predictions=self.predictions,
        loss=self.loss,
        train_op=self.train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=self.export_outputs,
        training_hooks=self.training_hooks,
        evaluation_hooks=self.evaluation_hooks,
        prediction_hooks=self.prediction_hooks)


# Used to generate possible error causes if the user provides a `Tensor` to an
# EstimatorSpec that is not in the default graph.
_default_graph_error_message_template = (
    '{0} with "{1}" must be from the default graph. '
    'Possible causes of this error include: \n\n'
    '1) {0} was created outside the context of the default graph.'
    '\n\n'
    '2) The object passed through to EstimatorSpec was not created '
    'in the most recent call to "model_fn".')


def _validate_estimator_spec_train_op(train_op, mode):
  """Validate train_op inputs for EstimatorSpec or TPUEstimatorSpec.

  Args:
    train_op: Op for the training step.
    mode: A `ModeKeys`. Used to determine whether the train_op is acceptable
      for use in the current mode; for example, if we are not training, this
      can be None.

  Returns:
    train_op: Op for the training step.

  Raises:
    ValueError: If no train_op is passed during training.
    TypeError: If:
      - train_op is neither a `Tensor` nor an Op.
      - train_op is not part of the default graph.
  """
  if train_op is None:
    if mode == ModeKeys.TRAIN:
      raise ValueError('Missing train_op.')
  else:
    default_graph = tf.compat.v1.get_default_graph()
    _check_is_tensor_or_operation(train_op, 'train_op')
    # A Variable passed as train_op stands for its underlying assign op.
    if isinstance(train_op, tf.Variable):
      train_op = train_op.op
    if not (tf.executing_eagerly() or train_op.graph is default_graph):
      raise ValueError(
          _default_graph_error_message_template.format('train_op',
                                                       train_op.name))
  return train_op


def _validate_estimator_spec_loss(loss, mode):
  """Validate loss inputs for EstimatorSpec or TPUEstimatorSpec.

  Args:
    loss: Training loss `Tensor`. Must either be scalar, or with shape `[1]`.
    mode: A `ModeKeys`. Used to determine whether the loss is acceptable for
      use in the current mode; for example, None is acceptable if we are not
      training or evaluating.

  Returns:
    loss: Training loss `Tensor`.

  Raises:
    ValueError: If the loss `Tensor` is not appropriately formatted.
    TypeError: If:
      - a non-`Tensor`, non-None input is passed.
      - the loss `Tensor` is not part of the default graph.
  """
  if loss is None:
    if mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
      raise ValueError('Missing loss.')
  else:
    default_graph = tf.compat.v1.get_default_graph()
    # Loss must be a tensor.
    loss = _check_is_tensor(loss, 'loss')
    loss_shape = loss.get_shape()
    if loss_shape.num_elements() not in (None, 1):
      raise ValueError('Loss must be scalar, given: {}'.format(loss))
    # Normalize shape-[1] losses to a true scalar.
    if not loss_shape.is_compatible_with(tf.TensorShape([])):
      loss = tf.reshape(loss, [])
    if not (tf.executing_eagerly() or loss.graph is default_graph):
      raise ValueError(
          _default_graph_error_message_template.format('loss', loss.name))
  return loss


def _validate_estimator_spec_predictions(predictions, mode):
  """Validate predictions inputs for EstimatorSpec or TPUEstimatorSpec.

  Args:
    predictions: Predictions `Tensor` or dict of `Tensor`.
    mode: A `ModeKeys`. Used to determine whether the predictions are
      acceptable for use in the current mode; None is acceptable if we are not
      making predictions.

  Returns:
    predictions: Predictions `Tensor` or dict of `Tensor`.

  Raises:
    ValueError: If:
      - predictions is None and we are in predict mode.
      - predictions `Tensor` is not in default_graph or else it is a dict of
        `Tensor` where at least one is not in default_graph.
    TypeError: If predictions is not a `Tensor` or dict of `Tensor`.
  """
  if predictions is None:
    if mode == ModeKeys.PREDICT:
      raise ValueError('Missing predictions.')
    # Normalize None to an empty dict so downstream code can iterate safely.
    predictions = {}
  else:
    default_graph = tf.compat.v1.get_default_graph()
    if isinstance(predictions, dict):
      predictions = {
          k: _check_is_tensor(v, 'predictions[{}]'.format(k))
          for k, v in six.iteritems(predictions)
      }
      if not tf.executing_eagerly():
        for key, value in six.iteritems(predictions):
          if value.graph is not default_graph:
            raise ValueError(
                _default_graph_error_message_template.format(
                    'prediction values', '{0}: {1}'.format(key, value.name)))
    else:
      # Predictions should be a tensor.
      predictions = _check_is_tensor(predictions, 'predictions')
      if not (tf.executing_eagerly() or predictions.graph is default_graph):
        raise ValueError(
            _default_graph_error_message_template.format(
                'prediction values', predictions.name))
  return predictions


def _validate_estimator_spec_export_outputs(export_outputs, predictions, mode):
  """Validate export_outputs inputs for EstimatorSpec or TPUEstimatorSpec.

  Args:
    export_outputs: Describes the output signatures to be exported to
      `SavedModel` and used during serving. A dict `{name: output}` where:
      * name: An arbitrary name for this output.
      * output: an `ExportOutput` object such as `ClassificationOutput`
        `RegressionOutput`, or `PredictOutput`. Single-headed models should
        only need to specify one entry in this dictionary. Multi-headed models
        should specify one entry for each head, one of which must be named
        using
        `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`.
        If no entry is provided, a default `PredictOutput` mapping to
        predictions will be created.
    predictions: Predictions `Tensor` or dict of `Tensor`. Used in generation
      of default outputs.
    mode: A `ModeKeys`. Used to determine whether to validate at all; if the
      EstimatorSpec is not for making predictions we can skip validation.

  Returns:
    export_outputs: The validated dict of `ExportOutput` objects (possibly
      with defaults filled in), or the input unchanged outside PREDICT mode.

  Raises:
    ValueError: If validation fails.
    TypeError: If the export_outputs is not a dict or the values of the dict
      are not instances of type `ExportOutput`.
  """
  if mode == ModeKeys.PREDICT:
    export_outputs = export_utils.get_export_outputs(export_outputs,
                                                     predictions)
  return export_outputs


def _validate_estimator_spec_hooks(hooks):
  """Validate SessionRunHooks for use in EstimatorSpec or TPUEstimatorSpec.

  Args:
    hooks: Iterable of `tf.train.SessionRunHook` objects to run on all
      workers.

  Returns:
    hooks: Iterable of `tf.train.SessionRunHook` objects.

  Raises:
    ValueError: If validation fails.
    TypeError: If any element of the iterable is not a SessionRunHook.
  """
  # None is normalized to an empty tuple.
  hooks = tuple(hooks or [])

  for hook in hooks:
    if not isinstance(hook, tf.compat.v1.train.SessionRunHook):
      raise TypeError(
          'All hooks must be SessionRunHook instances, given: {}'.format(hook))
  return hooks


def _validate_eval_metric_ops(eval_metric_ops):
  """Validate eval_metric_ops for use in EstimatorSpec.

  Args:
    eval_metric_ops: Dict of metric results keyed by name. The values of the
      dict can be one of the following: (1) instance of `Metric` class. (2)
      Results of calling a metric_function, namely a `(metric_tensor,
      update_op)` tuple. `metric_tensor` should be evaluated without any
      impact on state (typically it is a pure computation based on variables.).
      For example, it should not trigger the `update_op` or require any input
      fetching.

  Returns:
    eval_metric_ops: Dict of metric results keyed by name.

  Raises:
    ValueError: If:
      - one of the eval_metric_ops `Metric` objects has no updates.
      - there is at least one `Metric` update or result, `Tensor`, or Op that
        is not in the default graph.
    TypeError: If:
      - eval_metric_ops is not a dict or None.
      - an element of eval_metric_ops is not a `Metric` or a 2-tuple.
      - an element of eval_metric_ops has a sub-element that is not a `Tensor`
        or an Op.
  """
  if eval_metric_ops is None:
    eval_metric_ops = {}
  else:
    if not isinstance(eval_metric_ops, dict):
      raise TypeError(
          'eval_metric_ops must be a dict, given: {}'.format(eval_metric_ops))
    for key, value in six.iteritems(eval_metric_ops):
      # TODO(psv): When we deprecate the old metrics, throw an error here if
      # the value is not an instance of `Metric` class.
      if isinstance(value, tf_keras.metrics.Metric):
        if not value.updates:  # Check if metric updates are available.
          raise ValueError(
              'Please call update_state(...) on the "{metric_name}" metric'
              .format(metric_name=value.name))
      else:
        if not isinstance(value, tuple) or len(value) != 2:
          raise TypeError(
              'Values of eval_metric_ops must be (metric_value, update_op) '
              'tuples, given: {} for key: {}'.format(value, key))

  # Verify all tensors and ops are from default graph.
  default_graph = tf.compat.v1.get_default_graph()
  for key, value in list(six.iteritems(eval_metric_ops)):
    if isinstance(value, tf_keras.metrics.Metric):
      values_to_check = value.updates[:]
      values_to_check.append(value.result())
    else:
      values_to_check = tf.nest.flatten(value)
    for val in values_to_check:
      if not (tf.executing_eagerly() or val.graph is default_graph):
        raise ValueError(
            _default_graph_error_message_template.format(
                'eval_metric_ops', '{0}: {1}'.format(key, val.name)))

  # Metric variables are by default not added to any collections. The variables
  # are appended to the LOCAL_VARIABLES collection for initialization, and
  # METRIC_VARIABLES for TFMA compatibility. Note that although collections are
  # officially deprecated in TensorFlow 2, Estimators will continue using
  # collections as long as it supports V1 graph mode.
  vars_to_add = set()
  for key, value in six.iteritems(eval_metric_ops):
    if isinstance(value, tf_keras.metrics.Metric):
      vars_to_add.update(value.variables)
      # Convert Metric instances to (value_tensor, update_op) tuple.
      eval_metric_ops[key] = (value.result(), value.updates[0])
  _update_variable_collection(tf.compat.v1.GraphKeys.LOCAL_VARIABLES,
                              vars_to_add)
  _update_variable_collection(tf.compat.v1.GraphKeys.METRIC_VARIABLES,
                              vars_to_add)

  return eval_metric_ops


def _update_variable_collection(collection_name, vars_to_add):
  """Add variables to collection."""
  collection = set(tf.compat.v1.get_collection(collection_name))
  # Skip variables that are in the collection already.
vars_to_add = vars_to_add.difference(collection) for v in vars_to_add: tf.compat.v1.add_to_collection(collection_name, v) def _validate_scaffold(scaffold): """Validate scaffold input for EstimatorSpec. Args: scaffold: A `tf.train.Scaffold` object that can be used to set initialization, saver, and more to be used in training. Returns: scaffold: A `tf.train.Scaffold` object. If no scaffold is provided, then a default is generated. Raises: TypeError: If the scaffold is not of type `monitored_session.Scaffold` or None. """ scaffold = scaffold or tf.compat.v1.train.Scaffold() if not isinstance(scaffold, tf.compat.v1.train.Scaffold): raise TypeError( 'scaffold must be tf.train.Scaffold. Given: {}'.format(scaffold)) return scaffold def _check_is_tensor_or_operation(x, name): # TODO(b/154650521): Use tf.Tensor instead of core.Tensor. if not isinstance(x, (tf.Operation, tf.compat.v2.__internal__.types.Tensor)): raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x)) def _check_is_tensor(x, tensor_name): """Returns `x` if it is a `Tensor`, raises TypeError otherwise.""" if not isinstance(x, tf.compat.v2.__internal__.types.Tensor): raise TypeError('{} must be Tensor, given: {}'.format(tensor_name, x)) return x @estimator_export('estimator.experimental.call_logit_fn') def call_logit_fn(logit_fn, features, mode, params, config): """Calls logit_fn (experimental). THIS FUNCTION IS EXPERIMENTAL. Keras layers/models are the recommended APIs for logit and model composition. A utility function that calls the provided logit_fn with the relevant subset of provided arguments. Similar to tf.estimator._call_model_fn(). Args: logit_fn: A logit_fn as defined above. features: The features dict. mode: TRAIN / EVAL / PREDICT ModeKeys. params: The hyperparameter dict. config: The configuration object. Returns: A logit Tensor, the output of logit_fn. Raises: ValueError: if logit_fn does not return a Tensor or a dictionary mapping strings to Tensors. 
""" logit_fn_args = function_utils.fn_args(logit_fn) kwargs = {} if 'mode' in logit_fn_args: kwargs['mode'] = mode if 'params' in logit_fn_args: kwargs['params'] = params if 'config' in logit_fn_args: kwargs['config'] = config logit_fn_results = logit_fn(features=features, **kwargs) result_is_valid_dictionary = ( isinstance(logit_fn_results, dict) and all([(isinstance(k, six.string_types) and isinstance(v, tf.Tensor)) for k, v in six.iteritems(logit_fn_results)])) result_is_tensor = isinstance(logit_fn_results, tf.Tensor) if not (result_is_valid_dictionary or result_is_tensor): raise ValueError('logit_fn should return a Tensor or a dictionary mapping ' 'strings to Tensors. logit_fn returned: %s' % logit_fn_results) return logit_fn_results _VALID_MODEL_FN_ARGS = set( ['features', 'labels', 'mode', 'params', 'self', 'config']) def verify_model_fn_args(model_fn, params): """Verifies `model_fn` arguments.""" args = set(function_utils.fn_args(model_fn)) if 'features' not in args: raise ValueError('model_fn (%s) must include features argument.' % model_fn) if params is not None and 'params' not in args: raise ValueError('model_fn (%s) does not include params argument, ' 'but params (%s) is passed to Estimator.' % (model_fn, params)) if params is None and 'params' in args: tf.compat.v1.logging.warn( 'Estimator\'s model_fn (%s) includes params ' 'argument, but params are not passed to Estimator.', model_fn) non_valid_args = list(args - _VALID_MODEL_FN_ARGS) if non_valid_args: raise ValueError('model_fn (%s) has following not expected args: %s' % (model_fn, non_valid_args)) ================================================ FILE: tensorflow_estimator/python/estimator/model_fn_test.py ================================================ # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model_fn.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow_estimator.python.estimator import model_fn
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.export import export_output
from tensorflow_estimator.python.estimator.mode_keys import ModeKeys


class _FakeHook(tf.compat.v1.train.SessionRunHook):
  """Fake implementation of `SessionRunHook`."""


class _InvalidHook(object):
  """Invalid hook (not a subclass of `SessionRunHook`)."""


class _InvalidScaffold(object):
  """Invalid scaffold (not a subclass of `Scaffold`)."""


class EstimatorSpecTrainTest(tf.test.TestCase):
  """Tests EstimatorSpec in train mode."""

  def testRequiredArgumentsSet(self):
    """Tests that no errors are raised when all required arguments are set."""
    with tf.Graph().as_default(), self.cached_session():
      model_fn.EstimatorSpec(
          mode=ModeKeys.TRAIN, loss=tf.constant(1.), train_op=tf.no_op())

  def testAllArgumentsSet(self):
    """Tests that no errors are raised when all arguments are set."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      predictions = {'loss': loss}
      classes = tf.constant('hello')
      metric_obj = tf_keras.metrics.Mean()
      metric_obj.update_state(loss)
      model_fn.EstimatorSpec(
          mode=ModeKeys.TRAIN,
          predictions=predictions,
          loss=loss,
          train_op=tf.no_op(),
          eval_metric_ops={
              'loss': (tf.no_op(), loss),
              'mean': metric_obj,
          },
          export_outputs={
              'head_name': export_output.ClassificationOutput(classes=classes)
          },
          training_chief_hooks=[_FakeHook()],
          training_hooks=[_FakeHook()],
          scaffold=tf.compat.v1.train.Scaffold(),
          evaluation_hooks=[_FakeHook()],
          prediction_hooks=[_FakeHook()])

  def testLossNumber(self):
    """Tests that error is raised when loss is a number (not Tensor)."""
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN, loss=1., train_op=tf.no_op())

  def testLoss1DTensor(self):
    """Tests that no errors are raised when loss is 1D tensor."""
    with tf.Graph().as_default(), self.cached_session():
      model_fn.EstimatorSpec(
          mode=ModeKeys.TRAIN, loss=tf.constant([1.]), train_op=tf.no_op())

  def testLossMissing(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError, 'Missing loss'):
        model_fn.EstimatorSpec(mode=ModeKeys.TRAIN, train_op=tf.no_op())

  def testLossNotScalar(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN,
            loss=tf.constant([1., 2.]),
            train_op=tf.no_op())

  def testLossSparseTensor(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.sparse.SparseTensor(indices=[[0]], values=[0.], dense_shape=[1])
      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN, loss=loss, train_op=tf.no_op())

  def testLossFromDifferentGraph(self):
    # Create the loss in one graph, then build the spec in another; the
    # graph-membership validation must reject it.
    with tf.Graph().as_default():
      loss = tf.constant(1.)
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError,
                                   'must be from the default graph'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN, loss=loss, train_op=tf.no_op())

  def testTrainOpMissing(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError, 'Missing train_op'):
        model_fn.EstimatorSpec(mode=ModeKeys.TRAIN, loss=tf.constant(1.))

  def testTrainOpNotOperationAndTensor(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(TypeError,
                                   'train_op must be Operation or Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN,
            loss=tf.constant(1.),
            train_op='Not an Operation or Tensor')

  def testTrainOpFromDifferentGraph(self):
    with tf.Graph().as_default():
      train_op = tf.no_op()
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError,
                                   'must be from the default graph'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN, loss=tf.constant(1.), train_op=train_op)

  def testTrainingChiefHookInvalid(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(
          TypeError, 'All hooks must be SessionRunHook instances'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN,
            loss=tf.constant(1.),
            train_op=tf.no_op(),
            training_chief_hooks=[_InvalidHook()])

  def testTrainingHookInvalid(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(
          TypeError, 'All hooks must be SessionRunHook instances'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN,
            loss=tf.constant(1.),
            train_op=tf.no_op(),
            training_hooks=[_InvalidHook()])

  def testScaffoldInvalid(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(TypeError,
                                   r'scaffold must be tf\.train\.Scaffold'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.TRAIN,
            loss=tf.constant(1.),
            train_op=tf.no_op(),
            scaffold=_InvalidScaffold())

  def testReturnDefaultScaffold(self):
    with tf.Graph().as_default(), self.cached_session():
      estimator_spec = model_fn.EstimatorSpec(
          mode=ModeKeys.TRAIN, loss=tf.constant(1.), train_op=tf.no_op())
      self.assertIsNotNone(estimator_spec.scaffold)


class EstimatorSpecEvalTest(tf.test.TestCase):
  """Tests EstimatorSpec in eval mode."""

  def testRequiredArgumentsSet(self):
    """Tests that no errors are raised when all required arguments are set."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)

  def testAllArgumentsSet(self):
    """Tests that no errors are raised when all arguments are set."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      predictions = {'loss': loss}
      classes = tf.constant('hello')
      metric_obj = tf_keras.metrics.Mean()
      metric_obj.update_state(loss)
      model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL,
          predictions=predictions,
          loss=loss,
          train_op=tf.no_op(),
          eval_metric_ops={
              'loss': (tf.no_op(), loss),
              'mean': metric_obj,
          },
          export_outputs={
              'head_name': export_output.ClassificationOutput(classes=classes)
          },
          training_chief_hooks=[_FakeHook()],
          training_hooks=[_FakeHook()],
          scaffold=tf.compat.v1.train.Scaffold(),
          evaluation_hooks=[_FakeHook()])

  def testEvaluationHookInvalid(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(
          TypeError, 'All hooks must be SessionRunHook instances'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            loss=tf.constant(1.),
            evaluation_hooks=[_InvalidHook()])

  def testTupleMetric(self):
    """Tests that no errors are raised when a metric is tuple-valued."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL,
          loss=loss,
          eval_metric_ops={
              'some_metric': ((loss, loss, (tf.constant(2), loss)), tf.no_op())
          })

  def testLoss1DTensor(self):
    """Tests that no errors are raised when loss is 1D tensor."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant([1.])
      model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)

  def testLossNumber(self):
    """Tests that error is raised when loss is a number (not Tensor)."""
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL, predictions={'loss': tf.constant(1.)}, loss=1.)

  def testLossMissing(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError, 'Missing loss'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL, predictions={'loss': tf.constant(1.)})

  def testLossNotScalar(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant([1., 2.])
      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)

  def testLossSparseTensor(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.sparse.SparseTensor(indices=[[0]], values=[0.], dense_shape=[1])
      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'prediction': tf.constant(1.)},
            loss=loss)

  def testLossFromDifferentGraph(self):
    with tf.Graph().as_default():
      loss = tf.constant(1.)
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError,
                                   'must be from the default graph'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'prediction': tf.constant(1.)},
            loss=loss)

  def testReplaceRaisesConstructorChecks(self):
    # _replace must re-run the same validation as the constructor.
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      spec = model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
        spec._replace(loss=tf.constant([1., 2.]))

  def testReplaceDoesReplace(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      spec = model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
      new_spec = spec._replace(predictions={'m': loss})
      self.assertEqual(['m'], list(new_spec.predictions.keys()))

  def testReplaceNotAllowModeChange(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      spec = model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
      # Replacing mode with the same value is allowed; changing it is not.
      spec._replace(mode=ModeKeys.EVAL)
      with self.assertRaisesRegexp(ValueError,
                                   'mode of EstimatorSpec cannot be changed'):
        spec._replace(mode=ModeKeys.TRAIN)

  def testPredictionsMissingIsOkay(self):
    with tf.Graph().as_default(), self.cached_session():
      model_fn.EstimatorSpec(mode=ModeKeys.EVAL, loss=tf.constant(1.))

  def testPredictionsTensor(self):
    """Tests that no error is raised when predictions is Tensor (not dict)."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      model_fn.EstimatorSpec(mode=ModeKeys.EVAL, predictions=loss, loss=loss)

  def testPredictionsNumber(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(TypeError,
                                   r'predictions\[number\] must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'number': 1.},
            loss=tf.constant(1.))

  def testPredictionsSparseTensor(self):
    with tf.Graph().as_default(), self.cached_session():
      predictions = {
          'sparse':
              tf.sparse.SparseTensor(
                  indices=[[0]], values=[0.], dense_shape=[1])
      }
      with self.assertRaisesRegexp(TypeError,
                                   r'predictions\[sparse\] must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL, predictions=predictions, loss=tf.constant(1.))

  def testPredictionsFromDifferentGraph(self):
    with tf.Graph().as_default():
      predictions = {'loss': tf.constant(1.)}
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError,
                                   'must be from the default graph'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL, predictions=predictions, loss=tf.constant(1.))

  def testEvalMetricOpsNoDict(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      with self.assertRaisesRegexp(TypeError, 'eval_metric_ops must be a dict'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'loss': loss},
            loss=loss,
            eval_metric_ops=loss)

  def testEvalMetricOpsNoTuple(self):
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      with self.assertRaisesRegexp(
          TypeError,
          (r'Values of eval_metric_ops must be \(metric_value, update_op\) '
           'tuples')):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'loss': loss},
            loss=loss,
            eval_metric_ops={'loss': loss})

  def testEvalMetricOpsFromDifferentGraphWithMetricTuple(self):
    with tf.Graph().as_default():
      eval_metric_ops = {'loss': (tf.no_op(), tf.constant(1.))}
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      with self.assertRaisesRegexp(ValueError,
                                   'must be from the default graph'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'loss': loss},
            loss=loss,
            eval_metric_ops=eval_metric_ops)

  def testEvalMetricOpsFromDifferentGraphWithMetricObject(self):
    with tf.Graph().as_default():
      metric_obj = tf_keras.metrics.Mean()
      metric_obj.update_state(tf.constant(1.))
      eval_metric_ops = {'metric': metric_obj}
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      with self.assertRaisesRegexp(ValueError,
                                   'must be from the default graph'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'loss': loss},
            loss=loss,
            eval_metric_ops=eval_metric_ops)

  def testEvalMetricOpsWithoutUpdates(self):
    with tf.Graph().as_default():
      eval_metric_ops = {'mean': tf_keras.metrics.Mean()}
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      # NOTE(review): '(...)' below is an unescaped regex group, but it still
      # matches the literal error message via re.search.
      with self.assertRaisesRegexp(ValueError, 'Please call update_state(...)'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.EVAL,
            predictions={'loss': loss},
            loss=loss,
            eval_metric_ops=eval_metric_ops)

  def testMetricVariablesAddedToCollections(self):

    def in_collection(collection_name, variables):
      """Returns whether all variables are in the collection."""
      return set(tf.compat.v1.get_collection(collection_name)).issuperset(
          set(variables))

    with tf.Graph().as_default():
      metric_obj = tf_keras.metrics.Mean()
      metric_obj.update_state(tf.constant(1.))
      # Metric variables are only added to the collections by EstimatorSpec
      # construction, not by the metric itself.
      self.assertFalse(
          in_collection(tf.compat.v1.GraphKeys.LOCAL_VARIABLES,
                        metric_obj.variables))
      self.assertFalse(
          in_collection(tf.compat.v1.GraphKeys.METRIC_VARIABLES,
                        metric_obj.variables))
      model_fn.EstimatorSpec(
          mode=ModeKeys.EVAL,
          predictions=tf.constant(1.),
          loss=tf.constant(1.),
          eval_metric_ops={'metric': metric_obj})
      self.assertTrue(
          in_collection(tf.compat.v1.GraphKeys.LOCAL_VARIABLES,
                        metric_obj.variables))
      self.assertTrue(
          in_collection(tf.compat.v1.GraphKeys.METRIC_VARIABLES,
                        metric_obj.variables))


class EstimatorSpecInferTest(tf.test.TestCase):
  """Tests EstimatorSpec in infer mode."""

  def testRequiredArgumentsSet(self):
    """Tests that no errors are raised when all required arguments are set."""
    with tf.Graph().as_default(), self.cached_session():
      model_fn.EstimatorSpec(
          mode=ModeKeys.PREDICT, predictions={'loss': tf.constant(1.)})

  def testAllArgumentsSet(self):
    """Tests that no errors are raised when all arguments are set."""
    with tf.Graph().as_default(), self.cached_session():
      loss = tf.constant(1.)
      predictions = {'loss': loss}
      classes = tf.constant('hello')
      metric_obj = tf_keras.metrics.Mean()
      metric_obj.update_state(loss)
      model_fn.EstimatorSpec(
          mode=ModeKeys.PREDICT,
          predictions=predictions,
          loss=loss,
          train_op=tf.no_op(),
          eval_metric_ops={
              'loss': (tf.no_op(), loss),
              'mean': metric_obj,
          },
          export_outputs={
              'head_name': export_output.ClassificationOutput(classes=classes)
          },
          training_chief_hooks=[_FakeHook()],
          training_hooks=[_FakeHook()],
          scaffold=tf.compat.v1.train.Scaffold(),
          evaluation_hooks=[_FakeHook()],
          prediction_hooks=[_FakeHook()])

  def testPredictionHookInvalid(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(
          TypeError, 'All hooks must be SessionRunHook instances'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.PREDICT,
            predictions=tf.constant(1.),
            prediction_hooks=[_InvalidHook()])

  def testPredictionsMissing(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(ValueError, 'Missing predictions'):
        model_fn.EstimatorSpec(mode=ModeKeys.PREDICT)

  def testPredictionsTensor(self):
    """Tests that no error is raised when predictions is Tensor (not dict)."""
    with tf.Graph().as_default(), self.cached_session():
      model_fn.EstimatorSpec(
          mode=ModeKeys.PREDICT, predictions=tf.constant(1.))

  def testPredictionsNumber(self):
    with tf.Graph().as_default(), self.cached_session():
      with self.assertRaisesRegexp(TypeError,
                                   r'predictions\[number\] must be Tensor'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.PREDICT, predictions={'number': 1.})

  def testPredictionsSparseTensor(self):
    with tf.Graph().as_default(), self.cached_session():
      predictions = {
          'sparse':
              tf.sparse.SparseTensor(
                  indices=[[0]], values=[0.], dense_shape=[1])
      }
      with self.assertRaisesRegexp(TypeError,
                                   r'predictions\[sparse\] must be Tensor'):
        model_fn.EstimatorSpec(mode=ModeKeys.PREDICT, predictions=predictions)

  def testExportOutputsNoDict(self):
    with tf.Graph().as_default(), self.cached_session():
      predictions = {'loss': tf.constant(1.)}
      classes = tf.constant('hello')
      with self.assertRaisesRegexp(TypeError,
                                   '[`]*export_outputs[`]* must be dict'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs=export_output.ClassificationOutput(classes=classes))

  def testExportOutputsValueNotExportOutput(self):
    with tf.Graph().as_default(), self.cached_session():
      predictions = {'loss': tf.constant(1.)}
      with self.assertRaisesRegexp(
          TypeError,
          r'Values in [`]*export_outputs[`]* must be ExportOutput objects.'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={'head_name': predictions})

  def testExportOutputsSingleheadMissingDefault(self):
    # With exactly one head, it is silently duplicated under the default
    # serving signature key.
    with tf.Graph().as_default(), self.cached_session():
      predictions = {'loss': tf.constant(1.)}
      output_1 = tf.constant([1.])
      regression_output = export_output.RegressionOutput(value=output_1)
      export_outputs = {
          'head-1': regression_output,
      }
      estimator_spec = model_fn.EstimatorSpec(
          mode=ModeKeys.PREDICT,
          predictions=predictions,
          export_outputs=export_outputs)
      expected_export_outputs = {
          tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: regression_output,
          'head-1': regression_output,
      }
      self.assertEqual(expected_export_outputs, estimator_spec.export_outputs)

  def testExportOutputsMultiheadWithDefault(self):
    with tf.Graph().as_default(), self.cached_session():
      predictions = {'loss': tf.constant(1.)}
      output_1 = tf.constant([1.])
      output_2 = tf.constant(['2'])
      output_3 = tf.constant(['3'])
      export_outputs = {
          tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
              export_output.RegressionOutput(value=output_1),
          'head-2':
              export_output.ClassificationOutput(classes=output_2),
          'head-3':
              export_output.PredictOutput(outputs={'some_output_3': output_3})
      }
      estimator_spec = model_fn.EstimatorSpec(
          mode=ModeKeys.PREDICT,
          predictions=predictions,
          export_outputs=export_outputs)
      self.assertEqual(export_outputs, estimator_spec.export_outputs)

  def testExportOutputsMultiheadMissingDefault(self):
    # Multiple heads with no default serving signature is ambiguous and must
    # be rejected.
    with tf.Graph().as_default(), self.cached_session():
      predictions = {'loss': tf.constant(1.)}
      output_1 = tf.constant([1.])
      output_2 = tf.constant(['2'])
      output_3 = tf.constant(['3'])
      export_outputs = {
          'head-1':
              export_output.RegressionOutput(value=output_1),
          'head-2':
              export_output.ClassificationOutput(classes=output_2),
          'head-3':
              export_output.PredictOutput(outputs={'some_output_3': output_3})
      }
      with self.assertRaisesRegexp(
          ValueError, 'Multiple [`]*export_outputs[`]* were provided'):
        model_fn.EstimatorSpec(
            mode=ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs=export_outputs)

  def testDefaultExportOutputCreated(self):
    """Ensure that a default PredictOutput is created for export."""
    with tf.Graph().as_default(), self.cached_session():
      predictions = tf.constant(1.)
      self._assertDefaultExportOutputForPredictions(predictions)

  def testDefaultExportOutputCreatedDict(self):
    """Ensure that a default PredictOutput is created for export for dicts."""
    with tf.Graph().as_default(), self.cached_session():
      predictions = {'loss': tf.constant(1.), 'score': tf.constant(10.)}
      self._assertDefaultExportOutputForPredictions(predictions)

  def _assertDefaultExportOutputForPredictions(self, predictions):
    """Asserts the default serving signature wraps `predictions`."""
    spec = model_fn.EstimatorSpec(
        mode=ModeKeys.PREDICT, predictions=predictions)
    expected = export_output.PredictOutput(predictions).outputs
    serving_output = spec.export_outputs[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    self.assertEqual(serving_output.outputs, expected)


class LogitFnTest(tf.test.TestCase):
  """Tests for model_fn.call_logit_fn."""

  def test_simple_call_logit_fn(self):

    def dummy_logit_fn(features, mode):
      if mode == ModeKeys.TRAIN:
        return features['f1']
      else:
        return features['f2']

    features = {'f1': tf.constant([[2., 3.]]), 'f2': tf.constant([[4., 5.]])}
    logit_fn_result = model_fn.call_logit_fn(dummy_logit_fn, features,
                                             ModeKeys.EVAL, 'fake_params',
                                             'fake_config')
    with self.cached_session():
      self.assertAllClose([[4., 5.]], self.evaluate(logit_fn_result))

  def test_simple_call_multi_logit_fn(self):

    def dummy_logit_fn(features):
      return {u'head1': features['f1'], 'head2': features['f2']}

    features = {'f1': tf.constant([[2., 3.]]), 'f2': tf.constant([[4., 5.]])}
    logit_fn_result = model_fn.call_logit_fn(dummy_logit_fn, features,
                                             ModeKeys.TRAIN, 'fake_params',
                                             'fake_config')
    with self.cached_session():
      self.assertAllClose([[2., 3.]], self.evaluate(logit_fn_result['head1']))
      self.assertAllClose([[4., 5.]], self.evaluate(logit_fn_result['head2']))

  def test_invalid_logit_fn_results(self):

    def invalid_logit_fn(features, params):
      # A list return value is neither a Tensor nor a str->Tensor dict.
      return [
          features['f1'] * params['input_multiplier'],
          features['f2'] * params['input_multiplier']
      ]

    features = {'f1': tf.constant([[2., 3.]]), 'f2': tf.constant([[4., 5.]])}
    params = {'learning_rate': 0.001, 'input_multiplier': 2.0}
    with self.assertRaisesRegexp(
        ValueError, 'logit_fn should return a Tensor or a dictionary mapping '
        'strings to Tensors'):
      model_fn.call_logit_fn(invalid_logit_fn, features, 'fake_mode', params,
                             'fake_config')

  def test_invalid_logit_fn_results_dict(self):

    def invalid_logit_fn(features):
      return {'head1': features['f1'], 'head2': features['f2']}

    # 'f2' is a plain string, so the returned dict has a non-Tensor value.
    features = {'f1': tf.constant([[2., 3.]]), 'f2': 'some string'}
    with self.assertRaisesRegexp(
        ValueError, 'logit_fn should return a Tensor or a dictionary mapping '
        'strings to Tensors'):
      model_fn.call_logit_fn(invalid_logit_fn, features, 'fake_mode',
                             'fake_params', 'fake_config')


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/object_checkpointing_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Integration tests for Estimator + object checkpointing."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import os

# pylint: disable=g-import-not-at-top
try:
  from tensorflow.python.checkpoint import checkpoint as util
except ImportError:
  # TODO(allenl): Remove this after cl/229814711 syncs
  from tensorflow.python.training.checkpointable import util
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.export import export_lib


class SubclassedModel(tf_keras.models.Model):
  # Minimal two-layer subclassed Keras model used to exercise object-based
  # checkpointing through an Estimator's model_fn.

  def __init__(self):
    super(SubclassedModel, self).__init__()
    self.dense_one = tf_keras.layers.Dense(5)
    self.dense_two = tf_keras.layers.Dense(1)

  def call(self, inputs):
    return self.dense_two(self.dense_one(inputs))


def _serving_input_receiver_fn():
  """Builds a ServingInputReceiver with one float placeholder feature."""
  receiver = tf.compat.v1.placeholder(
      tf.dtypes.float32, shape=[None, 1], name='input')
  return export_lib.ServingInputReceiver(
      features={'feature': receiver}, receiver_tensors=receiver)


class ObjectCheckpointingTest(tf.test.TestCase):

  def _make_estimator(self, model_dir):
    """Returns (estimator, input_fn) whose model_fn checkpoints via a
    `Checkpoint` object passed as the Scaffold saver (object-based saving)."""

    def _model_fn(features, labels, mode):
      del labels
      model = SubclassedModel()
      optimizer = tf_keras.optimizers.Adam(0.01)
      checkpoint = util.Checkpoint(
          step=tf.compat.v1.train.get_or_create_global_step(),
          optimizer=optimizer,
          model=model)
      # Make the save counter to satisfy the assert_consumed() assertion later
      checkpoint.save_counter  # pylint: disable=pointless-statement
      with tf.GradientTape() as tape:
        output = model(features['feature'])
        loss = tf.math.reduce_sum(output)
      variables = model.trainable_variables
      gradients = tape.gradient(loss, variables)
      train_op = tf.group(
          optimizer.apply_gradients(zip(gradients, variables)),
          checkpoint.step.assign_add(1))
      # Predictions expose the bias and step so tests can verify the values
      # restored from an object-based checkpoint; both are tiled to batch size.
      return model_fn_lib.EstimatorSpec(
          mode,
          loss=loss,
          train_op=train_op,
          predictions=dict(
              output=output,
              bias=tf.tile(model.dense_two.bias[None, :],
                           [tf.compat.v1.shape(output)[0], 1]),
              step=tf.tile(checkpoint.step[None],
                           [tf.compat.v1.shape(output)[0]])),
          scaffold=tf.compat.v1.train.Scaffold(saver=checkpoint))

    est = estimator_lib.EstimatorV2(model_fn=_model_fn, model_dir=model_dir)

    def _input_map_fn(tensor):
      """Converts a tensor into `features, labels` format used by Estimator."""
      return {'feature': tensor}, tensor

    def _input_fn():
      return tf.compat.v1.data.Dataset.from_tensors(
          [1.]).repeat().batch(10).map(_input_map_fn)

    return est, _input_fn

  def testTwoWayCompatibility(self):
    # Train with the Estimator, then restore the resulting checkpoint into a
    # standalone Checkpoint object (Estimator -> object direction).
    save_model_dir = os.path.join(self.get_temp_dir(), 'model_dir')
    save_est, input_fn = self._make_estimator(save_model_dir)
    save_est.train(input_fn, steps=3)

    model = SubclassedModel()
    optimizer = tf_keras.optimizers.Adam(0.01)
    checkpoint = util.Checkpoint(
        step=tf.Variable(0, dtype=tf.dtypes.int64),
        optimizer=optimizer,
        model=model)
    status = checkpoint.restore(tf.train.latest_checkpoint(save_model_dir))
    self.assertEqual(3, self.evaluate(checkpoint.step))
    with tf.GradientTape() as tape:
      output = model(tf.constant([[1.]]))
      loss = tf.math.reduce_sum(output)
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    # All checkpointed values must have been matched by the restore.
    status.assert_consumed()

    # The optimizer uses this for some reason...
    tf_keras.backend.clear_session()

    # Now the reverse direction: save from the Checkpoint object and have a
    # fresh Estimator pick the values up (object -> Estimator direction).
    load_model_dir = os.path.join(self.get_temp_dir(), 'load_model_dir/')
    checkpoint.step.assign(40)
    checkpoint.model.dense_two.bias.assign([13.])
    checkpoint.save(load_model_dir)
    load_est, input_fn = self._make_estimator(load_model_dir)
    predictions = load_est.predict(input_fn)
    predictions = next(predictions)
    self.assertAllClose([13.], predictions['bias'])
    self.assertEqual(40, predictions['step'])

  def testSavedModelExport(self):
    model_dir = os.path.join(self.get_temp_dir(), 'estimator_train_dir')
    estimator, input_fn = self._make_estimator(model_dir)
    estimator.train(input_fn, steps=1)  # Train to generate a checkpoint.

    export_dir_base = os.path.join(self.get_temp_dir(), 'estimator_export_dir')
    export_dir = estimator.export_saved_model(export_dir_base,
                                              _serving_input_receiver_fn)

    # Check the saved model loads and simple inference runs.
    model = tf.compat.v2.saved_model.load(export_dir)
    model.signatures['serving_default'](tf.constant([[1.]]))


if __name__ == '__main__':
  tf.compat.v1.enable_eager_execution()
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/run_config.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Environment configuration object for Estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import json
import os

import six
import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.distribute import estimator_training as distribute_coordinator_training
from tensorflow.python.util import function_utils
from tensorflow_estimator.python.estimator.estimator_export import estimator_export

# Sentinel distinguishing "argument not supplied" from an explicit None.
_USE_DEFAULT = object()
# A valid device_fn must accept exactly one argument named 'op'.
_VALID_DEVICE_FN_ARGS = set(['op'])

# A list of the property names in RunConfig that the user is allowed to change.
_DEFAULT_REPLACEABLE_LIST = [
    'model_dir', 'tf_random_seed', 'save_summary_steps',
    'save_checkpoints_steps', 'save_checkpoints_secs', 'session_config',
    'keep_checkpoint_max', 'keep_checkpoint_every_n_hours',
    'log_step_count_steps', 'train_distribute', 'device_fn', 'protocol',
    'eval_distribute', 'experimental_distribute',
    'experimental_max_worker_delay_secs', 'session_creation_timeout_secs',
    'checkpoint_save_graph_def'
]

_SAVE_CKPT_ERR = (
    '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.')

# Keys read from the TF_CONFIG environment variable (a JSON dict).
_TF_CONFIG_ENV = 'TF_CONFIG'
_TASK_ENV_KEY = 'task'
_TASK_TYPE_KEY = 'type'
_TASK_ID_KEY = 'index'
_CLUSTER_KEY = 'cluster'
_SERVICE_KEY = 'service'
_SESSION_MASTER_KEY = 'session_master'
_EVAL_SESSION_MASTER_KEY = 'eval_session_master'
_MODEL_DIR_KEY = 'model_dir'
_LOCAL_MASTER = ''
_GRPC_SCHEME = 'grpc://'


def _get_session_master(cluster_spec, task_type, task_id, tf_config):
  """Returns the appropriate address for TensorFlow master.

  The order of precedence to determine the TF session master is as follows:
  1. If `tf_session_master` is set in TF_CONFIG environment variable, takes it.
  2. If the cluster has only one node, returns empty string ''.
  3. Returns the grpc address according to the task type and id in the cluster.
     This is between-graph replication.

  Note: task_type and task_id must be validated. Typically, validated using
  `_validate_task_type_and_task_id`.

  Args:
    cluster_spec: A `ClusterSpec` instance.
    task_type: String. Task type for current node.
    task_id: Int. Task id for current node.
    tf_config: Dict. Python dict for the TF_CONFIG environment variable.

  Raises:
    RuntimeError: If `cluster_spec` is not set.
  """
  if _SESSION_MASTER_KEY in tf_config:
    return tf_config[_SESSION_MASTER_KEY]

  if not cluster_spec:
    raise RuntimeError('Internal error: `_get_session_master` '
                       'does not expect empty cluster_spec.')

  jobs = cluster_spec.jobs

  # If there is only one node in the cluster, do things locally by setting
  # master to ''. If a service or user sets TF_CONFIG with a single node, it's
  # more performant to use a direct master rather than an RPC service.
  if len(jobs) == 1 and len(cluster_spec.job_tasks(jobs[0])) == 1:
    return _LOCAL_MASTER

  # Lookup the master in cluster_spec using task_type and task_id,
  # if possible.
  addresses = cluster_spec.job_tasks(task_type)
  return _GRPC_SCHEME + addresses[task_id]


def _get_eval_session_master(task_type, tf_config):
  """Returns the appropriate address for TensorFlow evaluation master."""
  # Only the dedicated evaluator honors an override from TF_CONFIG; every
  # other task type evaluates locally.
  if task_type == TaskType.EVALUATOR:
    return tf_config.get(_EVAL_SESSION_MASTER_KEY, _LOCAL_MASTER)
  return _LOCAL_MASTER


def _count_ps(cluster_spec):
  """Counts the number of parameter servers in cluster_spec."""
  if not cluster_spec:
    raise RuntimeError(
        'Internal error: `_count_ps` does not expect empty cluster_spec.')

  return len(cluster_spec.as_dict().get(TaskType.PS, []))


def _count_worker(cluster_spec, chief_task_type):
  """Counts the number of workers (including chief) in cluster_spec."""
  if not cluster_spec:
    raise RuntimeError(
        'Internal error: `_count_worker` does not expect empty cluster_spec.')
  return (len(cluster_spec.as_dict().get(TaskType.WORKER, [])) +
          len(cluster_spec.as_dict().get(chief_task_type, [])))


def _validate_service(service):
  """Validates the service key.

  Args:
    service: The value of the 'service' key from TF_CONFIG, or None.

  Returns:
    The `service` value unchanged.

  Raises:
    TypeError: If `service` is set but is not a dict.
  """
  if service is not None and not isinstance(service, dict):
    raise TypeError(
        'If "service" is set in TF_CONFIG, it must be a dict. Given %s' %
        type(service))
  return service


def _validate_task_type_and_task_id(cluster_spec, task_env, chief_task_type):
  """Validates the task type and index in `task_env` according to cluster.

  Args:
    cluster_spec: A `ClusterSpec` instance.
    task_env: Dict. The 'task' sub-dict of TF_CONFIG.
    chief_task_type: The task type acting as chief ('chief' or 'master').

  Returns:
    A (task_type, task_id) tuple with task_id coerced to int.

  Raises:
    ValueError: If the chief node is missing/duplicated, the task type or
      index is unset, or the (task_type, task_id) pair is not in the cluster.
  """
  if chief_task_type not in cluster_spec.jobs:
    raise ValueError(
        'If "cluster" is set in TF_CONFIG, it must have one "%s" node.' %
        chief_task_type)
  if len(cluster_spec.job_tasks(chief_task_type)) > 1:
    raise ValueError(
        'The "cluster" in TF_CONFIG must have only one "%s" node.' %
        chief_task_type)

  task_type = task_env.get(_TASK_TYPE_KEY, None)
  task_id = task_env.get(_TASK_ID_KEY, None)
  if not task_type:
    raise ValueError('If "cluster" is set in TF_CONFIG, task type must be set.')
  if task_id is None:
    raise ValueError(
        'If "cluster" is set in TF_CONFIG, task index must be set.')

  task_id = int(task_id)

  # Check the task id bounds. Upper bound is not necessary as
  # - for evaluator, there is no upper bound.
  # - for non-evaluator, task id is upper bounded by the number of jobs in
  #   cluster spec, which will be checked later (when retrieving the `master`)
  if task_id < 0:
    raise ValueError('Task index must be non-negative number.')

  # Evaluator is not part of the training cluster.
  if task_type == TaskType.EVALUATOR:
    return task_type, task_id

  if task_type not in cluster_spec.jobs:
    raise ValueError(
        '%s is not a valid task_type in the cluster_spec:\n'
        '%s\n\n'
        'Note that these values may be coming from the TF_CONFIG environment '
        'variable.' % (task_type, cluster_spec))
  addresses = cluster_spec.job_tasks(task_type)
  if not 0 <= task_id < len(addresses):
    raise ValueError(
        '%d is not a valid task_id for task_type %s in the cluster_spec:\n'
        '%s\n\n'
        'Note that these values may be coming from the TF_CONFIG environment '
        'variable.' % (task_id, task_type, cluster_spec))

  return task_type, task_id


def _get_global_id_in_cluster(cluster_spec, task_type, task_id,
                              chief_task_type):
  """Returns the global id in cluster."""
  # Note: This is implementation details, which user should not rely on.
  # The first id is 0, which is always for the `chief` node. All other nodes,
  # except `ps`, are ordered alphabetical based on task type (alphabetically)
  # and task id (ascendingly). `ps` are ordered last.

  # Sort task names in cluster: chief first, then non-ps types alphabetically,
  # then ps.
  task_type_ordered_list = [chief_task_type]
  task_type_ordered_list.extend([
      t for t in sorted(cluster_spec.jobs)
      if t != chief_task_type and t != TaskType.PS
  ])
  if TaskType.PS in cluster_spec.jobs:
    task_type_ordered_list.append(TaskType.PS)

  # Walk the ordered types, accumulating the count of tasks before ours.
  next_global_id = 0
  for t in task_type_ordered_list:
    if t == task_type:
      return next_global_id + task_id
    next_global_id += len(cluster_spec.job_tasks(t))

  # This should never happen.
  raise RuntimeError('Internal Error: `task_type` ({}) is not in '
                     'cluster_spec ({}).'.format(task_type, cluster_spec))


def _validate_save_ckpt_with_replaced_keys(new_copy, replaced_keys):
  """Validates the save ckpt properties."""
  # Ensure one (and only one) of save_steps and save_secs is not None.
  # Also, if user sets one save ckpt property, say steps, the other one (secs)
  # should be set as None to improve usability.
  save_steps = new_copy.save_checkpoints_steps
  save_secs = new_copy.save_checkpoints_secs

  if ('save_checkpoints_steps' in replaced_keys and
      'save_checkpoints_secs' in replaced_keys):
    # If user sets both properties explicitly, we need to error out if both
    # are set or neither of them are set.
    if save_steps is not None and save_secs is not None:
      raise ValueError(_SAVE_CKPT_ERR)
  elif 'save_checkpoints_steps' in replaced_keys and save_steps is not None:
    new_copy._save_checkpoints_secs = None  # pylint: disable=protected-access
  elif 'save_checkpoints_secs' in replaced_keys and save_secs is not None:
    new_copy._save_checkpoints_steps = None  # pylint: disable=protected-access


def _validate_properties(run_config):
  """Validates the properties.

  Each check only fires when the property is not None; None is always allowed
  and means "use the default".

  Raises:
    ValueError: If any property holds an invalid value.
  """

  def _validate(property_name, cond, message):
    # Skip validation for unset (None) properties.
    property_value = getattr(run_config, property_name)
    if property_value is not None and not cond(property_value):
      raise ValueError(message)

  def _validate_delay(delay):
    """Check that delay is an integer value.

    Since this has to work for both Python2 and Python3 and PEP237 defines
    long to be basically int, we cannot just use a lambda function.
    """
    try:
      return isinstance(delay, (int, long))
    except NameError:
      # PEP237 redefines long to int for Python3
      return isinstance(delay, int)

  _validate('model_dir', lambda dir: dir, message='model_dir should be non-empty')

  _validate(
      'save_summary_steps',
      lambda steps: steps >= 0,
      message='save_summary_steps should be >= 0')

  _validate(
      'save_checkpoints_steps',
      lambda steps: steps >= 0,
      message='save_checkpoints_steps should be >= 0')
  _validate(
      'save_checkpoints_secs',
      lambda secs: secs >= 0,
      message='save_checkpoints_secs should be >= 0')

  _validate(
      'session_config',
      lambda sc: isinstance(sc, tf.compat.v1.ConfigProto),
      message='session_config must be instance of ConfigProto')

  _validate(
      'keep_checkpoint_max',
      lambda keep_max: keep_max >= 0,
      message='keep_checkpoint_max should be >= 0')
  _validate(
      'keep_checkpoint_every_n_hours',
      lambda keep_hours: keep_hours > 0,
      message='keep_checkpoint_every_n_hours should be > 0')
  _validate(
      'log_step_count_steps',
      lambda num_steps: num_steps > 0,
      message='log_step_count_steps should be > 0')

  _validate(
      'tf_random_seed',
      lambda seed: isinstance(seed, six.integer_types),
      message='tf_random_seed must be integer.')

  _validate(
      'experimental_max_worker_delay_secs',
      _validate_delay,
      message='experimental_max_worker_delay_secs must be an integer if'
      ' set.')
  _validate(
      'session_creation_timeout_secs',
      lambda timeout_secs: timeout_secs > 0,
      message='session_creation_timeout_secs should be > 0')

  _validate(
      'device_fn',
      lambda device_fn: six.callable(device_fn) and set(
          function_utils.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
      message='device_fn must be callable with exactly'
      ' one argument "op".')

  _validate(
      'protocol',
      lambda protocol: protocol in (None, 'grpc', 'grpc+verbs'),
      message='protocol should be grpc or grpc+verbs')


def get_default_session_config():
  """Returns tf.ConfigProto instance."""

  rewrite_opts = rewriter_config_pb2.RewriterConfig(
      meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
  graph_opts = tf.compat.v1.GraphOptions(rewrite_options=rewrite_opts)

  return tf.compat.v1.ConfigProto(
      allow_soft_placement=True, graph_options=graph_opts)


class TaskType(object):
  # Canonical task type names used in TF_CONFIG cluster specs.
  MASTER = 'master'
  PS = 'ps'
  WORKER = 'worker'
  CHIEF = 'chief'
  EVALUATOR = 'evaluator'


@estimator_export('estimator.RunConfig')
class RunConfig(object):
  """This class specifies the configurations for an `Estimator` run."""

  def __init__(self,
               model_dir=None,
               tf_random_seed=None,
               save_summary_steps=100,
               save_checkpoints_steps=_USE_DEFAULT,
               save_checkpoints_secs=_USE_DEFAULT,
               session_config=None,
               keep_checkpoint_max=5,
               keep_checkpoint_every_n_hours=10000,
               log_step_count_steps=100,
               train_distribute=None,
               device_fn=None,
               protocol=None,
               eval_distribute=None,
               experimental_distribute=None,
               experimental_max_worker_delay_secs=None,
               session_creation_timeout_secs=7200,
               checkpoint_save_graph_def=True):
    """Constructs a RunConfig.

    All distributed training related properties `cluster_spec`, `is_chief`,
    `master` , `num_worker_replicas`, `num_ps_replicas`, `task_id`, and
    `task_type` are set based on the `TF_CONFIG` environment variable, if the
    pertinent information is present. The `TF_CONFIG` environment variable is
    a JSON object with attributes: `cluster` and `task`.

    `cluster` is a JSON serialized version of `ClusterSpec`'s Python dict from
    `server_lib.py`, mapping task types (usually one of the `TaskType` enums)
    to a list of task addresses.

    `task` has two attributes: `type` and `index`, where `type` can be any of
    the task types in `cluster`. When `TF_CONFIG` contains said information,
    the following properties are set on this class:

    * `cluster_spec` is parsed from `TF_CONFIG['cluster']`. Defaults to {}. If
      present, must have one and only one node in the `chief` attribute of
      `cluster_spec`.
    * `task_type` is set to `TF_CONFIG['task']['type']`. Must set if
      `cluster_spec` is present; must be `worker` (the default value) if
      `cluster_spec` is not set.
    * `task_id` is set to `TF_CONFIG['task']['index']`.
      Must set if `cluster_spec` is present; must be 0 (the default value) if
      `cluster_spec` is not set.
    * `master` is determined by looking up `task_type` and `task_id` in the
      `cluster_spec`. Defaults to ''.
    * `num_ps_replicas` is set by counting the number of nodes listed in the
      `ps` attribute of `cluster_spec`. Defaults to 0.
    * `num_worker_replicas` is set by counting the number of nodes listed in
      the `worker` and `chief` attributes of `cluster_spec`. Defaults to 1.
    * `is_chief` is determined based on `task_type` and `cluster`.

    There is a special node with `task_type` as `evaluator`, which is not part
    of the (training) `cluster_spec`. It handles the distributed evaluation
    job.

    Example of non-chief node:
    ```
      cluster = {'chief': ['host0:2222'],
                 'ps': ['host1:2222', 'host2:2222'],
                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
      os.environ['TF_CONFIG'] = json.dumps(
          {'cluster': cluster,
           'task': {'type': 'worker', 'index': 1}})
      config = RunConfig()
      assert config.master == 'host4:2222'
      assert config.task_id == 1
      assert config.num_ps_replicas == 2
      assert config.num_worker_replicas == 4
      assert config.cluster_spec == server_lib.ClusterSpec(cluster)
      assert config.task_type == 'worker'
      assert not config.is_chief
    ```

    Example of chief node:
    ```
      cluster = {'chief': ['host0:2222'],
                 'ps': ['host1:2222', 'host2:2222'],
                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
      os.environ['TF_CONFIG'] = json.dumps(
          {'cluster': cluster,
           'task': {'type': 'chief', 'index': 0}})
      config = RunConfig()
      assert config.master == 'host0:2222'
      assert config.task_id == 0
      assert config.num_ps_replicas == 2
      assert config.num_worker_replicas == 4
      assert config.cluster_spec == server_lib.ClusterSpec(cluster)
      assert config.task_type == 'chief'
      assert config.is_chief
    ```

    Example of evaluator node (evaluator is not part of training cluster):
    ```
      cluster = {'chief': ['host0:2222'],
                 'ps': ['host1:2222', 'host2:2222'],
                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
      os.environ['TF_CONFIG'] = json.dumps(
          {'cluster': cluster,
           'task': {'type': 'evaluator', 'index': 0}})
      config = RunConfig()
      assert config.master == ''
      assert config.evaluation_master == ''
      assert config.task_id == 0
      assert config.num_ps_replicas == 0
      assert config.num_worker_replicas == 0
      assert config.cluster_spec == {}
      assert config.task_type == 'evaluator'
      assert not config.is_chief
    ```

    N.B.: If `save_checkpoints_steps` or `save_checkpoints_secs` is set,
    `keep_checkpoint_max` might need to be adjusted accordingly, especially in
    distributed training. For example, setting `save_checkpoints_secs` as 60
    without adjusting `keep_checkpoint_max` (defaults to 5) leads to situation
    that checkpoint would be garbage collected after 5 minutes. In distributed
    training, the evaluation job starts asynchronously and might fail to load
    or find the checkpoint due to race condition.

    Args:
      model_dir: directory where model parameters, graph, etc are saved. If
        `PathLike` object, the path will be resolved. If `None`, will use a
        default value set by the Estimator.
      tf_random_seed: Random seed for TensorFlow initializers. Setting this
        value allows consistency between reruns.
      save_summary_steps: Save summaries every this many steps.
      save_checkpoints_steps: Save checkpoints every this many steps. Can not
        be specified with `save_checkpoints_secs`.
      save_checkpoints_secs: Save checkpoints every this many seconds. Can not
        be specified with `save_checkpoints_steps`. Defaults to 600 seconds if
        both `save_checkpoints_steps` and `save_checkpoints_secs` are not set
        in constructor. If both `save_checkpoints_steps` and
        `save_checkpoints_secs` are `None`, then checkpoints are disabled.
      session_config: a ConfigProto used to set session parameters, or `None`.
      keep_checkpoint_max: The maximum number of recent checkpoint files to
        keep. As new files are created, older files are deleted. If `None` or
        0, all checkpoint files are kept. Defaults to 5 (that is, the 5 most
        recent checkpoint files are kept). If a saver is passed to the
        estimator, this argument will be ignored.
      keep_checkpoint_every_n_hours: Number of hours between each checkpoint
        to be saved. The default value of 10,000 hours effectively disables
        the feature.
      log_step_count_steps: The frequency, in number of global steps, that the
        global step and the loss will be logged during training. Also controls
        the frequency that the global steps / s will be logged (and written to
        summary) during training.
      train_distribute: An optional instance of `tf.distribute.Strategy`. If
        specified, then Estimator will distribute the user's model during
        training, according to the policy specified by that strategy. Setting
        `experimental_distribute.train_distribute` is preferred.
      device_fn: A callable invoked for every `Operation` that takes the
        `Operation` and returns the device string. If `None`, defaults to the
        device function returned by `tf.train.replica_device_setter` with
        round-robin strategy.
      protocol: An optional argument which specifies the protocol used when
        starting server. `None` means default to grpc.
      eval_distribute: An optional instance of `tf.distribute.Strategy`. If
        specified, then Estimator will distribute the user's model during
        evaluation, according to the policy specified by that strategy.
        Setting `experimental_distribute.eval_distribute` is preferred.
      experimental_distribute: An optional
        `tf.contrib.distribute.DistributeConfig` object specifying
        DistributionStrategy-related configuration. The `train_distribute` and
        `eval_distribute` can be passed as parameters to `RunConfig` or set in
        `experimental_distribute` but not both.
      experimental_max_worker_delay_secs: An optional integer specifying the
        maximum time a worker should wait before starting. By default, workers
        are started at staggered times, with each worker being delayed by up
        to 60 seconds. This is intended to reduce the risk of divergence,
        which can occur when many workers simultaneously update the weights of
        a randomly initialized model. Users who warm-start their models and
        train them for short durations (a few minutes or less) should consider
        reducing this default to improve training times.
      session_creation_timeout_secs: Max time workers should wait for a
        session to become available (on initialization or when recovering a
        session) with MonitoredTrainingSession. Defaults to 7200 seconds, but
        users may want to set a lower value to detect problems with variable /
        session (re)-initialization more quickly.
      checkpoint_save_graph_def: Whether to save the GraphDef and MetaGraphDef
        to `checkpoint_dir`. The GraphDef is saved after the session is
        created as `graph.pbtxt`. MetaGraphDefs are saved out for every
        checkpoint as `model.ckpt-*.meta`.

    Raises:
      ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs`
        are set.
    """
    # Resolve the _USE_DEFAULT sentinels: with neither supplied, default to
    # time-based saving every 600s; with one supplied, disable the other.
    if (save_checkpoints_steps == _USE_DEFAULT and
        save_checkpoints_secs == _USE_DEFAULT):
      save_checkpoints_steps = None
      save_checkpoints_secs = 600
    elif save_checkpoints_secs == _USE_DEFAULT:
      save_checkpoints_secs = None
    elif save_checkpoints_steps == _USE_DEFAULT:
      save_checkpoints_steps = None
    elif (save_checkpoints_steps is not None and
          save_checkpoints_secs is not None):
      raise ValueError(_SAVE_CKPT_ERR)

    self._verify_strategy_compatibility(train_distribute, eval_distribute)

    tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
    if tf_config:
      tf.compat.v1.logging.info('TF_CONFIG environment variable: %s', tf_config)

    model_dir = _get_model_dir(tf_config, path_to_str(model_dir))

    RunConfig._replace(
        self,
        allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
        model_dir=model_dir,
        tf_random_seed=tf_random_seed,
        save_summary_steps=save_summary_steps,
        save_checkpoints_steps=save_checkpoints_steps,
        save_checkpoints_secs=save_checkpoints_secs,
        session_config=session_config,
        keep_checkpoint_max=keep_checkpoint_max,
        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
        log_step_count_steps=log_step_count_steps,
        train_distribute=train_distribute,
        device_fn=device_fn,
        protocol=protocol,
        eval_distribute=eval_distribute,
        experimental_distribute=experimental_distribute,
        experimental_max_worker_delay_secs=experimental_max_worker_delay_secs,
        session_creation_timeout_secs=session_creation_timeout_secs,
        checkpoint_save_graph_def=checkpoint_save_graph_def)

    # TODO(frankchn,priyag): Eventually use distributed coordinator for TPUs.
    if ((train_distribute and
         not train_distribute.__class__.__name__.startswith('TPUStrategy')) or
        (eval_distribute and
         not eval_distribute.__class__.__name__.startswith('TPUStrategy')) or
        experimental_distribute):
      tf.compat.v1.logging.info(
          'Initializing RunConfig with distribution strategies.')
      distribute_coordinator_training.init_run_config(self, tf_config)
    else:
      self._init_distributed_setting_from_environment_var(tf_config)
      self._maybe_overwrite_session_config_for_distributed_training()

  def _verify_strategy_compatibility(self, train_distribute, eval_distribute):
    # The v2 ParameterServerStrategy is not supported with Estimator; only the
    # v1 compat version is.
    if ((train_distribute is not None and train_distribute.__class__ ==
         tf.compat.v2.distribute.experimental.ParameterServerStrategy) or
        (eval_distribute is not None and eval_distribute.__class__ ==
         tf.compat.v2.distribute.experimental.ParameterServerStrategy)):
      raise ValueError('Please use `tf.compat.v1.distribute.experimental.Param'
                       'eterServerStrategy` for parameter server strategy with '
                       'estimator.')

  def _maybe_overwrite_session_config_for_distributed_training(self):
    """Overwrites the session_config for distributed training.

    The default overwrite is optimized for between-graph training. Subclass
    should override this method if necessary.
    """
    # Get session_config only for between-graph distributed mode (cluster_spec
    # is present).
    if not self._session_config and self._cluster_spec:
      RunConfig._replace(
          self,
          allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
          session_config=self._get_default_session_config_distributed())

  def _get_default_session_config_distributed(self):
    """Returns None or tf.ConfigProto instance with default device_filters set.

    Device filters are set such that chief/master and worker communicates with
    only ps. session_config=None for evaluators or any other TaskType.
    """
    rewrite_opts = rewriter_config_pb2.RewriterConfig(
        meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
    graph_opts = tf.compat.v1.GraphOptions(rewrite_options=rewrite_opts)

    device_filters = None
    if self._task_type == TaskType.MASTER:
      device_filters = ['/job:ps', '/job:master']
    elif self._task_type == TaskType.CHIEF:
      device_filters = ['/job:ps', '/job:chief']
    elif self._task_type == TaskType.WORKER:
      device_filters = ['/job:ps', '/job:worker/task:%d' % self._task_id]
    elif self._task_type == TaskType.PS:
      device_filters = ['/job:ps', '/job:worker', '/job:chief', '/job:master']
    else:
      # If the task_type is `EVALUATOR` or something other than the ones in
      # TaskType then don't set any device filters.
      return None

    return tf.compat.v1.ConfigProto(
        allow_soft_placement=True,
        graph_options=graph_opts,
        device_filters=device_filters)

  def _init_distributed_setting_from_environment_var(self, tf_config):
    """Initialize distributed properties based on `tf_config`."""
    self._service = _validate_service(tf_config.get(_SERVICE_KEY))
    self._cluster_spec = tf.train.ClusterSpec(tf_config.get(_CLUSTER_KEY, {}))
    task_env = tf_config.get(_TASK_ENV_KEY, {})

    # Legacy clusters with a `master` job take a separate initialization path.
    if self._cluster_spec and TaskType.MASTER in self._cluster_spec.jobs:
      return self._init_distributed_setting_from_environment_var_with_master(
          tf_config)

    if self._cluster_spec:
      # Distributed mode.
      self._task_type, self._task_id = _validate_task_type_and_task_id(
          self._cluster_spec, task_env, TaskType.CHIEF)

      self._evaluation_master = _get_eval_session_master(
          self._task_type, tf_config)

      if self._task_type != TaskType.EVALUATOR:
        self._master = _get_session_master(self._cluster_spec, self._task_type,
                                           self._task_id, tf_config)
        self._num_ps_replicas = _count_ps(self._cluster_spec)
        self._num_worker_replicas = _count_worker(
            self._cluster_spec, chief_task_type=TaskType.CHIEF)
        self._global_id_in_cluster = _get_global_id_in_cluster(
            self._cluster_spec,
            self._task_type,
            self._task_id,
            chief_task_type=TaskType.CHIEF)
      else:
        # Evaluator is not part of the training cluster.
        self._cluster_spec = tf.train.ClusterSpec({})
        self._master = _LOCAL_MASTER
        self._num_ps_replicas = 0
        self._num_worker_replicas = 0
        self._global_id_in_cluster = None  # undefined

      self._is_chief = self._task_type == TaskType.CHIEF
    else:
      # Local mode: only a single worker with index 0 is allowed.
      self._task_type = task_env.get(_TASK_TYPE_KEY, TaskType.WORKER)
      self._task_id = int(task_env.get(_TASK_ID_KEY, 0))
      self._global_id_in_cluster = 0

      if self._task_type != TaskType.WORKER:
        raise ValueError(
            'If "cluster" is not set in TF_CONFIG, task type must be WORKER.')
      if self._task_id != 0:
        raise ValueError(
            'If "cluster" is not set in TF_CONFIG, task index must be 0.')

      self._master = tf_config.get(_SESSION_MASTER_KEY, _LOCAL_MASTER)
      self._evaluation_master = tf_config.get(_EVAL_SESSION_MASTER_KEY,
                                              _LOCAL_MASTER)
      self._is_chief = True
      self._num_ps_replicas = 0
      self._num_worker_replicas = 1

  def _init_distributed_setting_from_environment_var_with_master(
      self, tf_config):
    """Initialize distributed properties for legacy cluster with `master`."""
    # There is no tech reason, why user cannot have chief and master in the
    # same cluster, but it is super confusing (which is really the chief?).
    # So, block this case.
    if TaskType.CHIEF in self._cluster_spec.jobs:
      raise ValueError('If `master` node exists in `cluster`, job '
                       '`chief` is not supported.')

    task_env = tf_config.get(_TASK_ENV_KEY, {})

    self._task_type, self._task_id = _validate_task_type_and_task_id(
        self._cluster_spec, task_env, TaskType.MASTER)

    if self._task_type == TaskType.EVALUATOR:
      raise ValueError('If `master` node exists in `cluster`, task_type '
                       '`evaluator` is not supported.')

    self._global_id_in_cluster = _get_global_id_in_cluster(
        self._cluster_spec,
        self._task_type,
        self._task_id,
        chief_task_type=TaskType.MASTER)

    self._master = _get_session_master(self._cluster_spec, self._task_type,
                                       self._task_id, tf_config)
    self._evaluation_master = _get_eval_session_master(self._task_type,
                                                       tf_config)
    self._num_ps_replicas = _count_ps(self._cluster_spec)
    self._num_worker_replicas = _count_worker(
        self._cluster_spec, chief_task_type=TaskType.MASTER)

    # In a legacy cluster, the `master` node plays the chief role.
    self._is_chief = self._task_type == TaskType.MASTER

  @property
  def cluster_spec(self):
    return self._cluster_spec

  @property
  def device_fn(self):
    """Returns the device_fn.

    If device_fn is not `None`, it overrides the default
    device function used in `Estimator`.
    Otherwise the default one is used.
    """
    return self._device_fn

  @property
  def evaluation_master(self):
    return self._evaluation_master

  @property
  def is_chief(self):
    return self._is_chief

  @property
  def master(self):
    return self._master

  @property
  def num_ps_replicas(self):
    return self._num_ps_replicas

  @property
  def num_worker_replicas(self):
    return self._num_worker_replicas

  @property
  def task_id(self):
    return self._task_id

  @property
  def global_id_in_cluster(self):
    """The global id in the training cluster.

    All global ids in the training cluster are assigned from an increasing
    sequence of consecutive integers. The first id is 0.

    Note: Task id (the property field `task_id`) is tracking the index of the
    node among all nodes with the SAME task type. For example, given the
    cluster definition as follows:

    ```
      cluster = {'chief': ['host0:2222'],
                 'ps': ['host1:2222', 'host2:2222'],
                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
    ```

    Nodes with task type `worker` can have id 0, 1, 2. Nodes with task type
    `ps` can have id, 0, 1. So, `task_id` is not unique, but the pair
    (`task_type`, `task_id`) can uniquely determine a node in the cluster.

    Global id, i.e., this field, is tracking the index of the node among ALL
    nodes in the cluster. It is uniquely assigned. For example, for the
    cluster spec given above, the global ids are assigned as:
    ```
      task_type  | task_id  |  global_id
      --------------------------------
      chief      | 0        |  0
      worker     | 0        |  1
      worker     | 1        |  2
      worker     | 2        |  3
      ps         | 0        |  4
      ps         | 1        |  5
    ```

    Returns:
      An integer id.
    """
    return self._global_id_in_cluster

  @property
  def experimental_max_worker_delay_secs(self):
    return self._experimental_max_worker_delay_secs

  @property
  def task_type(self):
    return self._task_type

  @property
  def tf_random_seed(self):
    return self._tf_random_seed

  @property
  def save_summary_steps(self):
    return self._save_summary_steps

  @property
  def save_checkpoints_secs(self):
    return self._save_checkpoints_secs

  @property
  def session_config(self):
    return self._session_config

  @property
  def save_checkpoints_steps(self):
    return self._save_checkpoints_steps

  @property
  def checkpoint_save_graph_def(self):
    return self._checkpoint_save_graph_def

  @property
  def keep_checkpoint_max(self):
    return self._keep_checkpoint_max

  @property
  def session_creation_timeout_secs(self):
    return self._session_creation_timeout_secs

  @property
  def keep_checkpoint_every_n_hours(self):
    return self._keep_checkpoint_every_n_hours

  @property
  def log_step_count_steps(self):
    return self._log_step_count_steps

  @property
  def model_dir(self):
    return self._model_dir

  @property
  def service(self):
    """Returns the platform defined (in TF_CONFIG) service dict."""
    return self._service

  @property
  def train_distribute(self):
    """Optional `tf.distribute.Strategy` for
training.""" return self._train_distribute @property def eval_distribute(self): """Optional `tf.distribute.Strategy` for evaluation.""" return self._eval_distribute @property def protocol(self): """Returns the optional protocol value.""" return self._protocol def replace(self, **kwargs): """Returns a new instance of `RunConfig` replacing specified properties. Only the properties in the following list are allowed to be replaced: - `model_dir`, - `tf_random_seed`, - `save_summary_steps`, - `save_checkpoints_steps`, - `save_checkpoints_secs`, - `session_config`, - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, - `train_distribute`, - `device_fn`, - `protocol`. - `eval_distribute`, - `experimental_distribute`, - `experimental_max_worker_delay_secs`, In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). Args: **kwargs: keyword named properties with new values. Raises: ValueError: If any property name in `kwargs` does not exist or is not allowed to be replaced, or both `save_checkpoints_steps` and `save_checkpoints_secs` are set. Returns: a new instance of `RunConfig`. """ return RunConfig._replace( copy.deepcopy(self), allowed_properties_list=_DEFAULT_REPLACEABLE_LIST, **kwargs) @staticmethod def _replace(config, allowed_properties_list=None, **kwargs): """See `replace`. N.B.: This implementation assumes that for key named "foo", the underlying property the RunConfig holds is "_foo" (with one leading underscore). Args: config: The RunConfig to replace the values of. allowed_properties_list: The property name list allowed to be replaced. **kwargs: keyword named properties with new values. Raises: ValueError: If any property name in `kwargs` does not exist or is not allowed to be replaced, or both `save_checkpoints_steps` and `save_checkpoints_secs` are set. Returns: a new instance of `RunConfig`. 
""" allowed_properties_list = allowed_properties_list or [] for key, new_value in six.iteritems(kwargs): if key in allowed_properties_list: setattr(config, '_' + key, new_value) continue raise ValueError( 'Replacing {} is not supported. Allowed properties are {}.'.format( key, allowed_properties_list)) _validate_save_ckpt_with_replaced_keys(config, kwargs.keys()) _validate_properties(config) return config def _get_model_dir(tf_config, model_dir): """Returns `model_dir` based user provided `tf_config` or `model_dir`.""" # pylint: disable=g-explicit-bool-comparison # Empty string is treated as False in Python condition check, which triggers # some confusing error messages. For example, 'a or b' returns None if a is '' # and b is None. `None` is allowed for model_dir but '' is not allowed. Here, # explicitly check empty string to provide clear error message. if model_dir == '': raise ValueError('model_dir should be non-empty.') model_dir_in_tf_config = tf_config.get('model_dir') if model_dir_in_tf_config == '': raise ValueError('model_dir in TF_CONFIG should be non-empty.') if model_dir_in_tf_config: if model_dir and model_dir_in_tf_config != model_dir: raise ValueError( '`model_dir` provided in RunConfig construct, if set, ' 'must have the same value as the model_dir in TF_CONFIG. ' 'model_dir: {}\nTF_CONFIG["model_dir"]: {}.\n'.format( model_dir, model_dir_in_tf_config)) tf.compat.v1.logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config) return model_dir or model_dir_in_tf_config def path_to_str(path): """Returns the file system path representation of a `PathLike` object, else as it is. Args: path: An object that can be converted to path representation. Returns: A `str` object. 
""" if hasattr(path, '__fspath__'): path = tf.compat.as_str_any(path.__fspath__()) return path ================================================ FILE: tensorflow_estimator/python/estimator/run_config_test.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """RunConfig tests.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import tensorflow as tf from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow_estimator.python.estimator import run_config as run_config_lib _TEST_DIR = 'test_dir' _MASTER = 'master_' _NOT_SUPPORTED_REPLACE_PROPERTY_MSG = 'Replacing .*is not supported' _SAVE_CKPT_ERR = ( '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.') _MODEL_DIR_ERR = 'model_dir should be non-empty' _MODEL_DIR_TF_CONFIG_ERR = 'model_dir in TF_CONFIG should be non-empty' _MODEL_DIR_MISMATCH_ERR = ( '`model_dir` provided in RunConfig construct, if set, ' 'must have the same value as the model_dir in TF_CONFIG. 
') _SAVE_SUMMARY_STEPS_ERR = 'save_summary_steps should be >= 0' _SAVE_CKPT_STEPS_ERR = 'save_checkpoints_steps should be >= 0' _SAVE_CKPT_SECS_ERR = 'save_checkpoints_secs should be >= 0' _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto' _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' _DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".' _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.' _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.' _MISSING_CHIEF_ERR = 'If "cluster" is set .* it must have one "chief" node' _MISSING_TASK_TYPE_ERR = 'If "cluster" is set .* task type must be set' _MISSING_TASK_ID_ERR = 'If "cluster" is set .* task index must be set' _INVALID_TASK_INDEX_ERR = 'is not a valid task_id' _NEGATIVE_TASK_INDEX_ERR = 'Task index must be non-negative number.' _INVALID_TASK_TYPE_ERR = 'is not a valid task_type' _INVALID_TASK_TYPE_FOR_LOCAL_ERR = ( 'If "cluster" is not set in TF_CONFIG, task type must be WORKER.') _INVALID_TASK_INDEX_FOR_LOCAL_ERR = ( 'If "cluster" is not set in TF_CONFIG, task index must be 0.') _INVALID_EVALUATOR_IN_CLUSTER_WITH_MASTER_ERR = ( 'If `master` node exists in `cluster`, task_type `evaluator` is not ' 'supported.') _INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR = ( 'If `master` node exists in `cluster`, job `chief` is not supported.') _INVALID_SERVICE_TYPE_ERR = ( 'If "service" is set in TF_CONFIG, it must be a dict. 
Given') _EXPERIMENTAL_MAX_WORKER_DELAY_SECS_ERR = ( 'experimental_max_worker_delay_secs must be an integer if set.') _SESSION_CREATION_TIMEOUT_SECS_ERR = ('session_creation_timeout_secs should be ' '> 0') def _create_run_config_with_cluster_spec(tf_config, **kwargs): with tf.compat.v1.test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}): return run_config_lib.RunConfig(**kwargs) class RunConfigTest(tf.test.TestCase): def test_default_property_values(self): config = run_config_lib.RunConfig() self.assertIsNone(config.model_dir) self.assertIsNone(config.session_config) self.assertIsNone(config.tf_random_seed) self.assertEqual(100, config.save_summary_steps) self.assertEqual(600, config.save_checkpoints_secs) self.assertIsNone(config.save_checkpoints_steps) self.assertEqual(5, config.keep_checkpoint_max) self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) self.assertIsNone(config.device_fn) self.assertIsNone(config.experimental_max_worker_delay_secs) self.assertEqual(7200, config.session_creation_timeout_secs) self.assertTrue(config.checkpoint_save_graph_def) def test_model_dir(self): empty_config = run_config_lib.RunConfig() self.assertIsNone(empty_config.model_dir) new_config = empty_config.replace(model_dir=_TEST_DIR) self.assertEqual(_TEST_DIR, new_config.model_dir) def test_replace_with_allowed_properties(self): session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) device_fn = lambda op: '/cpu:0' config = run_config_lib.RunConfig().replace( tf_random_seed=11, save_summary_steps=12, save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, keep_checkpoint_every_n_hours=17, device_fn=device_fn, session_creation_timeout_secs=18, checkpoint_save_graph_def=False) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) 
    self.assertEqual(16, config.keep_checkpoint_max)
    self.assertEqual(17, config.keep_checkpoint_every_n_hours)
    self.assertEqual(device_fn, config.device_fn)
    self.assertEqual(18, config.session_creation_timeout_secs)
    self.assertFalse(config.checkpoint_save_graph_def)

  def test_replace_none_value(self):
    config = run_config_lib.RunConfig().replace(
        tf_random_seed=None,
        model_dir=None,
        save_summary_steps=None,
        save_checkpoints_secs=None,
        save_checkpoints_steps=None,
        session_config=None,
        keep_checkpoint_max=None,
        keep_checkpoint_every_n_hours=None,
        device_fn=None)
    self.assertIsNone(config.tf_random_seed)
    self.assertIsNone(config.model_dir)
    self.assertIsNone(config.save_summary_steps)
    self.assertIsNone(config.save_checkpoints_secs)
    self.assertIsNone(config.save_checkpoints_steps)
    self.assertIsNone(config.session_config)
    self.assertIsNone(config.keep_checkpoint_max)
    self.assertIsNone(config.keep_checkpoint_every_n_hours)
    self.assertIsNone(config.device_fn)

  def test_replace_with_disallowallowed_properties(self):
    config = run_config_lib.RunConfig()

    with self.assertRaises(ValueError):
      # `master` is not allowed to be replaced.
      config.replace(master='_master')

    with self.assertRaises(ValueError):
      config.replace(some_undefined_property=123)

  def test_replace(self):
    config = run_config_lib.RunConfig()

    with self.assertRaisesRegexp(ValueError,
                                 _NOT_SUPPORTED_REPLACE_PROPERTY_MSG):
      # master is not allowed to be replaced.
      config.replace(master=_MASTER)

    with self.assertRaisesRegexp(ValueError,
                                 _NOT_SUPPORTED_REPLACE_PROPERTY_MSG):
      config.replace(some_undefined_property=_MASTER)

  def test_replace_invalid_values(self):
    config = run_config_lib.RunConfig()

    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
      config.replace(model_dir='')
    with self.assertRaisesRegexp(ValueError, _SAVE_SUMMARY_STEPS_ERR):
      config.replace(save_summary_steps=-1)
    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_STEPS_ERR):
      config.replace(save_checkpoints_steps=-1)
    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_SECS_ERR):
      config.replace(save_checkpoints_secs=-1)
    with self.assertRaisesRegexp(ValueError, _SESSION_CONFIG_ERR):
      config.replace(session_config={})
    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_MAX_ERR):
      config.replace(keep_checkpoint_max=-1)
    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_HOURS_ERR):
      config.replace(keep_checkpoint_every_n_hours=0)
    with self.assertRaisesRegexp(ValueError,
                                 _SESSION_CREATION_TIMEOUT_SECS_ERR):
      config.replace(session_creation_timeout_secs=0)
    with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
      config.replace(tf_random_seed=1.0)
    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
      # device_fn must take exactly one argument; two is invalid.
      config.replace(device_fn=lambda x, y: 0)
    with self.assertRaisesRegexp(ValueError,
                                 _EXPERIMENTAL_MAX_WORKER_DELAY_SECS_ERR):
      config.replace(experimental_max_worker_delay_secs='5')

  def test_init_with_allowed_properties(self):
    session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
    device_fn = lambda op: '/cpu:0'

    config = run_config_lib.RunConfig(
        tf_random_seed=11,
        save_summary_steps=12,
        save_checkpoints_secs=14,
        session_config=session_config,
        keep_checkpoint_max=16,
        keep_checkpoint_every_n_hours=17,
        device_fn=device_fn,
        experimental_max_worker_delay_secs=10)
    self.assertEqual(11, config.tf_random_seed)
    self.assertEqual(12, config.save_summary_steps)
    self.assertEqual(14, config.save_checkpoints_secs)
    self.assertEqual(session_config, config.session_config)
    self.assertEqual(16, config.keep_checkpoint_max)
    self.assertEqual(17, config.keep_checkpoint_every_n_hours)
    self.assertEqual(device_fn, config.device_fn)
    self.assertEqual(10, config.experimental_max_worker_delay_secs)

  def test_init_none_value(self):
    config = run_config_lib.RunConfig(
        tf_random_seed=None,
        model_dir=None,
        save_summary_steps=None,
        save_checkpoints_secs=None,
        save_checkpoints_steps=None,
        session_config=None,
        keep_checkpoint_max=None,
        keep_checkpoint_every_n_hours=None,
        device_fn=None)
    self.assertIsNone(config.tf_random_seed)
    self.assertIsNone(config.model_dir)
    self.assertIsNone(config.save_summary_steps)
    self.assertIsNone(config.save_checkpoints_secs)
    self.assertIsNone(config.save_checkpoints_steps)
    self.assertIsNone(config.session_config)
    self.assertIsNone(config.keep_checkpoint_max)
    self.assertIsNone(config.keep_checkpoint_every_n_hours)
    self.assertIsNone(config.device_fn)

  def test_init_invalid_values(self):
    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
      run_config_lib.RunConfig(model_dir='')
    with self.assertRaisesRegexp(ValueError, _SAVE_SUMMARY_STEPS_ERR):
      run_config_lib.RunConfig(save_summary_steps=-1)
    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_STEPS_ERR):
      run_config_lib.RunConfig(save_checkpoints_steps=-1)
    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_SECS_ERR):
      run_config_lib.RunConfig(save_checkpoints_secs=-1)
    with self.assertRaisesRegexp(ValueError, _SESSION_CONFIG_ERR):
      run_config_lib.RunConfig(session_config={})
    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_MAX_ERR):
      run_config_lib.RunConfig(keep_checkpoint_max=-1)
    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_HOURS_ERR):
      run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0)
    with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
      run_config_lib.RunConfig(tf_random_seed=1.0)
    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
      run_config_lib.RunConfig(device_fn=lambda x: '/cpu:0')
    with self.assertRaisesRegexp(ValueError,
                                 _EXPERIMENTAL_MAX_WORKER_DELAY_SECS_ERR):
      run_config_lib.RunConfig(experimental_max_worker_delay_secs='5')

  def test_incompatible_train_strategy(self):
    with self.assertRaisesRegex(
        ValueError, 'Please use `tf.compat.v1.distribut'
        'e.experimental.ParameterServerStrategy`'):
      # __new__ skips __init__ so no real strategy state is required.
      run_config_lib.RunConfig(
          train_distribute=tf.compat.v2.distribute.experimental
          .ParameterServerStrategy.__new__(
              tf.compat.v2.distribute.experimental.ParameterServerStrategy))

  def test_incompatible_eval_strategy(self):
    with self.assertRaisesRegex(
        ValueError, 'Please use `tf.compat.v1.distribut'
        'e.experimental.ParameterServerStrategy`'):
      run_config_lib.RunConfig(
          eval_distribute=tf.compat.v2.distribute.experimental
          .ParameterServerStrategy.__new__(
              tf.compat.v2.distribute.experimental.ParameterServerStrategy))


class RunConfigDistributedSettingTest(tf.test.TestCase):
  """Tests distributed properties derived from TF_CONFIG (chief-style cluster)."""

  def _assert_distributed_properties(self, run_config, expected_cluster_spec,
                                     expected_task_type, expected_task_id,
                                     expected_master,
                                     expected_evaluation_master,
                                     expected_is_chief,
                                     expected_num_worker_replicas,
                                     expected_num_ps_replicas):
    # Checks every cluster-derived RunConfig property in one place.
    self.assertEqual(expected_cluster_spec, run_config.cluster_spec.as_dict())
    self.assertEqual(expected_task_type, run_config.task_type)
    self.assertEqual(expected_task_id, run_config.task_id)
    self.assertEqual(expected_master, run_config.master)
    self.assertEqual(expected_evaluation_master, run_config.evaluation_master)
    self.assertEqual(expected_is_chief, run_config.is_chief)
    self.assertEqual(expected_num_worker_replicas,
                     run_config.num_worker_replicas)
    self.assertEqual(expected_num_ps_replicas, run_config.num_ps_replicas)

  def test_default_values(self):
    self._assert_distributed_properties(
        run_config=run_config_lib.RunConfig(),
        expected_cluster_spec={},
        expected_task_type=run_config_lib.TaskType.WORKER,
        expected_task_id=0,
        expected_master='',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=1,
        expected_num_ps_replicas=0)

  def test_tf_config_for_local(self):
    tf_config = {'task': {'type': run_config_lib.TaskType.WORKER, 'index': 0}}
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self._assert_distributed_properties(
        run_config=run_config,
        expected_cluster_spec={},
        expected_task_type=run_config_lib.TaskType.WORKER,
        expected_task_id=0,
        expected_master='',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=1,
        expected_num_ps_replicas=0)
    self.assertEqual(0, run_config.global_id_in_cluster)
    self.assertIsNone(run_config.session_config, None)

  def test_session_master_for_local(self):
    tf_config = {'session_master': '_my_master'}
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec={},
        expected_task_type=run_config_lib.TaskType.WORKER,
        expected_task_id=0,
        expected_master='_my_master',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=1,
        expected_num_ps_replicas=0)

  def test_eval_session_master_for_local(self):
    tf_config = {'eval_session_master': '_my_eval_master'}
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec={},
        expected_task_type=run_config_lib.TaskType.WORKER,
        expected_task_id=0,
        expected_master='',
        expected_evaluation_master='_my_eval_master',
        expected_is_chief=True,
        expected_num_worker_replicas=1,
        expected_num_ps_replicas=0)

  def test_invalid_task_type_for_local(self):
    tf_config = {'task': {'type': run_config_lib.TaskType.CHIEF, 'index': 0}}
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_invalid_task_index_for_local(self):
    tf_config = {'task': {'type': run_config_lib.TaskType.WORKER, 'index': 1}}
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_FOR_LOCAL_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_chief_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': 0
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.CHIEF,
        expected_task_id=0,
        expected_master='grpc://host0:0',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=4,
        expected_num_ps_replicas=2)

  def test_session_master_from_single_node_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': 0
        },
        'session_master': '_my_master'
    }
    # An explicit session_master overrides the address derived from the
    # cluster spec.
    self.assertEqual('_my_master',
                     _create_run_config_with_cluster_spec(tf_config).master)

  def test_session_master_from_multiple_nodes_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': 0
        },
        'session_master': '_my_master'
    }
    self.assertEqual('_my_master',
                     _create_run_config_with_cluster_spec(tf_config).master)

  def test_fail_with_multiple_chief_nodes(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0', 'host:6:6'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
    }
    with self.assertRaisesRegexp(ValueError, _ONE_CHIEF_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_missing_chief_node(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
    }
    with self.assertRaisesRegexp(ValueError, _MISSING_CHIEF_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_single_chief_node(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': 0
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.CHIEF,
        expected_task_id=0,
        expected_master='',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=1,
        expected_num_ps_replicas=0)

  def test_fail_with_missing_task_type_for_distributed(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host3:3']
        },
    }
    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_TYPE_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_missing_task_index_for_distributed(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
        }
    }
    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_ID_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_index_is_too_large(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': 1
        }
    }
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_invalid_task_index(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': -1
        }
    }
    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_invalid_task_type(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.WORKER,
            'index': 0
        }
    }
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_worker_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.WORKER,
            'index': 1
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.WORKER,
        expected_task_id=1,
        expected_master='grpc://host4:4',
        expected_evaluation_master='',
        expected_is_chief=False,
        expected_num_worker_replicas=4,
        expected_num_ps_replicas=2)

  def test_ps_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.PS,
            'index': 0
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.PS,
        expected_task_id=0,
        expected_master='grpc://host1:1',
        expected_evaluation_master='',
        expected_is_chief=False,
        expected_num_worker_replicas=4,
        expected_num_ps_replicas=2)

  def test_evaluator_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.EVALUATOR,
            'index': 12
        }
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self._assert_distributed_properties(
        run_config=run_config,
        expected_cluster_spec={},
        expected_task_type=run_config_lib.TaskType.EVALUATOR,
        expected_task_id=12,
        expected_master='',
        expected_evaluation_master='',
        expected_is_chief=False,  # evaluator is never chief.
        expected_num_worker_replicas=0,  # evaluator is not in training cluster.
        expected_num_ps_replicas=0)
    self.assertIsNone(run_config.global_id_in_cluster)

  def test_eval_master_for_evaluator(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.EVALUATOR,
            'index': 12
        },
        'eval_session_master': 'grpc://123',
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual('grpc://123', run_config.evaluation_master)

  def test_fail_with_invalid_task_index_for_evaluator(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.EVALUATOR,
            'index': -1
        }
    }
    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_global_id_in_cluster_for_chief(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': run_config_lib.TaskType.CHIEF,
            'index': 0,
        },
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(0, run_config.global_id_in_cluster)

  def test_global_id_in_cluster_for_worker(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': run_config_lib.TaskType.WORKER,
            'index': 2,
        },
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(3, run_config.global_id_in_cluster)

  def test_global_id_in_cluster_for_ps(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type':
                run_config_lib.TaskType.PS,
            'index': 1,
        },
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(5, run_config.global_id_in_cluster)

  def test_global_id_in_cluster_for_multipe_worker_types(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.CHIEF: ['host0:0'],
            'worker': ['host3:3', 'host4:4', 'host5:5'],
            'other_type': ['host3:1', 'host4:2'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': 'other_type',
            'index': 1,
        },
    }
    # Though 'other_type' is defined after 'worker', based on alphabetical
    # order, the task type order should be 'chief', 'other_type', 'worker',
    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
    # order list.
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(2, run_config.global_id_in_cluster)


class RunConfigDistributedSettingWithMasterTest(tf.test.TestCase):
  """Same distributed-setting checks but for clusters using a `master` node."""

  def _assert_distributed_properties(self, run_config, expected_cluster_spec,
                                     expected_task_type, expected_task_id,
                                     expected_master,
                                     expected_evaluation_master,
                                     expected_is_chief,
                                     expected_num_worker_replicas,
                                     expected_num_ps_replicas):
    # Checks every cluster-derived RunConfig property in one place.
    self.assertEqual(expected_cluster_spec, run_config.cluster_spec.as_dict())
    self.assertEqual(expected_task_type, run_config.task_type)
    self.assertEqual(expected_task_id, run_config.task_id)
    self.assertEqual(expected_master, run_config.master)
    self.assertEqual(expected_evaluation_master, run_config.evaluation_master)
    self.assertEqual(expected_is_chief, run_config.is_chief)
    self.assertEqual(expected_num_worker_replicas,
                     run_config.num_worker_replicas)
    self.assertEqual(expected_num_ps_replicas, run_config.num_ps_replicas)

  def test_invalid_task_type_for_local(self):
    tf_config = {'task': {'type': run_config_lib.TaskType.MASTER, 'index': 0}}
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_master_node(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 0
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.MASTER,
        expected_task_id=0,
        expected_master='grpc://host0:0',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=4,
        expected_num_ps_replicas=2)

  def test_session_master_in_single_node_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 0
        },
        'session_master': '_my_master'
    }
    self.assertEqual('_my_master',
                     _create_run_config_with_cluster_spec(tf_config).master)

  def test_session_master_in_multiple_nodes_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 0
        },
        'session_master': '_my_master'
    }
    self.assertEqual('_my_master',
                     _create_run_config_with_cluster_spec(tf_config).master)

  def test_fail_with_multiple_master_nodes(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0', 'host:6:6'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
    }
    with self.assertRaisesRegexp(ValueError, _ONE_MASTER_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_single_master_node(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 0
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.MASTER,
        expected_task_id=0,
        expected_master='',
        expected_evaluation_master='',
        expected_is_chief=True,
        expected_num_worker_replicas=1,
        expected_num_ps_replicas=0)

  def test_fail_with_missing_task_type_for_distributed(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host3:3']
        },
    }
    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_TYPE_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_missing_task_index_for_distributed(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
        }
    }
    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_ID_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_index_is_too_large(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 1
        }
    }
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_invalid_task_index(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': -1
        }
    }
    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_invalid_task_type(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host3:3']
        },
        'task': {
            'type': run_config_lib.TaskType.WORKER,
            'index': 0
        }
    }
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_worker_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.WORKER,
            'index': 1
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.WORKER,
        expected_task_id=1,
        expected_master='grpc://host4:4',
        expected_evaluation_master='',
        expected_is_chief=False,
        expected_num_worker_replicas=4,
        expected_num_ps_replicas=2)

  def test_ps_tf_config(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.PS,
            'index': 0
        }
    }
    self._assert_distributed_properties(
        run_config=_create_run_config_with_cluster_spec(tf_config),
        expected_cluster_spec=tf_config['cluster'],
        expected_task_type=run_config_lib.TaskType.PS,
        expected_task_id=0,
        expected_master='grpc://host1:1',
        expected_evaluation_master='',
        expected_is_chief=False,
        expected_num_worker_replicas=4,
        expected_num_ps_replicas=2)

  def test_fail_with_evaluator(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.EVALUATOR,
            'index': 1
        }
    }
    with self.assertRaisesRegexp(ValueError,
                                 _INVALID_EVALUATOR_IN_CLUSTER_WITH_MASTER_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_fail_with_chief(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
            run_config_lib.TaskType.CHIEF: ['host3:3', 'host4:4', 'host5:5']
        },
        'task': {
            'type': run_config_lib.TaskType.PS,
            'index': 1
        }
    }
    with self.assertRaisesRegexp(ValueError,
                                 _INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR):
      _create_run_config_with_cluster_spec(tf_config)

  def test_global_id_in_cluster_for_master(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': run_config_lib.TaskType.MASTER,
            'index': 0,
        },
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(0, run_config.global_id_in_cluster)

  def test_global_id_in_cluster_for_worker(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': run_config_lib.TaskType.WORKER,
            'index': 2,
        },
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(3, run_config.global_id_in_cluster)

  def test_global_id_in_cluster_for_ps(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': run_config_lib.TaskType.PS,
            'index': 1,
        },
    }
    run_config = _create_run_config_with_cluster_spec(tf_config)
    self.assertEqual(5, run_config.global_id_in_cluster)

  def test_global_id_in_cluster_for_multipe_worker_types(self):
    tf_config = {
        'cluster': {
            run_config_lib.TaskType.MASTER: ['host0:0'],
            'worker': ['host3:3', 'host4:4', 'host5:5'],
            'other_type': ['host3:1', 'host4:2'],
            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
        },
        'task': {
            'type': 'other_type',
            'index': 1,
        },
    }
    # Though 'other_type' is defined after 'worker', based on alphabetical
    # order, the task type order should be 'chief', 'other_type', 'worker',
    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
    # order list.
run_config = _create_run_config_with_cluster_spec(tf_config) self.assertEqual(2, run_config.global_id_in_cluster) class RunConfigSaveCheckpointsTest(tf.test.TestCase): def test_save_checkpoint(self): empty_config = run_config_lib.RunConfig() self.assertEqual(600, empty_config.save_checkpoints_secs) self.assertIsNone(empty_config.save_checkpoints_steps) config_with_steps = empty_config.replace(save_checkpoints_steps=100) del empty_config self.assertEqual(100, config_with_steps.save_checkpoints_steps) self.assertIsNone(config_with_steps.save_checkpoints_secs) config_with_secs = config_with_steps.replace(save_checkpoints_secs=200) del config_with_steps self.assertEqual(200, config_with_secs.save_checkpoints_secs) self.assertIsNone(config_with_secs.save_checkpoints_steps) def test_save_checkpoint_both_steps_and_secs_are_not_none(self): empty_config = run_config_lib.RunConfig() with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_ERR): empty_config.replace( save_checkpoints_steps=100, save_checkpoints_secs=200) with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_ERR): run_config_lib.RunConfig( save_checkpoints_steps=100, save_checkpoints_secs=200) def test_save_checkpoint_both_steps_and_secs_are_none(self): config_with_secs = run_config_lib.RunConfig() config_without_ckpt = config_with_secs.replace( save_checkpoints_steps=None, save_checkpoints_secs=None) self.assertIsNone(config_without_ckpt.save_checkpoints_steps) self.assertIsNone(config_without_ckpt.save_checkpoints_secs) def test_save_checkpoint_flip_secs_to_none(self): config_with_secs = run_config_lib.RunConfig() config_without_ckpt = config_with_secs.replace(save_checkpoints_secs=None) self.assertIsNone(config_without_ckpt.save_checkpoints_steps) self.assertIsNone(config_without_ckpt.save_checkpoints_secs) def test_save_checkpoint_flip_steps_to_none(self): config_with_steps = run_config_lib.RunConfig().replace( save_checkpoints_steps=100) config_without_ckpt = 
config_with_steps.replace(save_checkpoints_steps=None) self.assertIsNone(config_without_ckpt.save_checkpoints_steps) self.assertIsNone(config_without_ckpt.save_checkpoints_secs) class RunConfigServiceKeyTest(tf.test.TestCase): def test_arbitrary_key_value_pairs(self): tf_config = { 'service': { 'key1': [1, 2], 'key2': { 'a': 3, 'b': 4 }, 'key3': 789, }, } run_config = _create_run_config_with_cluster_spec(tf_config) self.assertEqual(tf_config['service'], run_config.service) def test_missing_service_key(self): tf_config = { 'model_dir': '/tmp/123', } run_config = _create_run_config_with_cluster_spec(tf_config) self.assertIsNone(run_config.service) def test_fail_with_non_dict(self): tf_config = { 'service': 789, } with self.assertRaisesRegexp(TypeError, _INVALID_SERVICE_TYPE_ERR): _create_run_config_with_cluster_spec(tf_config) class RunConfigModelDirTest(tf.test.TestCase): def test_default(self): run_config = run_config_lib.RunConfig() self.assertIsNone(run_config.model_dir) def test_model_dir_in_constructor(self): run_config = run_config_lib.RunConfig(model_dir='/tmp/123') self.assertEqual('/tmp/123', run_config.model_dir) def test_model_dir_in_tf_config(self): tf_config = { 'model_dir': '/tmp/123', } run_config = _create_run_config_with_cluster_spec(tf_config) self.assertEqual('/tmp/123', run_config.model_dir) def test_model_dir_both_set_in_both_constructor_and_tf_config(self): model_dir = '/tmp/123' tf_config = {'model_dir': model_dir} kwargs = {'model_dir': model_dir} run_config = _create_run_config_with_cluster_spec(tf_config, **kwargs) self.assertEqual('/tmp/123', run_config.model_dir) def test_model_dir_different_in_both_constructor_and_tf_config(self): tf_config = {'model_dir': '/tmp/123'} kwargs = {'model_dir': '/tmp/456'} with self.assertRaisesRegexp(ValueError, _MODEL_DIR_MISMATCH_ERR): _create_run_config_with_cluster_spec(tf_config, **kwargs) def test_fail_with_empty_string_in_constructor(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR): 
run_config_lib.RunConfig(model_dir='') def test_fail_with_empty_string_in_tf_config(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_TF_CONFIG_ERR): tf_config = {'model_dir': ''} _create_run_config_with_cluster_spec(tf_config) class RunConfigSessionConfigTest(tf.test.TestCase): def _assert_equal_session_config(self, session_config, expected_device_filters): rewrite_opts = rewriter_config_pb2.RewriterConfig( meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE) graph_opts = tf.compat.v1.GraphOptions(rewrite_options=rewrite_opts) expected_session_config = tf.compat.v1.ConfigProto( allow_soft_placement=True, graph_options=graph_opts, device_filters=expected_device_filters) self.assertEqual(session_config, expected_session_config) def test_master_session_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.MASTER: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': run_config_lib.TaskType.MASTER, 'index': 0 } } run_config = _create_run_config_with_cluster_spec(tf_config) self._assert_equal_session_config(run_config.session_config, ['/job:ps', '/job:master']) def test_chief_session_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': run_config_lib.TaskType.CHIEF, 'index': 0 } } run_config = _create_run_config_with_cluster_spec(tf_config) self._assert_equal_session_config(run_config.session_config, ['/job:ps', '/job:chief']) def test_worker_session_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.MASTER: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': run_config_lib.TaskType.WORKER, 'index': 1 } } run_config = 
_create_run_config_with_cluster_spec(tf_config) self._assert_equal_session_config(run_config.session_config, ['/job:ps', '/job:worker/task:1']) def test_ps_session_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.MASTER: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': run_config_lib.TaskType.PS, 'index': 1 } } run_config = _create_run_config_with_cluster_spec(tf_config) self._assert_equal_session_config( run_config.session_config, ['/job:ps', '/job:worker', '/job:chief', '/job:master']) def test_evaluator_session_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': run_config_lib.TaskType.EVALUATOR, 'index': 0 } } run_config = _create_run_config_with_cluster_spec(tf_config) self.assertIsNone(run_config.session_config) def test_other_type_session_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.MASTER: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], 'other_type': ['host3:1', 'host4:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'] }, 'task': { 'type': 'other_type', 'index': 0 } } run_config = _create_run_config_with_cluster_spec(tf_config) self.assertIsNone(run_config.session_config) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tf_estimator_doctest.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Run doctests for tensorflow.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import re import sys import textwrap import tensorflow as tf import numpy as np from absl import flags from absl.testing import absltest import tensorflow_estimator.python.estimator.estimator_lib as tfe import tensorflow.compat.v2 as tf tf.estimator = tfe tf.compat.v1.enable_v2_behavior() # We put doctest after absltest so that it picks up the unittest monkeypatch. # Otherwise doctest tests aren't runnable at all. import doctest # pylint: disable=g-import-not-at-top, g-bad-import-order FLAGS = flags.FLAGS flags.DEFINE_string('module', None, 'A specific module to run doctest on.') flags.DEFINE_boolean('list', None, 'List all the modules in the core package imported.') flags.DEFINE_string('file', None, 'A specific file to run doctest on.') flags.mark_flags_as_mutual_exclusive(['module', 'file']) flags.mark_flags_as_mutual_exclusive(['list', 'file']) PACKAGE = 'tensorflow_estimator.python.' def find_modules(): """Finds all the modules in the core package imported. Returns: A list containing all the modules in tensorflow.python. """ tf_modules = [] for name, module in sys.modules.items(): if name.startswith(PACKAGE): tf_modules.append(module) return tf_modules def filter_on_submodules(all_modules, submodule): """Filters all the modules based on the module flag. The module flag has to be relative to the core package imported. 
For example, if `submodule=keras.layers` then, this function will return all the modules in the submodule. Args: all_modules: All the modules in the core package. submodule: Submodule to filter from all the modules. Returns: All the modules in the submodule. """ filtered_modules = [ mod for mod in all_modules if PACKAGE + submodule in mod.__name__ ] return filtered_modules def get_module_and_inject_docstring(file_path): """Replaces the docstring of the module with the changed file's content. Args: file_path: Path to the file Returns: A list containing the module changed by the file. """ file_path = os.path.abspath(file_path) mod_index = file_path.find(PACKAGE.replace('.', os.sep)) file_mod_name, _ = os.path.splitext(file_path[mod_index:]) file_module = sys.modules[file_mod_name.replace(os.sep, '.')] with open(file_path, 'r') as f: content = f.read() file_module.__doc__ = content return [file_module] class TfTestCase(tf.test.TestCase): def set_up(self, test): self.setUp() def tear_down(self, test): self.tearDown() class CustomOutputChecker(doctest.OutputChecker): """Changes the `want` and `got` strings. This allows it to be customized before they are compared. """ ID_RE = re.compile(r'\bid=(\d+)\b') ADDRESS_RE = re.compile(r'\bat 0x[0-9a-f]*?>') def check_output(self, want, got, optionflags): # Replace tf.Tensor's id with ellipsis(...) because tensor's id can change # on each execution. Users may forget to use ellipsis while writing # examples in docstrings, so replacing the id with `...` makes it safe. want = self.ID_RE.sub('id=...', want) want = self.ADDRESS_RE.sub('at ...>', want) return doctest.OutputChecker.check_output(self, want, got, optionflags) _MESSAGE = textwrap.dedent("""\n ############################################################# Check the documentation (go/testable-docstrings) on how to write testable docstrings. 
#############################################################""") def output_difference(self, example, got, optionflags): got = got + self._MESSAGE return doctest.OutputChecker.output_difference(self, example, got, optionflags) def load_tests(unused_loader, tests, unused_ignore): """Loads all the tests in the docstrings and runs them.""" tf_modules = find_modules() if FLAGS.module: tf_modules = filter_on_submodules(tf_modules, FLAGS.module) if FLAGS.list: print('**************************************************') for mod in tf_modules: print(mod.__name__) print('**************************************************') return tests if FLAGS.file: tf_modules = get_module_and_inject_docstring(FLAGS.file) for module in tf_modules: testcase = TfTestCase() tests.addTests( doctest.DocTestSuite( module, test_finder=doctest.DocTestFinder(exclude_empty=False), extraglobs={ 'tf': tf, 'np': np, 'os': os }, setUp=testcase.set_up, tearDown=testcase.tear_down, checker=CustomOutputChecker(), optionflags=(doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL | doctest.DONT_ACCEPT_BLANKLINE), )) return tests if __name__ == '__main__': absltest.main() ================================================ FILE: tensorflow_estimator/python/estimator/tools/__init__.py ================================================ ================================================ FILE: tensorflow_estimator/python/estimator/tools/analytics.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Analytics helpers library.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function def track_usage(tool_id, tags): """No usage tracking for external library. Args: tool_id: A string identifier for tool to be tracked. tags: list of string tags that will be added to the tracking. """ del tool_id, tags # Unused externally. def track_numerical_issues(exc_info): """No tracking for external library. Args: exc_info: Output from `sys.exc_info` (type, value, traceback) """ del exc_info ================================================ FILE: tensorflow_estimator/python/estimator/tools/checkpoint_converter.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== r"""Checkpoint converter for Canned Estimators in TF 1.x. This checkpoint converter tool is mainly for Canned Estimators, including DNN Linear and DNNLinearCombined estimators. The allowed optimizers to be converted include Adam, Adagrad, Ftrl, RMSProp, and SGD. Note that, this converter is not suitable for the case where 'dnn_optimizer' and 'linear_optimizer' in DNNLinearCombined model are the same. 
If your current canned estimators and checkpoints are from TF 1.x, after you migrate the canned estimator to v2 with `tf_keras.optimizers.*`, the converted checkpoint allow you to restore and retrain the model in TF 2.0. Usage: python checkpoint_convert.py '/path/to/checkpoint' '/path/to/graph.pbtxt' \ '/path/to/new_checkpoint' For example, if there is a V1 checkpoint to be converted and the files include: /tmp/my_checkpoint/model.ckpt-100.data-00000-of-00001 /tmp/my_checkpoint/model.ckpt-100.index /tmp/my_checkpoint/model.ckpt-100.meta /tmp/my_checkpoint/graph.pbtxt use the following command: mkdir /tmp/my_converted_checkpoint && python checkpoint_convert.py \ /tmp/my_checkpoint/model.ckpt-100 /tmp/my_checkpoint/graph.pbtxt \ /tmp/my_converted_checkpoint/model.ckpt-100 This will generate three converted checkpoint files corresponding to the three old checkpoint files in the new directory: /tmp/my_converted_checkpoint/model.ckpt-100.data-00000-of-00001 /tmp/my_converted_checkpoint/model.ckpt-100.index /tmp/my_converted_checkpoint/model.ckpt-100.meta """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse import sys import tensorflow as tf from google.protobuf import text_format from tensorflow_estimator.python.estimator.util import tf_keras # Optimizer name mapping from v1 to v2. OPT_NAME_V1_TO_V2 = { 'Adagrad': 'Adagrad', 'RMSProp': 'RMSprop', 'Ftrl': 'Ftrl', 'Adam': 'Adam', 'SGD': 'SGD', } # Hyper-paratmeters of optimizer in checkpoint. HP_IN_CKPT = { 'Adam': { 'beta1_power': 'training/Adam/beta_1', 'beta2_power': 'training/Adam/beta_2', }, } # Optimzier variable name mapping from v1 to v2. OPT_VAR_NAME_V1_TO_V2 = { 'Adam': { 'Adam': 'm', 'Adam_1': 'v', }, 'Ftrl': { 'Ftrl': 'accumulator', 'Ftrl_1': 'linear', }, 'RMSProp': { 'RMSProp': 'rms', 'RMSProp_1': None, }, 'Adagrad': { 'Adagrad': 'accumulator', }, } # Hyper-paratmeters of optimizer in graph. 
HP_IN_GRAPH = { 'Adam': ['decay', 'learning_rate'], 'Ftrl': [ 'decay', 'l1_regularization_strength', 'l2_regularization_strength', 'beta', 'learning_rate', 'learning_rate_power' ], 'RMSProp': ['decay', 'learning_rate', 'momentum', 'rho'], 'Adagrad': ['decay', 'learning_rate'], 'SGD': ['decay', 'learning_rate', 'momentum'], } # optimizer v2 instance. OPT_V2_INSTANCE = { 'Adagrad': tf_keras.optimizers.legacy.Adagrad(), 'Adam': tf_keras.optimizers.legacy.Adam(), 'Ftrl': tf_keras.optimizers.legacy.Ftrl(), 'RMSProp': tf_keras.optimizers.legacy.RMSprop(), 'SGD': tf_keras.optimizers.legacy.SGD(), } def _add_new_variable(initial_value, var_name_v2, var_name_v1, var_map, var_names_map): """Creates a new variable and add it to the variable maps.""" var = tf.Variable(initial_value, name=var_name_v2) var_map[var_name_v2] = var var_names_map[var_name_v2] = var_name_v1 def _add_opt_variable(opt_name_v2, var_name_v1, idx, suffix_v2, reader, var_map, var_names_map): """Adds a new optimizer v2 variable.""" var_name_v2 = 'training/' + opt_name_v2 + '/' + var_name_v1[:idx] + suffix_v2 tensor = reader.get_tensor(var_name_v1) _add_new_variable(tensor, var_name_v2, var_name_v1, var_map, var_names_map) def _convert_variables_in_ckpt(opt_name_v1, reader, variable_names, var_map, var_names_map, est_type): """Converts all variables in checkpoint from v1 to v2.""" global_step = None hp_ckpt = None # Global step is needed for Adam for hyper parameter conversion. if opt_name_v1 == 'Adam': global_step = reader.get_tensor('global_step') if opt_name_v1 in HP_IN_CKPT: hp_ckpt = HP_IN_CKPT[opt_name_v1] opt_name_v2 = OPT_NAME_V1_TO_V2[opt_name_v1] # For variables with equivalent mapping in checkpoint. There are three types: # 1) Hyper parameters. This is mainly for Adam optimizer. # 2) Optimizer variables. # 3) Model variables. for var_name in variable_names: # If a hyper parameter variable is in the checkpoint. 
if hp_ckpt and any(hp_name in var_name for hp_name in hp_ckpt): for hp_name in hp_ckpt: if hp_name in var_name: var_name_v2 = hp_ckpt[hp_name] tensor = reader.get_tensor(var_name) # For Adam optimizer, in the old checkpoint, the optimizer variables # are beta1_power and beta2_power. The corresponding variables in the # new checkpoint are beta_1 and beta_2, and # beta_1 = pow(beta1_power, 1/global_step) # beta_2 = pow(beta2_power, 1/global_step) tensor = tf.math.pow(tensor, 1.0 / global_step) _add_new_variable(tensor, var_name_v2, var_name, var_map, var_names_map) break # If it's an optimizer variable. elif opt_name_v1 in var_name: suffix_mapping = OPT_VAR_NAME_V1_TO_V2[opt_name_v1] suffix_v1 = var_name.rsplit('/')[-1] suffix_v2 = suffix_mapping[suffix_v1] if suffix_v2: # For DNN model. if est_type == 'dnn': # The optimizer variable of DNN model in TF 1.x has 't_0' in its # name (b/131719899). This is amended in TF 2.0. idx = var_name.rfind('t_0') _add_opt_variable(opt_name_v2, var_name, idx, suffix_v2, reader, var_map, var_names_map) # for Linear model. elif est_type == 'linear': # The optimizer variable of Linear model in TF 1.x has 'part_0' in its # name (b/131719899). This is amended in TF 2.0. idx = var_name.rfind('part_0') _add_opt_variable(opt_name_v2, var_name, idx, suffix_v2, reader, var_map, var_names_map) # for DNNLinearCombined model. else: idx = var_name.rfind(suffix_v1) _add_opt_variable(opt_name_v2, var_name, idx, suffix_v2, reader, var_map, var_names_map) # If it's a model variable which is already backward compatible. 
else: tensor = reader.get_tensor(var_name) _add_new_variable(tensor, var_name, var_name, var_map, var_names_map) def _convert_hyper_params_in_graph(graph_from_path, opt_name_v1, var_map, var_names_map): """Generates hyper parameters for optimizer v2 from graph.pbtxt.""" with tf.io.gfile.GFile(graph_from_path) as f: graph_def = text_format.Parse(f.read(), tf.compat.v1.GraphDef()) # In keras optimizer, the hyper parameters are also stored in the checkpoint, # while v1 checkpoint doesn't contain any hyper parameters. For the # hyper parameter variables, there are two cases: # 1) The hyper parameter exist in the graph. # If so, the hyper parameter value needs to be extracted from the graph # node. # 2) The hyper parameter doesn't exist in the graph. # The value of the hyper parameter is set as the default value from the # config. nodes_full = HP_IN_GRAPH[opt_name_v1] nodes_in_graph = [] opt_name_v2 = OPT_NAME_V1_TO_V2[opt_name_v1] tf.compat.v1.logging.info('For hyper parameter variables that are in Graph:') for node in graph_def.node: node_name = node.name.rsplit('/')[-1] # For case 1), if the hyper parameter of the keras optimizer can be found # in the graph, the graph node value is extracted as the hyper parameter # variable value, and added to the new variable list. if opt_name_v1 + '/' + node_name in nodes_full: hp_value = node.attr.get('value').tensor.float_val[0] hp_name_v2 = 'training/' + opt_name_v2 + '/' + node_name tf.compat.v1.logging.info( 'Hyper parameter {} with value {} found in Graph.'.format( hp_name_v2, hp_value)) _add_new_variable(hp_value, hp_name_v2, node_name, var_map, var_names_map) # Adds this node to nodes_in_graph nodes_in_graph.append(node_name) # For case 2), if the hyper parameter is not in graph, we need to add it # manually. The tensor value is its default value from optimizer v2 config. 
nodes_not_in_graph = sorted(list(set(nodes_full) - set(nodes_in_graph))) opt_v2_config = OPT_V2_INSTANCE[opt_name_v1].get_config() tf.compat.v1.logging.info( 'For hyper parameter variables that are NOT in Graph:') for node_name in nodes_not_in_graph: hp_name_v2 = 'training/' + opt_name_v2 + '/' + node_name tf.compat.v1.logging.info( 'Hyper parameter {} with default value {} is added.'.format( hp_name_v2, opt_v2_config[node_name])) _add_new_variable(opt_v2_config[node_name], hp_name_v2, node_name, var_map, var_names_map) def convert_checkpoint(estimator_type, source_checkpoint, source_graph, target_checkpoint): """Converts checkpoint from TF 1.x to TF 2.0 for CannedEstimator. Args: estimator_type: The type of estimator to be converted. So far, the allowed args include 'dnn', 'linear', and 'combined'. source_checkpoint: Path to the source checkpoint file to be read in. source_graph: Path to the source graph file to be read in. target_checkpoint: Path to the target checkpoint to be written out. """ with tf.Graph().as_default(): # Get v1 optimizer names and it's corresponding variable name reader = tf.compat.v1.train.NewCheckpointReader(source_checkpoint) variable_names = sorted(reader.get_variable_to_shape_map()) opt_names_v1 = {} for var_name in variable_names: for opt_name in OPT_NAME_V1_TO_V2: if opt_name in var_name: opt_names_v1[opt_name] = var_name # SGD doesn't appear in optimizer variables, so we need to add it manually # if no optimizer is found in checkpoint for DNN or Linear model. if not opt_names_v1: if estimator_type == 'dnn' or estimator_type == 'linear': opt_names_v1['SGD'] = '' # As the case is not handled in the converter if dnn_optimizer and # linear_optimizer in DNNLinearCombined model are the same, an error is # is raised if two SGD optimizers are used in DNNLinearCombined model. 
elif estimator_type == 'combined': raise ValueError('Two `SGD` optimizers are used in DNNLinearCombined ' 'model, and this is not handled by the checkpoint ' 'converter.') # A dict mapping from v2 variable name to the v2 variable. var_map = {} # A dict mapping from v2 variable name to v1 variable name. var_names_map = {} # Determine the names of dnn_optimizer and linear_optimizer in # DNNLinearCombined model. if estimator_type == 'combined': linear_opt_v1 = None if len(opt_names_v1) == 1: # When one of the optimizer is 'SGD'. key = list(opt_names_v1.keys())[0] # Case 1: linear_optimizer is non-SGD, and dnn_optimizer is SGD. if opt_names_v1[key].startswith('linear/linear_model/'): linear_opt_v1 = key # Case 2: linear_optimizer is SGD, and dnn_optimizer is non-SGD. if not linear_opt_v1: linear_opt_v1 = 'SGD' opt_names_v1['SGD'] = '' else: # two non-SGD optimizers for key in opt_names_v1: if opt_names_v1[key].startswith('linear/linear_model/'): linear_opt_v1 = key # Add the 'iter' hyper parameter to the new checkpoint for # linear_optimizer. Note dnn_optimizer uses global_step. tensor = reader.get_tensor('global_step') var_name_v2 = 'training/' + OPT_NAME_V1_TO_V2[linear_opt_v1] + '/iter' var_name_v1 = 'global_step' _add_new_variable(tensor, var_name_v2, var_name_v1, var_map, var_names_map) for opt_name_v1 in opt_names_v1: # Convert all existing variables from checkpoint. _convert_variables_in_ckpt(opt_name_v1, reader, variable_names, var_map, var_names_map, estimator_type) # Convert hyper parameters for optimizer v2 from the graph. _convert_hyper_params_in_graph(source_graph, opt_name_v1, var_map, var_names_map) # Log the variable mapping from opt v1 to v2. tf.compat.v1.logging.info( '<----- Variable names converted (v1 --> v2): ----->') for name_v2 in var_names_map: tf.compat.v1.logging.info('%s --> %s' % (var_names_map[name_v2], name_v2)) # Save to checkpoint v2. 
saver = tf.compat.v1.train.Saver(var_list=var_map) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.initializers.global_variables()) tf.compat.v1.logging.info('Writing checkpoint_to_path %s' % target_checkpoint) saver.save(sess, target_checkpoint) def main(_): convert_checkpoint( FLAGS.estimator_type, FLAGS.source_checkpoint, FLAGS.source_graph, FLAGS.target_checkpoint, ) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( 'estimator_type', type=str, choices=['dnn', 'linear', 'combined'], help='The type of estimator to be converted. So far, the checkpoint ' 'converter only supports Canned Estimator. So the allowed types ' 'include linear, dnn and combined.') parser.add_argument( 'source_checkpoint', type=str, help='Path to source checkpoint file to be read in.') parser.add_argument( 'source_graph', type=str, help='Path to source graph file to be read in.') parser.add_argument( 'target_checkpoint', type=str, help='Path to checkpoint file to be written out.') FLAGS, unparsed = parser.parse_known_args() tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed) ================================================ FILE: tensorflow_estimator/python/estimator/tools/checkpoint_converter_test.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== """Tests for checkpoint_converter.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import shutil import numpy as np import tensorflow as tf from tensorflow.python.feature_column import feature_column from tensorflow_estimator.python.estimator.canned import dnn from tensorflow_estimator.python.estimator.canned import dnn_linear_combined from tensorflow_estimator.python.estimator.canned import head as head_lib from tensorflow_estimator.python.estimator.canned import linear from tensorflow_estimator.python.estimator.head import regression_head from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.tools import checkpoint_converter class DNNCheckpointConverterTest(tf.test.TestCase): def setUp(self): self._old_ckpt_dir = os.path.join(self.get_temp_dir(), 'source_ckpt') self._new_ckpt_dir = os.path.join(self.get_temp_dir(), 'target_ckpt') def tearDown(self): if os.path.exists(self._old_ckpt_dir): tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._old_ckpt_dir) if os.path.exists(self._new_ckpt_dir): tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._new_ckpt_dir) def _test_ckpt_converter(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size, optimizer): # Create checkpoint in CannedEstimator v1. 
feature_columns_v1 = [ feature_column._numeric_column('x', shape=(input_dimension,)) ] est_v1 = dnn.DNNEstimator( head=head_lib._regression_head(label_dimension=label_dimension), hidden_units=(2, 2), feature_columns=feature_columns_v1, model_dir=self._old_ckpt_dir, optimizer=optimizer) # Train num_steps = 10 est_v1.train(train_input_fn, steps=num_steps) self.assertIsNotNone(est_v1.latest_checkpoint()) self.assertTrue(est_v1.latest_checkpoint().startswith(self._old_ckpt_dir)) # Convert checkpoint from v1 to v2. source_checkpoint = os.path.join(self._old_ckpt_dir, 'model.ckpt-10') source_graph = os.path.join(self._old_ckpt_dir, 'graph.pbtxt') target_checkpoint = os.path.join(self._new_ckpt_dir, 'model.ckpt-10') checkpoint_converter.convert_checkpoint('dnn', source_checkpoint, source_graph, target_checkpoint) # Create CannedEstimator V2 and restore from the converted checkpoint. feature_columns_v2 = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] est_v2 = dnn.DNNEstimatorV2( head=regression_head.RegressionHead(label_dimension=label_dimension), hidden_units=(2, 2), feature_columns=feature_columns_v2, model_dir=self._new_ckpt_dir, optimizer=optimizer) # Train extra_steps = 10 est_v2.train(train_input_fn, steps=extra_steps) self.assertIsNotNone(est_v2.latest_checkpoint()) self.assertTrue(est_v2.latest_checkpoint().startswith(self._new_ckpt_dir)) # Make sure estimator v2 restores from the converted checkpoint, and # continues training extra steps. 
self.assertEqual( num_steps + extra_steps, est_v2.get_variable_value(tf.compat.v1.GraphKeys.GLOBAL_STEP)) def _create_input_fn(self, label_dimension, batch_size): """Creates input_fn for integration test.""" data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) # learn y = x train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, batch_size=batch_size, shuffle=False) return train_input_fn, eval_input_fn, predict_input_fn def _test_ckpt_converter_with_an_optimizer(self, opt): """Tests checkpoint converter with an optimizer.""" label_dimension = 2 batch_size = 10 train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn( label_dimension, batch_size) self._test_ckpt_converter( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=label_dimension, label_dimension=label_dimension, batch_size=batch_size, optimizer=opt) def test_ckpt_converter_with_adagrad(self): """Tests checkpoint converter with Adagrad.""" self._test_ckpt_converter_with_an_optimizer('Adagrad') def test_ckpt_converter_with_rmsprop(self): """Tests checkpoint converter with RMSProp.""" self._test_ckpt_converter_with_an_optimizer('RMSProp') def test_ckpt_converter_with_ftrl(self): """Tests checkpoint converter with Ftrl.""" self._test_ckpt_converter_with_an_optimizer('Ftrl') def test_ckpt_converter_with_adam(self): """Tests checkpoint converter with Adam.""" self._test_ckpt_converter_with_an_optimizer('Adam') def test_ckpt_converter_with_sgd(self): """Tests checkpoint converter with SGD.""" self._test_ckpt_converter_with_an_optimizer('SGD') class LinearCheckpointConverterTest(tf.test.TestCase): def setUp(self): self._old_ckpt_dir = 
os.path.join(self.get_temp_dir(), 'source_ckpt') self._new_ckpt_dir = os.path.join(self.get_temp_dir(), 'target_ckpt') def tearDown(self): if os.path.exists(self._old_ckpt_dir): tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._old_ckpt_dir) if os.path.exists(self._new_ckpt_dir): tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._new_ckpt_dir) def _test_ckpt_converter(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size, optimizer): # Create checkpoint in CannedEstimator v1. feature_columns_v1 = [ feature_column._numeric_column('x', shape=(input_dimension,)) ] est_v1 = linear.LinearEstimator( head=head_lib._regression_head(label_dimension=label_dimension), feature_columns=feature_columns_v1, model_dir=self._old_ckpt_dir, optimizer=optimizer) # Train num_steps = 10 est_v1.train(train_input_fn, steps=num_steps) self.assertIsNotNone(est_v1.latest_checkpoint()) self.assertTrue(est_v1.latest_checkpoint().startswith(self._old_ckpt_dir)) # Convert checkpoint from v1 to v2. source_checkpoint = os.path.join(self._old_ckpt_dir, 'model.ckpt-10') source_graph = os.path.join(self._old_ckpt_dir, 'graph.pbtxt') target_checkpoint = os.path.join(self._new_ckpt_dir, 'model.ckpt-10') checkpoint_converter.convert_checkpoint('linear', source_checkpoint, source_graph, target_checkpoint) # Create CannedEstimator V2 and restore from the converted checkpoint. 
feature_columns_v2 = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] est_v2 = linear.LinearEstimatorV2( head=regression_head.RegressionHead(label_dimension=label_dimension), feature_columns=feature_columns_v2, model_dir=self._new_ckpt_dir, optimizer=optimizer) # Train extra_steps = 10 est_v2.train(train_input_fn, steps=extra_steps) self.assertIsNotNone(est_v2.latest_checkpoint()) self.assertTrue(est_v2.latest_checkpoint().startswith(self._new_ckpt_dir)) # Make sure estimator v2 restores from the converted checkpoint, and # continues training extra steps. self.assertEqual( num_steps + extra_steps, est_v2.get_variable_value(tf.compat.v1.GraphKeys.GLOBAL_STEP)) def _create_input_fn(self, label_dimension, batch_size): """Creates input_fn for integration test.""" data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) # learn y = x train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, batch_size=batch_size, shuffle=False) return train_input_fn, eval_input_fn, predict_input_fn def _test_ckpt_converter_with_an_optimizer(self, opt): """Tests checkpoint converter with an optimizer.""" label_dimension = 2 batch_size = 10 train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn( label_dimension, batch_size) self._test_ckpt_converter( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=label_dimension, label_dimension=label_dimension, batch_size=batch_size, optimizer=opt) def test_ckpt_converter_with_adagrad(self): """Tests checkpoint converter with Adagrad.""" self._test_ckpt_converter_with_an_optimizer('Adagrad') def test_ckpt_converter_with_rmsprop(self): """Tests checkpoint converter with 
RMSProp.""" self._test_ckpt_converter_with_an_optimizer('RMSProp') def test_ckpt_converter_with_ftrl(self): """Tests checkpoint converter with Ftrl.""" self._test_ckpt_converter_with_an_optimizer('Ftrl') def test_ckpt_converter_with_adam(self): """Tests checkpoint converter with Adam.""" self._test_ckpt_converter_with_an_optimizer('Adam') def test_ckpt_converter_with_sgd(self): """Tests checkpoint converter with SGD.""" self._test_ckpt_converter_with_an_optimizer('SGD') class DNNLinearCombinedCheckpointConverterTest(tf.test.TestCase): def setUp(self): self._old_ckpt_dir = os.path.join(self.get_temp_dir(), 'source_ckpt') self._new_ckpt_dir = os.path.join(self.get_temp_dir(), 'target_ckpt') def tearDown(self): if os.path.exists(self._old_ckpt_dir): tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._old_ckpt_dir) if os.path.exists(self._new_ckpt_dir): tf.compat.v1.summary.FileWriterCache.clear() shutil.rmtree(self._new_ckpt_dir) def _test_ckpt_converter(self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, label_dimension, batch_size, dnn_optimizer, linear_optimizer): # Create checkpoint in CannedEstimator v1. linear_feature_columns_v1 = [ feature_column._numeric_column('x', shape=(input_dimension,)) ] dnn_feature_columns_v1 = [ feature_column._numeric_column('x', shape=(input_dimension,)) ] est_v1 = dnn_linear_combined.DNNLinearCombinedEstimator( head=head_lib._regression_head(label_dimension=label_dimension), linear_feature_columns=linear_feature_columns_v1, dnn_feature_columns=dnn_feature_columns_v1, dnn_hidden_units=(2, 2), model_dir=self._old_ckpt_dir, dnn_optimizer=dnn_optimizer, linear_optimizer=linear_optimizer) # Train num_steps = 10 est_v1.train(train_input_fn, steps=num_steps) self.assertIsNotNone(est_v1.latest_checkpoint()) self.assertTrue(est_v1.latest_checkpoint().startswith(self._old_ckpt_dir)) # Convert checkpoint from v1 to v2. 
source_checkpoint = os.path.join(self._old_ckpt_dir, 'model.ckpt-10') source_graph = os.path.join(self._old_ckpt_dir, 'graph.pbtxt') target_checkpoint = os.path.join(self._new_ckpt_dir, 'model.ckpt-10') checkpoint_converter.convert_checkpoint('combined', source_checkpoint, source_graph, target_checkpoint) # Create CannedEstimator V2 and restore from the converted checkpoint. linear_feature_columns_v2 = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] dnn_feature_columns_v2 = [ tf.feature_column.numeric_column('x', shape=(input_dimension,)) ] est_v2 = dnn_linear_combined.DNNLinearCombinedEstimatorV2( head=regression_head.RegressionHead(label_dimension=label_dimension), linear_feature_columns=linear_feature_columns_v2, dnn_feature_columns=dnn_feature_columns_v2, dnn_hidden_units=(2, 2), model_dir=self._new_ckpt_dir, dnn_optimizer=dnn_optimizer, linear_optimizer=linear_optimizer) # Train extra_steps = 10 est_v2.train(train_input_fn, steps=extra_steps) self.assertIsNotNone(est_v2.latest_checkpoint()) self.assertTrue(est_v2.latest_checkpoint().startswith(self._new_ckpt_dir)) # Make sure estimator v2 restores from the converted checkpoint, and # continues training extra steps. 
self.assertEqual( num_steps + extra_steps, est_v2.get_variable_value(tf.compat.v1.GraphKeys.GLOBAL_STEP)) def _create_input_fn(self, label_dimension, batch_size): """Creates input_fn for integration test.""" data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) # learn y = x train_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, shuffle=True) eval_input_fn = numpy_io.numpy_input_fn( x={'x': data}, y=data, batch_size=batch_size, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( x={'x': data}, batch_size=batch_size, shuffle=False) return train_input_fn, eval_input_fn, predict_input_fn def _test_ckpt_converter_with_an_optimizer(self, dnn_opt, linear_opt): """Tests checkpoint converter with an optimizer.""" label_dimension = 2 batch_size = 10 train_input_fn, eval_input_fn, predict_input_fn = self._create_input_fn( label_dimension, batch_size) self._test_ckpt_converter( train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, input_dimension=label_dimension, label_dimension=label_dimension, batch_size=batch_size, dnn_optimizer=dnn_opt, linear_optimizer=linear_opt) def test_ckpt_converter_with_adagrad(self): """Tests checkpoint converter with Adagrad.""" self._test_ckpt_converter_with_an_optimizer('Adagrad', 'RMSProp') def test_ckpt_converter_with_rmsprop(self): """Tests checkpoint converter with RMSProp.""" self._test_ckpt_converter_with_an_optimizer('RMSProp', 'Ftrl') def test_ckpt_converter_with_ftrl(self): """Tests checkpoint converter with Ftrl.""" self._test_ckpt_converter_with_an_optimizer('Ftrl', 'Adam') def test_ckpt_converter_with_adam(self): """Tests checkpoint converter with Adam.""" self._test_ckpt_converter_with_an_optimizer('Adam', 'SGD') def test_ckpt_converter_with_sgd(self): """Tests checkpoint converter with SGD.""" self._test_ckpt_converter_with_an_optimizer('SGD', 'Adagrad') if __name__ 
== '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/BUILD ================================================ # Description: TPUEstimator # Placeholder: load py_library # INTERNAL TEST RULE PLACEHOLDER load("//tensorflow_estimator:estimator.bzl", "py_test", "tpu_py_test") licenses(["notice"]) package( default_visibility = [ "//tensorflow_estimator:internal", "//third_party/tensorflow:__subpackages__", ], ) py_library( name = "tpu_estimator", srcs = [ "_tpu_estimator_embedding.py", "error_handling.py", "iteration_count_estimator.py", "tpu_config.py", "tpu_context.py", "tpu_estimator.py", "util.py", ], srcs_version = "PY3", deps = [ "//tensorflow_estimator/python/estimator", "//tensorflow_estimator/python/estimator:analytics_tools", "//tensorflow_estimator/python/estimator:estimator_export", "//tensorflow_estimator/python/estimator:expect_six_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//tensorflow_estimator/python/estimator:export_output", "//tensorflow_estimator/python/estimator:model_fn", "//tensorflow_estimator/python/estimator:run_config", ], ) py_test( name = "tpu_config_test", size = "small", srcs = ["tpu_config_test.py"], python_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "error_handling_test", size = "small", srcs = ["error_handling_test.py"], python_version = "PY3", deps = [ ":tpu_estimator", ], ) py_test( name = "tpu_estimator_signals_test", size = "small", srcs = ["tpu_estimator_signals_test.py"], python_version = "PY3", # TODO(jhseu): Remove. Fails in OSS on Python 3. 
tags = [ "no_oss", ], deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) tpu_py_test( name = "tpu_estimator_test", size = "medium", timeout = "long", srcs = ["tpu_estimator_test.py"], args = [ "--test_num_shards=2", ], disable_experimental = True, shard_count = 2, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//third_party/py/absl/flags", ], ) tpu_py_test( name = "tpu_estimator_embedding_test", size = "medium", timeout = "long", srcs = [ "tpu_estimator_embedding_test.py", ], args = [ "--test_num_shards=2", ], # TODO(b/140117863): Hanging, then timeout disable_experimental = True, shard_count = 4, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//third_party/py/absl/flags", ], ) tpu_py_test( name = "tpu_estimator_evaluation_test", size = "medium", timeout = "long", srcs = ["tpu_estimator_evaluation_test.py"], args = [ "--test_num_shards=2", ], disable_experimental = True, shard_count = 2, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//third_party/py/absl/flags", ], ) tpu_py_test( name = "tpu_estimator_export_test", size = "medium", srcs = ["tpu_estimator_export_test.py"], args = [ "--test_num_shards=2", ], disable_experimental = True, shard_count = 2, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) tpu_py_test( name = "tpu_estimator_gradients_test", size = "medium", srcs = [ "tpu_estimator_gradients_test.py", ], args = [ "--test_num_shards=2", "--xla_jf_conv_full_precision=true", ], # TODO(b/140117863): Fatal error from hardware disable_experimental = True, disable_mlir_bridge = False, 
shard_count = 2, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) tpu_py_test( name = "tpu_estimator_input_v2_test", size = "medium", srcs = ["tpu_estimator_input_v2_test.py"], disable_experimental = True, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) tpu_py_test( name = "tpu_estimator_integration_test", size = "medium", srcs = ["tpu_estimator_integration_test.py"], args = [ "--test_num_shards=2", ], disable_experimental = True, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) tpu_py_test( name = "tpu_estimator_model_parallelism_test", size = "medium", srcs = ["tpu_estimator_model_parallelism_test.py"], args = [ ], disable_experimental = True, srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) py_test( name = "autotuning_iterations_per_loop_test", size = "small", srcs = ["autotuning_iterations_per_loop_test.py"], python_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", ], ) tpu_py_test( name = "tpu_enqueue_sequence_test", size = "medium", srcs = ["tpu_enqueue_sequence_test.py"], disable_experimental = True, python_version = "PY3", srcs_version = "PY3", deps = [ ":tpu_estimator", "//tensorflow_estimator/python/estimator:expect_absl_installed", "//tensorflow_estimator/python/estimator:expect_tensorflow_installed", "//third_party/tensorflow/contrib/summary", ], ) ================================================ FILE: tensorflow_estimator/python/estimator/tpu/__init__.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= """TPUEstimator.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function ================================================ FILE: tensorflow_estimator/python/estimator/tpu/_tpu_estimator_embedding.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# =================================================================== """Tooling for support TPU embedding in TPUEstimator.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import tensorflow as tf from tensorflow.python.feature_column import feature_column as core_fc from tensorflow.python.feature_column import feature_column_lib as core_fc_lib from tensorflow.python.feature_column import utils as fc_utils from tensorflow.python.framework import ops from tensorflow.python.tpu import feature_column as tpu_fc from tensorflow.python.tpu import feature_column_v2 as tpu_fc_v2 from tensorflow.python.tpu import tpu_embedding from tensorflow.python.tpu.tpu_embedding import AdagradParameters from tensorflow.python.tpu.tpu_embedding import AdamParameters from tensorflow.python.tpu.tpu_embedding import FtrlParameters from tensorflow.python.tpu.tpu_embedding import MomentumParameters from tensorflow.python.tpu.tpu_embedding import ProximalAdagradParameters from tensorflow.python.tpu.tpu_embedding import RMSPropParameters from tensorflow.python.tpu.tpu_embedding import StochasticGradientDescentParameters from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.estimator_export import estimator_export # pylint: disable=protected-access _TPU_EMBEDDING_COLUMN_CLASSES = (tpu_fc._TPUEmbeddingColumn, tpu_fc._TPUSharedEmbeddingColumn, tpu_fc_v2._TPUEmbeddingColumnV2, tpu_fc_v2._TPUSharedEmbeddingColumnV2) _TPU_DEVICE_SPECIFIC_EMBEDDING_COLUMNS = ( tpu_fc_v2._TPUDeviceSpecificEmbeddingColumnV2, tpu_fc_v2._TPUSharedDeviceSpecificEmbeddingColumnV2) _EMBEDDING_COLUMN_CLASSES = (core_fc._EmbeddingColumn, core_fc_lib.EmbeddingColumn, core_fc._SharedEmbeddingColumn) _SUPPORTED_FEATURE_COLUMNS = (core_fc._NumericColumn, core_fc_lib.NumericColumn) _SUPPORTED_OPTIMIZERS = ( ProximalAdagradParameters, AdagradParameters, AdamParameters, FtrlParameters, 
StochasticGradientDescentParameters, MomentumParameters, RMSPropParameters, ) # pylint: enable=protected-access _TABLE_NAME_PREFIX = 'tbl_' _LEN_TABLE_NAME_PREFIX = len(_TABLE_NAME_PREFIX) def _get_table_name_from_embedding_var_name(embedding_var_name): return '{}{}'.format(_TABLE_NAME_PREFIX, embedding_var_name) def _get_embedding_var_name_from_table_name(table_name): return table_name[_LEN_TABLE_NAME_PREFIX:] def _get_embedding_variable_name(scope_name, var_name): if scope_name: scope_name = scope_name + '/' return '{}{}'.format(scope_name, var_name) def _get_slot_variable_names(scope_name, var_name, optimization_parameters): """Return embedding variable names which are consistent with CPU runs.""" if scope_name: scope_name = scope_name + '/' if isinstance(optimization_parameters, tf.compat.v1.tpu.experimental.AdagradParameters): return tpu_embedding.AdagradSlotVariableNames('{}{}/Adagrad'.format( scope_name, var_name)) elif isinstance(optimization_parameters, tf.compat.v1.tpu.experimental.AdamParameters): return tpu_embedding.AdamSlotVariableNames( '{}{}/Adam/m'.format(scope_name, var_name), '{}{}/Adam/v'.format(scope_name, var_name)) elif isinstance(optimization_parameters, tf.compat.v1.tpu.experimental.FtrlParameters): return tpu_embedding.FtrlSlotVariableNames( '{}{}/Ftrl'.format(scope_name, var_name), # accumulator '{}{}/Ftrl_1'.format(scope_name, var_name)) # linear elif isinstance(optimization_parameters, MomentumParameters): return tpu_embedding.MomentumSlotVariableNames('{}{}/Momentum'.format( scope_name, var_name)) elif isinstance(optimization_parameters, RMSPropParameters): return tpu_embedding.RMSPropSlotVariableNames( ms='{}{}/RMSProp/ms'.format(scope_name, var_name), mom='{}{}/RMSProp/mom'.format(scope_name, var_name), ) elif isinstance(optimization_parameters, ProximalAdagradParameters): return tpu_embedding.ProximalAdagradSlotVariableNames( '{}{}/ProximalAdagrad'.format(scope_name, var_name)) elif isinstance( optimization_parameters, 
tf.compat.v1.tpu.experimental.StochasticGradientDescentParameters): return None else: raise ValueError('Support to infer full variable name ' 'for optimization_parameter {} has not been added.'.format( optimization_parameters)) def get_full_variable_names(graph, table_to_config_dict, optimization_parameters=None): """Return embedding variable names and slot variables which are consistent with CPU runs.""" collection = graph.get_collection_ref(tpu_fc._TPU_FC_TO_SCOPE) # pylint: disable=protected-access if not collection: raise RuntimeError( 'Embedding feature column did not capture any thing. Make sure the ' 'feature columns passed to TPUEstimator constructor is properly ' 'used in model_fn.') embedding_variable_name_by_table = {} slot_variable_names_by_table = {} for table_name in table_to_config_dict: embedding_var_name = _get_embedding_var_name_from_table_name(table_name) (scope_name, var_name) = collection[0][embedding_var_name] embedding_variable_name_by_table[table_name] = ( _get_embedding_variable_name(scope_name, var_name)) if optimization_parameters: slot_variable_names_by_table[table_name] = _get_slot_variable_names( scope_name, var_name, optimization_parameters) graph.clear_collection(tpu_fc._TPU_FC_TO_SCOPE) # pylint: disable=protected-access return embedding_variable_name_by_table, slot_variable_names_by_table def get_configs_from_feature_columns(feature_columns): """Create configs for TPUEmbedding etc from a list of feature columns. Args: feature_columns: a list of supported feature columns. Returns: A tuple of dicts, the first maps tables to their config, the second maps features to their config, the third maps learning rate key to callback that takes global step and outputs dynamic learning rate. 
""" allowed = ( tpu_fc_v2._TPUEmbeddingColumnV2, # pylint: disable=protected-access tpu_fc_v2._TPUSharedEmbeddingColumnV2) # pylint: disable=protected-access warn = (tpu_fc._TPUEmbeddingColumn, tpu_fc._TPUSharedEmbeddingColumn) # pylint: disable=protected-access for column in feature_columns: if not isinstance(column, allowed + warn): raise TypeError( 'Unsupported feature column {}. Supported types are {}.'.format( type(column), allowed)) if isinstance(column, warn): tf.compat.v1.logging.warn( 'Columns of type {} are deprecated. Supported types are {}.'.format( type(column), allowed)) table_to_config = {} feature_to_config = {} for column in feature_columns: feature_name = column.get_feature_key_name() table_name = _get_table_name_from_embedding_var_name( column.get_embedding_var_name()) if feature_name in feature_to_config: raise ValueError( 'Feature column {} is used with multiple embeddings and this is ' 'not supported.'.format(feature_name)) feature_to_config[feature_name] = tpu_embedding.FeatureConfig( table_id=table_name, max_sequence_length=column.get_max_sequence_length(), weight_key=column.get_weight_key_name()) vocabulary_size, dimension = column.get_embedding_table_size() table_to_config[table_name] = tpu_embedding.TableConfig( vocabulary_size=vocabulary_size, dimension=dimension, initializer=column.get_initializer(), combiner=column.get_combiner(), learning_rate_fn=column.get_learning_rate_fn()) return table_to_config, feature_to_config @estimator_export(v1=['estimator.tpu.experimental.EmbeddingConfigSpec']) class EmbeddingConfigSpec( collections.namedtuple('EmbeddingConfigSpec', [ 'feature_columns', 'tensor_core_feature_columns', 'optimization_parameters', 'clipping_limit', 'pipeline_execution_with_tensor_core', 'experimental_gradient_multiplier_fn', 'feature_to_config_dict', 'table_to_config_dict', 'partition_strategy', 'profile_data_directory' ])): """Class to keep track of the specification for TPU embeddings. 
Pass this class to `tf.estimator.tpu.TPUEstimator` via the `embedding_config_spec` parameter. At minimum you need to specify `feature_columns` and `optimization_parameters`. The feature columns passed should be created with some combination of `tf.tpu.experimental.embedding_column` and `tf.tpu.experimental.shared_embedding_columns`. TPU embeddings do not support arbitrary Tensorflow optimizers and the main optimizer you use for your model will be ignored for the embedding table variables. Instead TPU embeddigns support a fixed set of predefined optimizers that you can select from and set the parameters of. These include adagrad, adam and stochastic gradient descent. Each supported optimizer has a `Parameters` class in the `tf.tpu.experimental` namespace. ``` column_a = tf.feature_column.categorical_column_with_identity(...) column_b = tf.feature_column.categorical_column_with_identity(...) column_c = tf.feature_column.categorical_column_with_identity(...) tpu_shared_columns = tf.tpu.experimental.shared_embedding_columns( [column_a, column_b], 10) tpu_non_shared_column = tf.tpu.experimental.embedding_column( column_c, 10) tpu_columns = [tpu_non_shared_column] + tpu_shared_columns ... def model_fn(features): dense_features = tf_keras.layers.DenseFeature(tpu_columns) embedded_feature = dense_features(features) ... estimator = tf.estimator.tpu.TPUEstimator( model_fn=model_fn, ... embedding_config_spec=tf.estimator.tpu.experimental.EmbeddingConfigSpec( column=tpu_columns, optimization_parameters=( tf.estimator.tpu.experimental.AdagradParameters(0.1)))) ``` @compatibility(TF2) TPU Estimator manages its own TensorFlow graph and session, so it is not compatible with TF2 behaviors. We recommend that you migrate to the newer `tf.distribute.TPUStrategy`. See the [TPU guide](https://www.tensorflow.org/guide/tpu) for details. 
@end_compatibility """ def __new__(cls, feature_columns=None, optimization_parameters=None, clipping_limit=None, pipeline_execution_with_tensor_core=False, experimental_gradient_multiplier_fn=None, feature_to_config_dict=None, table_to_config_dict=None, partition_strategy='div', profile_data_directory=None): """Creates an `EmbeddingConfigSpec` instance. Args: feature_columns: All embedding `FeatureColumn`s used by model. optimization_parameters: An instance of `AdagradParameters`, `AdamParameters` or `StochasticGradientDescentParameters`. This optimizer will be applied to all embedding variables specified by `feature_columns`. clipping_limit: (Optional) Clipping limit (absolute value). pipeline_execution_with_tensor_core: setting this to `True` makes training faster, but trained model will be different if step N and step N+1 involve the same set of embedding IDs. Please see `tpu_embedding_configuration.proto` for details. experimental_gradient_multiplier_fn: (Optional) A Fn taking global step as input returning the current multiplier for all embedding gradients. feature_to_config_dict: A dictionary mapping feature names to instances of the class `FeatureConfig`. Either features_columns or the pair of `feature_to_config_dict` and `table_to_config_dict` must be specified. table_to_config_dict: A dictionary mapping feature names to instances of the class `TableConfig`. Either features_columns or the pair of `feature_to_config_dict` and `table_to_config_dict` must be specified. partition_strategy: A string, determining how tensors are sharded to the tpu hosts. See `tf.nn.safe_embedding_lookup_sparse` for more details. Allowed value are `"div"` and `"mod"'. If `"mod"` is used, evaluation and exporting the model to CPU will not work as expected. profile_data_directory: Directory where embedding lookup statistics are stored. 
These statistics summarize information about the inputs to the embedding lookup operation, in particular, the average number of embedding IDs per example and how well the embedding IDs are load balanced across the system. The lookup statistics are used during TPU initialization for embedding table partitioning. Collection of lookup statistics is done at runtime by profiling the embedding inputs, only a small fraction of input samples are profiled to minimize host CPU overhead. Once a suitable number of samples are profiled, the lookup statistics are saved to table-specific files in the profile data directory generally at the end of a TPU training loop. The filename corresponding to each table is obtained by hashing table specific parameters (e.g., table name and number of features) and global configuration parameters (e.g., sharding strategy and task count). The same profile data directory can be shared among several models to reuse embedding lookup statistics. Returns: An `EmbeddingConfigSpec` instance. Raises: ValueError: If the feature_columns are not specified. TypeError: If the feature columns are not of ths correct type (one of _SUPPORTED_FEATURE_COLUMNS, _TPU_EMBEDDING_COLUMN_CLASSES OR _EMBEDDING_COLUMN_CLASSES). ValueError: If `optimization_parameters` is not one of the required types. """ if (not feature_columns and not (feature_to_config_dict and table_to_config_dict) or (feature_columns and (feature_to_config_dict and table_to_config_dict))): raise ValueError('Exactly one of `feature_columns` and the pair ' '`feature_to_config_dict` and `table_to_config_dict` ' 'must be be specified.') if partition_strategy not in ('div', 'mod'): raise ValueError('Invalid partition_strategy {}. 
Must be one of "mod" or ' '"div".'.format(partition_strategy)) tensor_core_feature_columns = None embedding_core_feature_columns = None if feature_columns: tensor_core_feature_columns = [] embedding_core_feature_columns = [] # It is unknown at this moment, whether the TPUEstimator is running in CPU # or TPU mode. So allow non-TPU embedding columns also. supported_classes = tuple( list(_SUPPORTED_FEATURE_COLUMNS) + list(_TPU_EMBEDDING_COLUMN_CLASSES) + list(_EMBEDDING_COLUMN_CLASSES)) for column in feature_columns: if (isinstance(column, _TPU_DEVICE_SPECIFIC_EMBEDDING_COLUMNS) and (column._embedding_lookup_device == # pylint: disable=protected-access tpu_fc_v2.EmbeddingDevice.TPU_TENSOR_CORE)): tensor_core_feature_columns.append(column) else: embedding_core_feature_columns.append(column) if not isinstance(column, supported_classes): raise TypeError( 'All feature columns must be supported types in {}. Got {}' .format(supported_classes, type(column))) if not isinstance(optimization_parameters, _SUPPORTED_OPTIMIZERS): raise ValueError('optimization_parameters must be an instance of type ' '{}. Got {}.'.format(_SUPPORTED_OPTIMIZERS, type(optimization_parameters))) else: for feature, config in feature_to_config_dict.items(): if not isinstance(config, tpu_embedding.FeatureConfig): raise TypeError( 'Config for feature {} must be of type `FeatureConfig`. Got {}' .format(feature, type(config))) if config.table_id not in table_to_config_dict: raise ValueError('Feature {} refers to table {} which is not in the ' 'table_to_config_dict.'.format( feature, config.table_id)) for table, config in table_to_config_dict.items(): if not isinstance(config, tpu_embedding.TableConfig): raise TypeError( 'Config for table {} must be of type `TableConfig`. 
Got ' '{}'.format(table, type(config))) return super(EmbeddingConfigSpec, cls).__new__( cls, feature_columns=embedding_core_feature_columns, tensor_core_feature_columns=tensor_core_feature_columns, optimization_parameters=optimization_parameters, clipping_limit=clipping_limit, pipeline_execution_with_tensor_core=pipeline_execution_with_tensor_core, experimental_gradient_multiplier_fn=experimental_gradient_multiplier_fn, feature_to_config_dict=feature_to_config_dict, table_to_config_dict=table_to_config_dict, partition_strategy=partition_strategy, profile_data_directory=profile_data_directory) class EmbeddingConfig(object): """This is the internal immutable object for embedding config. `_EmbeddingConfig` is responsible to _translate_ user provided `EmbeddingConfigSpec` to internal data structures, mostly constructor arguments of `TPUEmbedding`. """ def __init__(self, embedding_config_spec, train_batch_size, eval_batch_size, num_hosts, num_cores, run_config): if not embedding_config_spec: raise ValueError('embedding_config_spec cannot be None.') self._embedding_config_spec = embedding_config_spec self._train_batch_size = train_batch_size self._eval_batch_size = eval_batch_size self._num_hosts = num_hosts self._num_cores = num_cores self._run_config = run_config if embedding_config_spec.feature_columns: self._table_to_config_dict, self._feature_to_config_dict = ( get_configs_from_feature_columns( embedding_config_spec.feature_columns)) else: self._table_to_config_dict = embedding_config_spec.table_to_config_dict self._feature_to_config_dict = embedding_config_spec.feature_to_config_dict self._partition_strategy = embedding_config_spec.partition_strategy self._mode_to_tpu_embedding_dict = {} self.dummy_table_variables = None self._grad_multiplier_fn = ( embedding_config_spec.experimental_gradient_multiplier_fn) def get_grad_multiplier(self): if self._grad_multiplier_fn: return ops.convert_to_tensor( self._grad_multiplier_fn(tf.compat.v1.train.get_global_step()), 
dtype=tf.dtypes.float32) def has_embedding_tables(self): return bool(self._table_to_config_dict) def _create_tpu_embedding(self, mode): """Create tpu_embedding.TPUEmbedding based on mode.""" if mode == model_fn_lib.ModeKeys.TRAIN: batch_size = self._train_batch_size else: batch_size = self._eval_batch_size if mode == model_fn_lib.ModeKeys.TRAIN: tpu_embedding_mode = tpu_embedding.TRAINING optimization_parameters = ( self._embedding_config_spec.optimization_parameters) elif (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.PREDICT): tpu_embedding_mode = tpu_embedding.INFERENCE optimization_parameters = None else: raise ValueError('Mode {} is not supported.'.format(mode)) if self._run_config.cluster: master = self._run_config.cluster.master() cluster_spec = self._run_config.cluster.cluster_spec() cluster_def = cluster_spec.as_cluster_def() if cluster_spec else None else: master = ( self._run_config.evaluation_master if mode == model_fn_lib.ModeKeys.EVAL else self._run_config.master) cluster_def = None master_job_name = None if self._run_config.tpu_config.tpu_job_name is not None: master_job_name = self._run_config.tpu_config.tpu_job_name tpu_embedding_ = tpu_embedding.TPUEmbedding( self._table_to_config_dict, self._feature_to_config_dict, batch_size, tpu_embedding_mode, master, optimization_parameters, cluster_def, pipeline_execution_with_tensor_core=self._embedding_config_spec .pipeline_execution_with_tensor_core, partition_strategy=self._partition_strategy, profile_data_directory=self._embedding_config_spec .profile_data_directory, master_job_name=master_job_name) return tpu_embedding_ def get_tpu_embedding(self, mode): if mode not in self._mode_to_tpu_embedding_dict: self._mode_to_tpu_embedding_dict[mode] = ( self._create_tpu_embedding(mode)) return self._mode_to_tpu_embedding_dict[mode] def _maybe_dense_to_sparse(tensor): """Possibly convert a dense (rank 1 or 2) tensor to a SparseTensor.""" # If already sparse, return as is. 
if isinstance(tensor, tf.sparse.SparseTensor): return tensor indices = tf.compat.v1.where(tensor) values = tf.compat.v1.gather_nd(tensor, indices) shape = tf.compat.v1.shape(tensor, out_type=tf.dtypes.int64) return tf.sparse.SparseTensor(indices, values, shape) def split_inputs(ctx, features, labels, num_cores_per_batch=1): """Splits the dense and sparse tensors inside the features and labels.""" enqueue_datas = collections.OrderedDict() if ctx.embedding_config: tpu_embedding_ = ctx.embedding_config.tpu_embedding for feature_key in tpu_embedding_.feature_to_config_dict: sparse_feature = _get_sparse_feature_from_feature(feature_key, features) max_sequence_length = tpu_embedding_.feature_to_config_dict[ feature_key].max_sequence_length combiner = tpu_embedding_._table_to_config_dict[ tpu_embedding_._feature_to_config_dict[feature_key].table_id].combiner if max_sequence_length > 0: length_feature_name = ( tpu_fc.get_sequence_length_feature_key_name_from_feature_key_name( feature_key)) length_feature = tf.math.minimum( fc_utils.sequence_length_from_sparse_tensor(sparse_feature), max_sequence_length) length_feature.set_shape(ctx.batch_size_for_input_fn) features[length_feature_name] = length_feature weight_key = tpu_embedding_.feature_to_config_dict[feature_key].weight_key sparse_feature_split = _split_tensor(sparse_feature, num_cores_per_batch) if combiner is None and not isinstance(sparse_feature, tf.sparse.SparseTensor): # A dense tensor with no combiner was provided so we assume that each # of the embedding_indices belongs to a different sample (setting # sample_indices to None). if weight_key is not None: raise ValueError( 'Found weights {} for weighted_categorical_column, which is not' 'compatible with sparse feature {} enqueued as dense tensor.' 
.format(weight_key, feature_key)) enqueue_data = [] for i in range(num_cores_per_batch): enqueue_data.append( tpu_embedding.EnqueueData(sparse_feature_split[i])) else: weights = None if isinstance(sparse_feature, tf.sparse.SparseTensor): weights = _get_weights_from_features(weight_key, features) weights_split = _split_tensor(weights, num_cores_per_batch) enqueue_data = [] for i in range(num_cores_per_batch): split_weights = weights_split[i] if weights else None enqueue_data.append( tpu_embedding.EnqueueData.from_sparse_tensor( _maybe_dense_to_sparse(sparse_feature_split[i]), weights=split_weights)) enqueue_datas[feature_key] = enqueue_data if ctx.tensor_core_embedding_columns: # pylint: disable=protected-access for column in ctx.tensor_core_embedding_columns: feature_key = column.categorical_column.key sparse_feature = _get_sparse_feature_from_feature(feature_key, features) padded_values, padded_mask = ( tpu_fc_v2.pad_sparse_embedding_lookup_indices( sparse_feature, column._tensor_core_shape[1])) padded_values.set_shape( [ctx.batch_size_for_input_fn, column._tensor_core_shape[1]]) padded_mask.set_shape( [ctx.batch_size_for_input_fn, column._tensor_core_shape[1]]) features[feature_key] = padded_values mask_key = feature_key + tpu_fc_v2._TENSOR_CORE_MASK_KEY_SUFFIX if mask_key in features: raise ValueError('Mask key {} for Tensor Core embedding is ' 'already in use.'.format(mask_key)) features[mask_key] = padded_mask # pylint: enable=protected-access # Transpose the enqueue_datas dict into a list of dicts enqueue_datas_list = [] for i in range(num_cores_per_batch): enqueue_data = {} for key, value in enqueue_datas.items(): enqueue_data[key] = value[i] enqueue_datas_list.append(enqueue_data) return features, labels, enqueue_datas_list def _split_tensor(tensor, num_splits): """Splits tensor into num_splits pieces, returns a list of pieces.""" if tensor is None: return [None] * num_splits elif num_splits <= 0: return ValueError( 'Tensors cannot be split into {} 
pieces.'.format(num_splits)) elif num_splits == 1: return [tensor] elif isinstance(tensor, tf.sparse.SparseTensor): return tf.compat.v2.sparse.split(tensor, num_splits, axis=0) else: return tf.split(tensor, num_splits) def _get_sparse_feature_from_feature(feature_key, features): """Pop and return sparse feature.""" sparse_feature = features.pop(feature_key) if not sparse_feature.dtype.is_integer: raise ValueError('SparseTensor with string as values are not supported. ' 'If you are using categorical_column_with_vocabulary_file ' 'or categorical_column_with_vocabulary_list, please call ' 'your_column.categorical_column._transform_feature({{' 'your_column.key: features[your_column.key]}}) in ' 'your input_fn() to convert string to int. ' 'feature_key = {}.'.format(feature_key)) return sparse_feature def _get_weights_from_features(weight_key_name, features): """Pop and return feature for weights, possibly None.""" weights = None if weight_key_name is not None: if weight_key_name in features: weights = features.pop(weight_key_name) else: raise ValueError( 'Cannot find weights {} for weighted_categorical_column.' ' Please check if the weights are present in feature dict. Also' ' note weight-sharing among weighted_categorical_column is not ' 'supported on TPU.'.format(weight_key_name)) if not isinstance(weights, tf.sparse.SparseTensor): raise ValueError( 'weighted_categorical_column with weight key name {} has dense ' 'weights. Dense weights are not supported on TPU. Please use ' 'sparse weights instead.'.format(weight_key_name)) if weights.dtype is not tf.dtypes.float32: weights = tf.cast(weights, dtype=tf.dtypes.float32) return weights def get_tpu_embedding_columns(feature_columns): """Get feature columns meant to use TPU embedding. Args: feature_columns: a list of feature columns. Returns: A list of feature columns which can be placed on TPU embedding. 
""" tpu_embedding_columns = [] for column in feature_columns: if isinstance(column, _TPU_EMBEDDING_COLUMN_CLASSES): tpu_embedding_columns.append(column) return tpu_embedding_columns ================================================ FILE: tensorflow_estimator/python/estimator/tpu/autotuning_iterations_per_loop_test.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= """Tests for auto-tuning iterations_per_loop using TPUStopWithAutoTunedStepHook.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import time import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow_estimator.python.estimator.tpu import iteration_count_estimator from tensorflow_estimator.python.estimator.tpu import tpu_estimator from tensorflow_estimator.python.estimator.tpu import util as util_lib class IterationsPerLoopParsingTest(tf.test.TestCase): def _parse_and_validate_iterations_per_loop(self, value, expected_value, expected_unit): d = util_lib.parse_iterations_per_loop(value) self.assertTrue(d) self.assertEqual(d.value, expected_value) self.assertEqual(d.unit, expected_unit) def _parse_and_validate_invalid_iterations_per_loop(self, value): with self.assertRaises(ValueError) as ve: self._parse_and_validate_iterations_per_loop(value, 
class IterationsPerLoopParsingTest(tf.test.TestCase):
  """Tests for `util_lib.parse_iterations_per_loop`."""

  def _parse_and_validate_iterations_per_loop(self, value, expected_value,
                                              expected_unit):
    # Parses `value` and checks the resulting (value, unit) pair.
    d = util_lib.parse_iterations_per_loop(value)
    self.assertTrue(d)
    self.assertEqual(d.value, expected_value)
    self.assertEqual(d.unit, expected_unit)

  def _parse_and_validate_invalid_iterations_per_loop(self, value):
    with self.assertRaises(ValueError) as ve:
      self._parse_and_validate_iterations_per_loop(value, 0, '')
    # Bug fix: Python 3 exceptions have no `.message` attribute; inspect
    # str(ve.exception) instead of ve.exception.message.
    self.assertTrue(
        str(ve.exception).startswith('Invalid `iterations_per_loop` value.'))

  def test_parsing_iterations_per_loop(self):
    """Tests parsing valid and invalid `iterations_per_loop` values."""
    self._parse_and_validate_iterations_per_loop(1, 1, 'count')
    self._parse_and_validate_iterations_per_loop('1', 1, 'count')
    self._parse_and_validate_iterations_per_loop(2, 2, 'count')
    self._parse_and_validate_iterations_per_loop(10, 10, 'count')
    self._parse_and_validate_iterations_per_loop(123, 123, 'count')
    self._parse_and_validate_iterations_per_loop('123', 123, 'count')
    self._parse_and_validate_iterations_per_loop('1h', 3600, 'seconds')
    self._parse_and_validate_iterations_per_loop('1m', 60, 'seconds')
    self._parse_and_validate_iterations_per_loop('1s', 1, 'seconds')
    self._parse_and_validate_iterations_per_loop('10h', 10 * 3600, 'seconds')
    self._parse_and_validate_iterations_per_loop('10m', 10 * 60, 'seconds')
    self._parse_and_validate_iterations_per_loop('10s', 10, 'seconds')
    self._parse_and_validate_iterations_per_loop('100h', 100 * 3600, 'seconds')
    self._parse_and_validate_iterations_per_loop('1000m', 1000 * 60, 'seconds')
    self._parse_and_validate_iterations_per_loop('10800s', 10800, 'seconds')
    self._parse_and_validate_invalid_iterations_per_loop(+0)
    self._parse_and_validate_invalid_iterations_per_loop(0)
    self._parse_and_validate_invalid_iterations_per_loop(-0)
    self._parse_and_validate_invalid_iterations_per_loop(-0o12)
    self._parse_and_validate_invalid_iterations_per_loop('012')
    self._parse_and_validate_invalid_iterations_per_loop('001')
    self._parse_and_validate_invalid_iterations_per_loop('0')
    self._parse_and_validate_invalid_iterations_per_loop('01')
    self._parse_and_validate_invalid_iterations_per_loop('-1')
    self._parse_and_validate_invalid_iterations_per_loop('-0h')
    self._parse_and_validate_invalid_iterations_per_loop('0h')
    self._parse_and_validate_invalid_iterations_per_loop('0s')
    self._parse_and_validate_invalid_iterations_per_loop('0m')
    self._parse_and_validate_invalid_iterations_per_loop('-1h')
    self._parse_and_validate_invalid_iterations_per_loop('-1s')
    self._parse_and_validate_invalid_iterations_per_loop('-1m')
class IterationPredictorTest(tf.test.TestCase):
  """Tests for `iteration_count_estimator.IterationCountEstimator`."""

  def setUp(self):
    self.estimator = iteration_count_estimator.IterationCountEstimator(
        capacity=5)

  def test_empty(self):
    """Tests on empty queue."""
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(1))
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(10))

  def test_reset(self):
    """Tests reset states."""
    self.assertEqual(0, self.estimator._sample_count)
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(50))
    self.assertEqual(0, len(self.estimator._buffer_wheel))
    self.estimator._reset()
    self.assertEqual(0, self.estimator._sample_count)
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(100))
    self.assertEqual(0, len(self.estimator._buffer_wheel))
    self.estimator.update(9, 1)
    self.assertEqual(1, self.estimator._sample_count)
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(8))

  def test_invalid_update(self):
    """Tests reject invalid update."""
    self.estimator._reset()
    self.estimator.update(0, 0)
    self.assertEqual(0, len(self.estimator._buffer_wheel))
    # Bug fix: Python 3 `assertRaises` context exposes the exception as
    # `ve.exception`; there is no `.message` attribute, so use str().
    with self.assertRaises(ValueError) as ve:
      self.assertEqual(self.estimator._min_iterations, self.estimator.get(-1))
    self.assertIn('Invalid `total_secs`', str(ve.exception))
    with self.assertRaises(ValueError) as ve:
      self.assertEqual(self.estimator._min_iterations, self.estimator.get(0))
    self.assertIn('Invalid `total_secs`', str(ve.exception))

  def test_zero_mean(self):
    """Tests getting estimate when the elapsed time mean value is zero."""
    self.estimator.update(0, 1)
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(10))
    self.estimator.update(0, 1)
    self.estimator.update(0, 1)
    self.assertEqual(self.estimator._min_iterations, self.estimator.get(10))

  def test_diff_less_than_percentage(self):
    """Tests computing diff less than a percentage."""
    self.assertTrue(self.estimator._diff_less_than_percentage(5, 10, 50))
    self.assertTrue(self.estimator._diff_less_than_percentage(2.5, 10, 75))
    self.assertTrue(self.estimator._diff_less_than_percentage(10, 10, 5))
    self.assertTrue(self.estimator._diff_less_than_percentage(9.5, 10, 5))
    self.assertTrue(self.estimator._diff_less_than_percentage(9.6, 10, 5))
    self.assertFalse(self.estimator._diff_less_than_percentage(11, 10, 5))
    self.assertFalse(self.estimator._diff_less_than_percentage(20, 10, 5))
    self.assertTrue(self.estimator._diff_less_than_percentage(10.3, 10, 5))
    self.assertTrue(self.estimator._diff_less_than_percentage(10.5, 10, 5))
    self.assertFalse(self.estimator._diff_less_than_percentage(10.6, 10, 5))
    self.assertFalse(self.estimator._diff_less_than_percentage(1, 10, 5))
    self.assertFalse(self.estimator._diff_less_than_percentage(9, 10, 5))
    # Bug fix: use str(ve.exception) rather than the Python-2-only ve.message.
    with self.assertRaises(ValueError) as ve:
      self.assertTrue(self.estimator._diff_less_than_percentage(0, 10, 5))
    self.assertIn('Invalid `actual` value', str(ve.exception))
    with self.assertRaises(ValueError) as ve:
      self.assertTrue(self.estimator._diff_less_than_percentage(10, 0, 5))
    self.assertIn('Invalid `target` value.', str(ve.exception))

  def test_mean_runtime_secs(self):
    """Tests computing mean of step time secs."""
    self.assertEqual(0.0, self.estimator._mean_runtime_secs())
    self.estimator.update(1, 5)
    self.assertEqual(1.0, self.estimator._mean_runtime_secs())
    self.estimator._reset()
    self.estimator.update(2, 3)
    self.estimator.update(2, 3)
    self.estimator.update(2, 3)
    self.assertEqual(2.0, self.estimator._mean_runtime_secs())
    self.estimator._reset()
    self.estimator.update(1, 3)
    self.estimator.update(2, 3)
    self.assertEqual((1.0 + 2.0) / 2, self.estimator._mean_runtime_secs())

  def test_mean_step_time_secs(self):
    """Tests computing mean of step time secs."""
    self.assertEqual(0.0, self.estimator._mean_step_time_secs())
    self.estimator.update(1, 5)
    self.assertEqual(1.0 / 5, self.estimator._mean_step_time_secs())
    self.estimator._reset()
    self.estimator.update(2, 3)
    self.estimator.update(2, 3)
    self.estimator.update(2, 3)
    self.assertEqual(2.0 / 3, self.estimator._mean_step_time_secs())
    self.estimator._reset()
    self.estimator.update(1, 3)
    self.estimator.update(2, 3)
    self.assertEqual((1.0 / 3 + 2.0 / 3) / 2,
                     self.estimator._mean_step_time_secs())

  def _test_std_step_time_secs(self):
    """Tests computing std deviation of the step time secs."""
    # NOTE(review): leading underscore means this is intentionally not
    # collected as a test — confirm whether it should be enabled.
    self.assertEqual(0.0, self.estimator._std_step_time_secs())
    self.estimator.update(1, 5)
    self.estimator.update(1, 5)
    self.assertEqual(0.0, self.estimator._std_step_time_secs())
    self.estimator.update(4, 5)
    self.assertAlmostEqual(0.283, self.estimator._std_step_time_secs(), 3)
    self.estimator.update(5, 5)
    self.assertAlmostEqual(0.357, self.estimator._std_step_time_secs(), 3)

  def test_buffer_capacity(self):
    """Tests to make sure wheel is kept at its capacity."""
    self.estimator._reset(capacity=3)
    self.assertEqual(0, len(self.estimator._buffer_wheel))
    self.assertEqual(3, self.estimator._capacity)
    for _ in range(0, self.estimator._capacity):
      self.estimator.update(1, 1)
    self.assertEqual(3, len(self.estimator._buffer_wheel))
    self.assertEqual(1.0, self.estimator._mean_runtime_secs())
    self.assertEqual(1.0, self.estimator._mean_step_time_secs())
    for _ in range(0, self.estimator._capacity):
      self.estimator.update(3, 2)
    self.assertEqual(3, len(self.estimator._buffer_wheel))
    self.assertEqual(3.0, self.estimator._mean_runtime_secs())
    self.assertEqual(1.5, self.estimator._mean_step_time_secs())

  def test_partial_wheel(self):
    """Tests getting estimate when the circular buffer is not full."""
    self.assertEqual(0, self.estimator._sample_count)
    self.estimator.update(5.0, 1)
    self.assertEqual(1, self.estimator._sample_count)
    self.assertEqual(5.0, self.estimator._mean_runtime_secs())
    self.assertEqual(5.0, self.estimator._mean_step_time_secs())
    self.assertEqual(2, self.estimator.get(10))
    self.estimator.update(5.0, 1)
    self.assertEqual(2, self.estimator._sample_count)
    self.assertEqual(5.0, self.estimator._mean_runtime_secs())
    self.assertEqual(5.0, self.estimator._mean_step_time_secs())
    self.assertEqual(3, self.estimator.get(15))
    self.estimator.update(5.0, 1)
    self.assertEqual(3, self.estimator._sample_count)
    self.assertEqual(5.0, self.estimator._mean_runtime_secs())
    self.assertEqual(5.0, self.estimator._mean_step_time_secs())
    self.assertEqual(2, self.estimator.get(10))

  def test_update_convergence(self):
    """Tests iterative search convergence."""
    for _ in range(0, self.estimator._capacity):
      self.estimator.update(2.0, 4)
    self.assertEqual(2, self.estimator._mean_runtime_secs())
    self.assertEqual(0.5, self.estimator._mean_step_time_secs())
    iterations = 4
    target_elapsed_time = 10
    actual_elapsed_time = 2
    secs_per_iterations = actual_elapsed_time / iterations
    for _ in range(0, 5):
      self.estimator.update(actual_elapsed_time, iterations)
      iterations = self.estimator.get(target_elapsed_time)
      actual_elapsed_time = iterations * secs_per_iterations
    self.assertLessEqual(abs(actual_elapsed_time - target_elapsed_time), 1)
class TPUStopAtStepHookTest(tf.test.TestCase):
  """Tests for `tpu_estimator._TPUStopAtStepHook`."""

  def test_invalid_parameters_on_construction(self):
    """Tests invalid parameters on construction."""
    # Bug fix throughout this class: Python 3 exceptions have no `.message`
    # attribute; inspect str(ve.exception) instead.
    with self.assertRaises(ValueError) as ve:
      tpu_estimator._TPUStopAtStepHook(
          util_lib.IterationsPerLoopCounter(value=10, unit='count'),
          num_steps=None,
          final_step=None)
    self.assertEqual(
        str(ve.exception), 'One of num_steps or final_step must be specified.')
    with self.assertRaises(ValueError) as ve:
      tpu_estimator._TPUStopAtStepHook(
          util_lib.IterationsPerLoopCounter(value=10, unit='count'),
          num_steps=10,
          final_step=100)
    self.assertEqual(
        str(ve.exception),
        'Only one of num_steps or final_step can be specified.')
    with self.assertRaises(ValueError) as ve:
      tpu_estimator._TPUStopAtStepHook(
          util_lib.IterationsPerLoopCounter(value=10, unit='secs'),
          num_steps=10,
          final_step=100)
    self.assertEqual(
        str(ve.exception),
        'Only `count` or `seconds` are accepted as the `iterations_per_loop` '
        'unit.')

  def _validate_hook_life_cycle(self, iterations_per_loop_counter, num_steps):
    """Test execute hook life-cycle.

    This test validates:
    - Correctly updating the iterations both for `iterations_per_loop_counter`
      specified as both `count` and `seconds`
    - Terminates the session.run() by signaling termination `request_stop()`
    - The computation of the final iterations count when the remaining step
      count is smaller than the iterations_per_loop_counter.value.

    Args:
      iterations_per_loop_counter: This is the number of train steps running in
        TPU before returning to CPU host for each `Session.run`. Can be
        specified as `count` or `seconds`.
      num_steps: Number of steps to execute.
    """
    with self.test_session() as sess:
      global_step_tensor = tf.compat.v1.train.get_or_create_global_step(
          sess.graph)
      global_step_tensor.load(0, session=sess)
      self.assertEqual(sess.run(global_step_tensor), 0)
      default_iterations = 1
      hook = tpu_estimator._TPUStopAtStepHook(
          iterations_per_loop_counter, num_steps=num_steps)
      self.assertEqual(default_iterations, hook._next_iteration_count)
      self.assertEqual(num_steps, hook._num_steps)
      self.assertEqual(None, hook._final_step)
      self.assertEqual(iterations_per_loop_counter.value,
                       hook._iterations_per_loop_counter.value)
      self.assertEqual(iterations_per_loop_counter.unit,
                       hook._iterations_per_loop_counter.unit)

      def _step(hook, is_final, expected_iterations):
        # Drives one begin/after_create_session/after_run cycle with a mocked
        # run context and checks the hook's next-iteration estimate.
        hook.begin()
        hook.after_create_session(sess, None)

        class RunContextMock(object):

          def __init__(self, session):
            self.session = session
            self.stop = False

          def request_stop(self):
            self.stop = True

        class RunValues(object):

          def __init__(self, elapsed_time_secs):
            self.results = {'elapsed_time': elapsed_time_secs}

        run_context = RunContextMock(sess)
        run_values = RunValues(1)
        time.sleep(1.0)
        hook.after_run(run_context, run_values)
        if is_final:
          self.assertEqual(hook._next_iteration_count, expected_iterations)
          self.assertEqual(run_context.stop, is_final)
        else:
          self.assertLessEqual(
              abs(hook._next_iteration_count - expected_iterations), 1)

      # Estimates iterations when global_step < final_step.
      global_step = sess.run(tf.compat.v1.train.get_global_step())
      self.assertEqual(global_step, 0)
      _step(hook, is_final=False, expected_iterations=3)
      # Estimates iterations when global_step < final_step.
      global_step_tensor.load(2, session=sess)
      _step(hook, is_final=False, expected_iterations=3)
      # Estimates iterations when global_step < final_step, and
      # (final_step - global_step) < estimated-iterations.
      global_step_tensor.load(4, session=sess)
      _step(hook, is_final=False, expected_iterations=1)
      # Estimates iterations when global_step == final_step.
      global_step_tensor.load(5, session=sess)
      _step(hook, is_final=True, expected_iterations=0)

  @test_util.deprecated_graph_mode_only
  def test_hook_life_cycle(self):
    """Tests update iterations."""
    self._validate_hook_life_cycle(
        util_lib.IterationsPerLoopCounter(value=3, unit='seconds'), 5)
    self._validate_hook_life_cycle(
        util_lib.IterationsPerLoopCounter(value=3, unit='count'), 5)

  def _validate_initialization(self, iterations_per_loop_counter, num_steps):
    with self.test_session() as sess:
      global_step_tensor = tf.compat.v1.train.get_or_create_global_step(
          sess.graph)
      global_step_tensor.load(0, session=sess)
      self.assertEqual(sess.run(global_step_tensor), 0)
      hook = tpu_estimator._TPUStopAtStepHook(
          iterations_per_loop_counter, num_steps=num_steps)
      self.assertEqual(1, hook._next_iteration_count)
      self.assertEqual(num_steps, hook._num_steps)
      self.assertEqual(None, hook._final_step)
      self.assertEqual(iterations_per_loop_counter.value,
                       hook._iterations_per_loop_counter.value)
      self.assertEqual(iterations_per_loop_counter.unit,
                       hook._iterations_per_loop_counter.unit)
      if iterations_per_loop_counter.unit == 'count':
        # NOTE(review): this accesses `iteration_count_estimator` (no leading
        # underscore) while the attribute below is `_iteration_count_estimator`
        # — confirm whether the non-underscore name is intentional.
        with self.assertRaises(AttributeError) as ve:
          _ = hook.iteration_count_estimator
        self.assertIn('object has no attribute', str(ve.exception))
      else:
        self.assertIsInstance(hook._iteration_count_estimator,
                              iteration_count_estimator.IterationCountEstimator)

  @test_util.deprecated_graph_mode_only
  def test_initialization(self):
    """Tests initialization.

    This test validates initialization of the Hook using both specifying
    `iterations_per_loop` as raw `count` and `seconds`.
    """
    self._validate_initialization(
        util_lib.IterationsPerLoopCounter(value=3, unit='seconds'), 3)
    self._validate_initialization(
        util_lib.IterationsPerLoopCounter(value=600, unit='seconds'), 1)
    self._validate_initialization(
        util_lib.IterationsPerLoopCounter(value=3600, unit='seconds'), 5)
    self._validate_initialization(
        util_lib.IterationsPerLoopCounter(value=3, unit='count'), 100)
    self._validate_initialization(
        util_lib.IterationsPerLoopCounter(value=100, unit='count'), 10)
@test_util.deprecated_graph_mode_only def test_initialization(self): """Tests initialization. This test validates initialization of the Hook using both specifying `iterations_per_loop` as raw `count` and `seconds`. """ self._validate_initialization( util_lib.IterationsPerLoopCounter(value=3, unit='seconds'), 3) self._validate_initialization( util_lib.IterationsPerLoopCounter(value=600, unit='seconds'), 1) self._validate_initialization( util_lib.IterationsPerLoopCounter(value=3600, unit='seconds'), 5) self._validate_initialization( util_lib.IterationsPerLoopCounter(value=3, unit='count'), 100) self._validate_initialization( util_lib.IterationsPerLoopCounter(value=100, unit='count'), 10) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/error_handling.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ===================================================================
"""ErrorRendezvous handler for collecting errors from multiple threads."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import contextlib
import sys
import threading
import time

import six
import tensorflow as tf

from tensorflow_estimator.python.estimator.tools import analytics

# Errors that are an expected side effect of shutdown; de-prioritized when
# choosing which captured error to re-raise.
_UNINTERESTING_ERRORS = (tf.errors.CancelledError,)
# Errors already handled by MonitoredSession; never recorded at all.
_IGNORED_ERRORS = (
    tf.errors.AbortedError,
    tf.errors.UnavailableError,
)
_CHECK_NUMERIC_OP_NAME = 'CheckNumerics'


class ErrorRendezvous(object):
  """Resolve errors from multiple threads during TPU execution.

  TPU errors can occur on the infeed or outfeed threads as well as the main
  training thread.

  Depending on which thread "wins" and receives the session error first, we may
  end up showing users a confusing and non-actionable error message (session
  cancelled) instead of a root cause (e.g. a bad filename).

  The rendezvous object provides a location to capture these errors until all
  threads terminate.  At that point we can choose the most informative error
  to report.
  """

  def __init__(self, num_sources):
    # string -> (message, traceback)
    self._errors = {}
    # Number of sources expected to report (via record_error or record_done)
    # before raise_errors stops waiting.
    self._num_sources = num_sources
    self._session_cancel_timer = None

  def record_error(self, source, exc_info, session=None):
    """Report an exception from the given source.

    If a session is passed, a timer will be registered to close it after a few
    seconds.  This is necessary to ensure the main training loop does not hang
    if an infeed/outfeed error occurs.

    We sleep a few seconds to allow a more interesting error from another
    thread to propagate.

    Args:
      source: string, source of the error
      exc_info: Output from `sys.exc_info` (type, value, traceback)
      session: Session to close after delay.
    """
    _, value, _ = exc_info
    # Ignore errors already handled by MonitoredSession
    if isinstance(value, _IGNORED_ERRORS):
      return

    self._errors[source] = exc_info

    # If the error is a numeric type, e.g., NaN error, we can assume that the
    # loop execution completed successfully. In this case, we can skip the
    # `session.close()` logic and wait for the infeed/outfeed threads to
    # complete as normal.
    try:
      if value.op.type == _CHECK_NUMERIC_OP_NAME:
        analytics.track_numerical_issues(exc_info)
        return
    except AttributeError as _:
      # Not every exception type carries an `op` attribute; fall through to
      # the generic session-cancel path below.
      pass

    if session is not None and self._session_cancel_timer is None:

      def _cancel_session():
        # Wait so a more informative root-cause error from another thread has
        # a chance to be recorded before we force the close.
        time.sleep(5)
        tf.compat.v1.logging.error('Closing session due to error %s' % value)
        try:
          session.close()
        except:  # pylint: disable=bare-except
          tf.compat.v1.logging.error(
              '\n\n\nFailed to close session after error.'
              'Other threads may hang.\n\n\n')

      self._session_cancel_timer = threading.Thread(target=_cancel_session,)
      self._session_cancel_timer.daemon = True
      self._session_cancel_timer.start()

  def record_done(self, source):
    """Mark execution source `source` as done.

    If an error was originally reported from `source` it is left intact.

    Args:
      source: `str`, source being recorded
    """
    tf.compat.v1.logging.info('%s marked as finished', source)
    if source not in self._errors:
      self._errors[source] = None

  @contextlib.contextmanager
  def catch_errors(self, source, session=None):
    """Context manager to report any errors within a block."""
    try:
      yield
    except Exception:  # pylint: disable=broad-except
      self.record_error(source, sys.exc_info(), session)

  def raise_errors(self, timeout_sec=0):
    """Wait for up to `timeout` seconds for all error sources to finish.

    Preferentially raise "interesting" errors (errors not in the
    _UNINTERESTING_ERRORS) set.

    Args:
      timeout_sec: Seconds to wait for other error sources.
    """
    # Poll once per second until every source has either reported an error or
    # marked itself done (record_done stores None for `source`).
    for _ in range(timeout_sec):
      if len(self._errors) == self._num_sources:
        break
      time.sleep(1)

    kept_errors = [(k, v) for (k, v) in self._errors.items() if v is not None]

    # First check for any interesting errors, then fall back on the session
    # cancelled errors etc.
    for k, (typ, value, traceback) in kept_errors:
      if isinstance(value, _UNINTERESTING_ERRORS):
        continue
      else:
        tf.compat.v1.logging.warn('Reraising captured error')
        six.reraise(typ, value, traceback)

    for k, (typ, value, traceback) in kept_errors:
      tf.compat.v1.logging.warn('Reraising captured error')
      six.reraise(typ, value, traceback)



================================================
FILE: tensorflow_estimator/python/estimator/tpu/error_handling_test.py
================================================
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Error Handling tests.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow_estimator.python.estimator.tpu import error_handling class ErrorHandlingTest(tf.test.TestCase): def catch_and_raise(self, error): er = error_handling.ErrorRendezvous(1) with er.catch_errors(source='infeed'): raise error er.raise_errors() def testInterestingError(self): with self.assertRaises(tf.errors.InternalError): self.catch_and_raise(tf.errors.InternalError('message', None, None)) def testIgnoredError(self): """Expect no error to be raised.""" self.catch_and_raise(tf.errors.AbortedError('message', None, None)) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/iteration_count_estimator.py ================================================ # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= """Estimator that uses past runtime samples to estimate iterations count. The estimator helps simplify determining the number of iterations count to spend on a given alloted time budget. The estimate will get adjusted over time as the estimator learns more from collecting per iteration runtime samples. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import numpy as np import tensorflow as tf RuntimeCounter = collections.namedtuple( "RuntimeCounter", ["runtime_secs", "steps", "step_time_secs"]) class IterationCountEstimator(object): """Estimates iterations count using past iterations runtime. The estimator collects iterations elapsed time (in seconds) and store it into a circular buffer. As it learns enough samples, it computes the mean value of the past observed iterations elapsed time to estimate the number of iterations count to run within the alloted time budget in seconds. To keep the buffer from growing indefinitely, we limit the size by the virtue of using circular buffer. As it uses the mean of iterations runtime to compute the iterations count estimate, setting a larger buffer size will smooth out the estimation. Once the buffer is getting filled up, older values will be dequeued in FIFO order. Setting larger buffer size will make the estimator less sensitive to runtime fluctuations but will result in slower convergence. For faster convergence buffer size can be set smaller but more prone to runtime fluctuations. As a safety feature, the estimator will return default iterations value, when: 1. The circular buffer is empty (initially). 2. The user input is invalid. """ def __init__(self, capacity=20): """Constructs a new `IterationsEstimator` instance. Args: capacity: Size of circular buffer to hold timer values. Each timer value represents the time spent on the last iterations. Raises: ValueError: If one or more parameters specified is invalid. """ self._reset(capacity=capacity) def _reset(self, capacity=20): """Resets internal variables.""" if capacity <= 0: raise ValueError("IterationCountEstimator `capacity` must be positive. " "Actual:%d." 
% capacity) # A circular buffer with fixed capacity to store the observation time values # and once the buffer is full, the oldest value will be evicted. self._buffer_wheel = collections.deque([]) self._capacity = capacity self._min_iterations = 1 self._last_iterations = self._min_iterations self._sample_count = 0 def _mean_runtime_secs(self): return np.mean(self._buffer_wheel, axis=0)[0] if self._buffer_wheel else 0 def _mean_step_time_secs(self): return np.mean(self._buffer_wheel, axis=0)[2] if self._buffer_wheel else 0 def _std_step_time_secs(self): return np.std(self._buffer_wheel, axis=0)[2] if self._buffer_wheel else 0 def _diff_less_than_percentage(self, actual, target, percentage): """Checks if `actual` value is within a `percentage` to `target` value. Args: actual: Actual value. target: Target value. percentage: Max percentage threshold. Returns: True if the ABS(`actual` - `target`) is less than or equal to `percentage` , otherwise False. Raise: ValueError: If `total_secs` value is not positive. """ if actual == 0: raise ValueError("Invalid `actual` value. Value must not be zero.") if target == 0: raise ValueError("Invalid `target` value. Value must not be zero.") return (float(abs(target - actual)) / target) <= percentage * 0.01 def _is_step_time_stable(self): """Checks if the step time has stabilized. We define stability a function of small stdev and after running for some time. Returns: True if stability is reached, False otherwise. """ std = self._std_step_time_secs() return std < 0.03 and self._sample_count > self._capacity def update(self, runtime_secs, count): """Updates the unit time spent per iteration. Args: runtime_secs: The total elapsed time in seconds. count: The number of iterations. """ if runtime_secs <= 0.0: tf.compat.v1.logging.debug( "Invalid `runtime_secs`. Value must be positive. Actual:%.3f.", runtime_secs) return if count <= 0.0: tf.compat.v1.logging.debug( "Invalid samples `count`. Value must be positive. 
Actual:%d.", count) return if len(self._buffer_wheel) >= self._capacity: self._buffer_wheel.popleft() step_time_secs = float(runtime_secs) / count self._buffer_wheel.append( RuntimeCounter( runtime_secs=runtime_secs, steps=count, step_time_secs=step_time_secs)) self._sample_count += 1 def get(self, total_secs): """Gets the iterations count estimate. If recent predicted iterations are stable, re-use the previous value. Otherwise, update the prediction value based on the delta between the current prediction and the expected number of iterations as determined by the per-step runtime. Args: total_secs: The target runtime in seconds. Returns: The number of iterations as estimate. Raise: ValueError: If `total_secs` value is not positive. """ if total_secs <= 0: raise ValueError( "Invalid `total_secs`. It must be positive number. Actual:%d" % total_secs) if not self._buffer_wheel: tf.compat.v1.logging.debug( "IterationCountEstimator has no sample(s). Returns min iterations:%d.", self._min_iterations) return self._min_iterations mean_runtime_secs = self._mean_runtime_secs() mean_step_time_secs = self._mean_step_time_secs() std_step_time_secs = self._std_step_time_secs() projected_iterations = total_secs / mean_step_time_secs last_runtime_secs = self._buffer_wheel[-1].runtime_secs delta_iterations = projected_iterations - self._last_iterations # Stabilizes the search once it is close enough to the target runtime and # the step time is stable within range bound. if ((self._diff_less_than_percentage(last_runtime_secs, total_secs, 10) or self._diff_less_than_percentage(mean_runtime_secs, total_secs, 5)) and self._is_step_time_stable()): delta_iterations = 0 self._last_iterations += delta_iterations self._last_iterations = max(self._last_iterations, self._min_iterations) tf.compat.v1.logging.info( "IterationCountEstimator -- target_runtime:%.3fs. last_runtime:%.3fs. " "mean_runtime:%.3fs. last_step_time:%.3f. std_step_time:%.3f. " "mean_step_time:%.3fs. delta_steps:%.2f. 
prev_steps:%.2f. " "next_steps:%.2f.", total_secs, last_runtime_secs, mean_runtime_secs, self._buffer_wheel[-1].step_time_secs, std_step_time_secs, mean_step_time_secs, delta_iterations, self._buffer_wheel[-1].steps, self._last_iterations) return int(self._last_iterations + 0.5) ================================================ FILE: tensorflow_estimator/python/estimator/tpu/spatial_partitioning_api.md ================================================ # Spatial partitioning Spatial partitioning allows us to run models with larger input images. Typically these models will be too large to fit on a single TPU core. Spatial partitioning uses multiple cores to process different parts of the input tensor. Each core communicates with the other cores when necessary to merge overlapping parts of the computation. All the complicated merging logic is implemented in the XLA compiler, therefore you only need to configure how the inputs to your model are partitioned. Note: Spatial partitioning only distributes activations across multiple cores. Each core still maintains its own copy of the model weights. For most image model, activations use more memory than the model weights. ## Enabling Spatial Partitioning with TPUEstimator Spatial partitioning doesn't require any code change in your model. You only need to specify the spatial partition parameters in your TPUConfig. ``` tpu_config=tpu_config.TPUConfig( iterations_per_loop=100, num_cores_per_replica=4, per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2, input_partition_dims=[[1, 4, 1, 1], None]] ``` `per_host_input_for_training` must be set to PER_HOST_V2 for spatial partitioning: this means you must have a tf.data based input pipeline. `num_cores_per_replica` determines the maximum number partitions we can split. `input_partition_dims` is a list with two elements: `feature_partition_dims` and `label_partition_dims` describes how to partition the input tensors. 
The structure of `feature_partition_dims` and `label_partition_dims` must match
the structure of features and labels from input_fn.

### Partitioning when features and labels are single tensors

`features` or `labels` can be a single tensor. In this case,
`feature_partition_dims` or `label_partition_dims` must be a list/tuple of
integers or None. The length of the list/tuple must equal the number of
dimensions of the tensor. For example, if `features` is an image tensor with
shape [N, H, W, C], the `feature_partition_dims` must be a list/tuple with 4
integers.

```
features = image_tensor  # [N, H, W, C]
labels = class_label  # [N]
input_partition_dims = [[1,4,1,1], None]
```

### Partitioning when features or labels are a dictionary

`features` or `labels` can alternatively be a dictionary from `feature_name` to
a `Tensor`. In this case `feature_partition_dims` or `label_partition_dims`
must be a dict with exactly the same keys, and the value is a list/tuple of
integers or None.

```
features = {'image': image_tensor, 'image_mask': mask_tensor}
labels = {'class_label': class_id, 'mask': mask_id}
input_partition_dims = [
    {'image': [1,4,1,1], 'image_mask': [1, 2, 2,1]},
    {'class_label': [1], 'mask': None}]
```

In this example, both `features` and `labels` are dictionaries. Therefore the
`input_partition_dims` contains two dicts with the same structure: the first
dict in `input_partition_dims` has two keys 'image' and 'image_mask' to match
the tensors in features. The value is a list of integers that describes how to
partition the tensor. 'class_label': [1] means we send the class_label tensor
to core 0 only.

### Partitioning when features are a dict, labels are a single tensor

`features` and `labels` could be any of the aforementioned formats. The rules
for `feature_partition_dims` and `label_partition_dims` are applied separately.
``` features = {'image': image_tensor, 'image_mask': mask_tensor} labels = class_label # [N] input_partition_dims = [ {'image': [1,4,1,1], 'image_mask': [1, 2, 2,1]}, [1]] ``` ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_config.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =================================================================== """A RunConfig subclass with TPU support.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import json import os import tensorflow as tf from tensorflow_estimator.python.estimator import run_config as run_config_lib from tensorflow_estimator.python.estimator.estimator_export import estimator_export from tensorflow_estimator.python.estimator.tpu import util as util_lib # pylint: disable=protected-access _TF_CONFIG_ENV = run_config_lib._TF_CONFIG_ENV _SERVICE_KEY = run_config_lib._SERVICE_KEY _TPU_WORKER_JOB_NAME = 'tpu_worker_job_name' # pylint: enable=protected-access @estimator_export(v1=['estimator.tpu.InputPipelineConfig']) class InputPipelineConfig(object): r"""Please see the definition of these values in TPUConfig. @compatibility(TF2) TPU Estimator manages its own TensorFlow graph and session, so it is not compatible with TF2 behaviors. 
We recommend that you migrate to the newer `tf.distribute.TPUStrategy`. See the [TPU guide](https://www.tensorflow.org/guide/tpu) for details. @end_compatibility """ PER_SHARD_V1 = 1 PER_HOST_V1 = 2 PER_HOST_V2 = 3 BROADCAST = 4 SLICED = 5 @estimator_export(v1=['estimator.tpu.TPUConfig']) class TPUConfig( collections.namedtuple('TPUConfig', [ 'iterations_per_loop', 'num_shards', 'num_cores_per_replica', 'per_host_input_for_training', 'tpu_job_name', 'initial_infeed_sleep_secs', 'input_partition_dims', 'eval_training_input_configuration', 'experimental_host_call_every_n_steps', 'experimental_allow_per_host_v2_parallel_get_next', 'experimental_feed_hook', ])): r"""TPU related configuration required by `TPUEstimator`. Args: iterations_per_loop: This is the number of train steps running in TPU system before returning to CPU host for each `Session.run`. This means global step is increased `iterations_per_loop` times in one `Session.run`. It is recommended to be set as number of global steps for next checkpoint. Note that in evaluation don't use this value, instead we run total eval `steps` on TPU for a single `Session.run`. [Experimental]: `iterations_per_loop` can be specified as a time interval. To specify N seconds in one `Session.run`, one can specify it as `Ns` and substitute the N with the N with the number of desired seconds. Alternatively, the unit of time can also be specified in minutes or hours, e.g. `3600s` or `60m` or `1h`. num_shards: (Deprecated, ignored by TPUEstimator). The number of model replicas in the system. For non-model-parallelism case, this number equals the total number of TPU cores. For model-parallelism, the total number of TPU cores equals num_cores_per_replica * num_shards. num_cores_per_replica: Defaults to `None`, which disables model parallelism. An integer which describes the number of TPU cores per model replica. This is required by model-parallelism which enables partitioning the model to multiple cores. 
Currently num_cores_per_replica must be 1, 2, 4, or 8. per_host_input_for_training: If `True`, for `PER_HOST_V1`, the `input_fn` is invoked once on each host, and the number of hosts must be smaller or equal to the number of replicas. For PER_HOST_V2, the `input_fn` is invoked once for each host (if the number of hosts is less than the number of replicas) or replica (if the number of replicas is less than the number of hosts. With the per-core input pipeline configuration, it is invoked once for each core. With a global batch size `train_batch_size` in `TPUEstimator` constructor, the batch size for each shard is `train_batch_size` // #hosts in the `True` or `PER_HOST_V1` mode. In `PER_HOST_V2` mode, it is `train_batch_size` // #cores. In `BROADCAST` mode, `input_fn` is only invoked once on host 0 and the tensors are broadcasted to all other replicas. The batch size equals to `train_batch_size`. With the per-core input pipeline configuration, the shard batch size is also `train_batch_size` // #cores. Note: per_host_input_for_training==PER_SHARD_V1 only supports mode.TRAIN. tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred within TPUEstimator, however when using ClusterSpec propagation in more esoteric cluster configurations, you may need to specify the job name as a string. initial_infeed_sleep_secs: The number of seconds the infeed thread should wait before enqueueing the first batch. This helps avoid timeouts for models that require a long compilation time. input_partition_dims: A nested list to describe the partition dims for all the tensors from input_fn(). The structure of input_partition_dims must match the structure of `features` and `labels` from input_fn(). The total number of partitions must match `num_cores_per_replica`. For example, if input_fn() returns two tensors: images with shape [N, H, W, C] and labels [N]. input_partition_dims = [[1, 2, 2, 1], None] will split the images to 4 pieces and feed into 4 TPU cores. 
labels tensor are directly broadcasted to all the TPU cores since the partition dims is `None`. Current limitations: This feature is only supported with the PER_HOST_V2 input mode. eval_training_input_configuration: If `SLICED`, `input_fn` is only invoked once on host 0 and the tensors are broadcasted to all other replicas. Unlike per_host_input_for_training=BROADCAST, each replica will only get a slice of the data instead of a whole copy. If `PER_HOST_V1`, the behaviour is determined by per_host_input_for_training. experimental_host_call_every_n_steps: Within a training loop, this argument sets how often host calls are performed during training. Host calls will be evaluated every n steps within a training loop where n is the value of this argument. experimental_allow_per_host_v2_parallel_get_next: When enabled, allows concurrent execution of dataset get next calls when using PER_HOST_V2 input. May result in a performance increase for models with a small step time, but as a consequence TPUEstimator may non-deterministically distribute batches to different cores, rather than guaranteeing round robin behavior. experimental_feed_hook: This is a class which user can provide to the TPU estimator to override the default TPUInfeedOutfeedSessionHook implementation and add customized implementatioin to handle infeed outfeed logic. If given class is None, TPU estimator uses default TPUInfeedOutfeedSessionHook implementation in tpu_estimator.py. If not None, TPU estimator uses this customized tpu infeed outfeed session hook class rather to override the default one. Raises: ValueError: If `num_cores_per_replica` is not 1, 2, 4, 8, ..., 128. @compatibility(TF2) TPU Estimator manages its own TensorFlow graph and session, so it is not compatible with TF2 behaviors. We recommend that you migrate to the newer `tf.distribute.TPUStrategy`. See the [TPU guide](https://www.tensorflow.org/guide/tpu) for details. 
@end_compatibility """ def __new__(cls, iterations_per_loop=2, num_shards=None, num_cores_per_replica=None, per_host_input_for_training=True, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None, eval_training_input_configuration=InputPipelineConfig.PER_HOST_V1, experimental_host_call_every_n_steps=1, experimental_allow_per_host_v2_parallel_get_next=False, experimental_feed_hook=None): # Check iterations_per_loop. util_lib.parse_iterations_per_loop(iterations_per_loop) # Check num_shards. if num_shards is not None: util_lib.check_positive_integer(num_shards, 'TPUConfig num_shards') if input_partition_dims is not None: if len(input_partition_dims) != 1 and len(input_partition_dims) != 2: raise ValueError( 'input_partition_dims must be a list/tuple with one or two' ' elements.') if per_host_input_for_training is not InputPipelineConfig.PER_HOST_V2: raise ValueError( 'input_partition_dims is only supported in PER_HOST_V2 mode.') if num_cores_per_replica is None: raise ValueError( 'input_partition_dims requires setting num_cores_per_replica.') # Check num_cores_per_replica if num_cores_per_replica is not None: if num_cores_per_replica not in ([1, 2, 4, 8, 16, 32, 64, 128]): raise ValueError( 'num_cores_per_replica must be 1, 2, 4, 8, 16, 32, 64, 128; ' 'got {}'.format(str(num_cores_per_replica))) if eval_training_input_configuration not in [ InputPipelineConfig.PER_HOST_V1, InputPipelineConfig.SLICED ]: raise ValueError( 'eval_training_input_configuration must be PER_HOST_V1 or SLICED;' ' got {}'.format(str(eval_training_input_configuration))) # per_host_input_for_training may be True, False, or integer in [1..3]. # Map legacy values (True, False) to numeric values. if per_host_input_for_training is False: per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1 elif per_host_input_for_training is True: per_host_input_for_training = InputPipelineConfig.PER_HOST_V1 # Check initial_infeed_sleep_secs. 
if initial_infeed_sleep_secs: util_lib.check_positive_integer(initial_infeed_sleep_secs, 'TPUConfig initial_infeed_sleep_secs') tpu_job_name = tpu_job_name or _get_tpu_job_name_from_tf_config() return super(TPUConfig, cls).__new__( cls, iterations_per_loop=iterations_per_loop, num_shards=num_shards, num_cores_per_replica=num_cores_per_replica, per_host_input_for_training=per_host_input_for_training, tpu_job_name=tpu_job_name, initial_infeed_sleep_secs=initial_infeed_sleep_secs, input_partition_dims=input_partition_dims, eval_training_input_configuration=eval_training_input_configuration, experimental_host_call_every_n_steps=( experimental_host_call_every_n_steps), experimental_allow_per_host_v2_parallel_get_next=( experimental_allow_per_host_v2_parallel_get_next), experimental_feed_hook=(experimental_feed_hook)) @estimator_export(v1=['estimator.tpu.RunConfig']) class RunConfig(run_config_lib.RunConfig): """RunConfig with TPU support.""" def __init__(self, tpu_config=None, evaluation_master=None, master=None, cluster=None, **kwargs): """Constructs a RunConfig. Args: tpu_config: the TPUConfig that specifies TPU-specific configuration. evaluation_master: a string. The address of the master to use for eval. Defaults to master if not set. master: a string. The address of the master to use for training. cluster: a ClusterResolver **kwargs: keyword config parameters. Raises: ValueError: if cluster is not None and the provided session_config has a cluster_def already. @compatibility(TF2) TPU Estimator manages its own TensorFlow graph and session, so it is not compatible with TF2 behaviors. We recommend that you migrate to the newer `tf.distribute.TPUStrategy`. See the [TPU guide](https://www.tensorflow.org/guide/tpu) for details. @end_compatibility """ super(RunConfig, self).__init__(**kwargs) self._tpu_config = tpu_config or TPUConfig() self._cluster = cluster # If user sets master and/or evaluation_master explicitly, including empty # string '', take it. 
Otherwise, take the values set by parent class. if master is not None: if cluster is not None: raise ValueError('Both master and cluster are set.') self._master = master else: if cluster: self._master = cluster.master() if evaluation_master is not None: self._evaluation_master = evaluation_master elif (not self._evaluation_master and self.task_type != run_config_lib.TaskType.EVALUATOR): # If the task type is EVALUATOR, it means some cluster manager sets the # TF_CONFIG. In that case, we respect the configuration in TF_CONFIG. # # Otherwise, it means user executes the code without external cluster # manager. For that, we optimize the user experience by setting # evaluation_master to master, unless user overwrites it. self._evaluation_master = self._master # Set the ClusterSpec to use if cluster: self._cluster_spec = cluster.cluster_spec() # Merge the cluster_def into the ConfigProto. if self._session_config is None: # pylint: disable=access-member-before-definition self._session_config = tf.compat.v1.ConfigProto( allow_soft_placement=True, isolate_session_state=True) if self._session_config.HasField('cluster_def'): raise ValueError('You cannot provide a ClusterResolver and ' 'session_config.cluster_def.') if self._cluster_spec: self._session_config.cluster_def.CopyFrom( self._cluster_spec.as_cluster_def()) def _maybe_overwrite_session_config_for_distributed_training(self): # Overrides the parent class session_config overwrite for between-graph. TPU # runs with in-graph, which should not have device filter. Doing nothing # ("pass") basically disables it. 
    pass

  @property
  def evaluation_master(self):
    # Address of the master used for evaluation; resolved in __init__.
    return self._evaluation_master

  @property
  def master(self):
    # Address of the master used for training.
    return self._master

  @property
  def tpu_config(self):
    # The TPUConfig supplied at construction (or the default TPUConfig()).
    return self._tpu_config

  @property
  def cluster(self):
    # The ClusterResolver supplied at construction, if any.
    return self._cluster

  def replace(self, **kwargs):
    """Returns a copy of this RunConfig with the given fields replaced.

    `tpu_config` is not a field of the parent RunConfig, so it is popped out
    and re-attached to the new instance after the parent performs the replace.
    """
    if 'tpu_config' not in kwargs:
      return super(RunConfig, self).replace(**kwargs)

    tpu_config = kwargs.pop('tpu_config')
    new_instance = super(RunConfig, self).replace(**kwargs)
    new_instance._tpu_config = tpu_config  # pylint: disable=protected-access
    return new_instance


def _get_tpu_job_name_from_tf_config():
  """Extracts the TPU job name from TF_CONFIG env variable."""
  # TODO(xiejw): Extends this to support both TF_CONFIG env variable and cluster
  # spec propagation.
  tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
  tpu_job_name = tf_config.get(_SERVICE_KEY, {}).get(_TPU_WORKER_JOB_NAME)
  if tpu_job_name:
    tf.compat.v1.logging.info('Load TPU job name from TF_CONFIG: %s',
                              tpu_job_name)
  return tpu_job_name



================================================
FILE: tensorflow_estimator/python/estimator/tpu/tpu_config_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """TPU RunConfig tests.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import json import tensorflow as tf from tensorflow_estimator.python.estimator import run_config as run_config_lib from tensorflow_estimator.python.estimator.tpu import tpu_config as tpu_config_lib from tensorflow_estimator.python.estimator.tpu import util as util_lib def _set_tf_config_env_variable(tf_config): return tf.compat.v1.test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}) class TPURunConfigTest(tf.test.TestCase): def test_no_session_config_set_in_local_case(self): run_config = tpu_config_lib.RunConfig() self.assertIsNone(run_config.session_config) def test_no_session_config_overwrite_in_local_case(self): session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) run_config = tpu_config_lib.RunConfig(session_config=session_config) self.assertEqual(session_config, run_config.session_config) def test_no_session_config_set_with_cluster_spec(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host3:3'], run_config_lib.TaskType.WORKER: ['host3:4'] }, 'task': { 'type': run_config_lib.TaskType.CHIEF, 'index': 0 } } with _set_tf_config_env_variable(tf_config): run_config = tpu_config_lib.RunConfig() self.assertIsNone(run_config.session_config) def test_no_session_config_overwrite_with_cluster_spec(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host3:3'], run_config_lib.TaskType.WORKER: ['host3:4'] }, 'task': { 'type': run_config_lib.TaskType.CHIEF, 'index': 0 } } with _set_tf_config_env_variable(tf_config): session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) run_config = tpu_config_lib.RunConfig(session_config=session_config) self.assertEqual(session_config, run_config.session_config) def test_fail_with_invalid_num_shards(self): with self.assertRaisesRegexp(ValueError, 
'must be positive'): tpu_config_lib.RunConfig( tpu_config=tpu_config_lib.TPUConfig(num_shards=0)) def _validate_invalid_iterations_per_loop(self, iterations_per_loop): with self.assertRaisesRegexp(ValueError, 'must be positive'): tpu_config_lib.RunConfig( tpu_config=tpu_config_lib.TPUConfig( iterations_per_loop=iterations_per_loop)) def test_fail_with_iterations_per_loop(self): self._validate_invalid_iterations_per_loop(0) self._validate_invalid_iterations_per_loop(-1) self._validate_invalid_iterations_per_loop('-1h') self._validate_invalid_iterations_per_loop('-1m') self._validate_invalid_iterations_per_loop('-1s') def test_fail_with_invalid_num_cores_per_replica(self): with self.assertRaisesRegexp( ValueError, 'num_cores_per_replica must be 1, 2, 4, 8, 16, 32, 64, 128;' ' got 7'): tpu_config_lib.TPUConfig(num_cores_per_replica=7) def _evaluate_iterations_per_loop_in_seconds(self, value, expected_value, expected_unit): config = tpu_config_lib.RunConfig( tpu_config=tpu_config_lib.TPUConfig(iterations_per_loop=value)) self.assertEqual(config.tpu_config.iterations_per_loop, value) d = util_lib.parse_iterations_per_loop( config.tpu_config.iterations_per_loop) self.assertEqual(expected_value, d.value) self.assertEqual(expected_unit, d.unit) def test_valid_iterations_per_loop(self): self._evaluate_iterations_per_loop_in_seconds(1, 1, 'count') self._evaluate_iterations_per_loop_in_seconds(100, 100, 'count') self._evaluate_iterations_per_loop_in_seconds('300s', 300, 'seconds') self._evaluate_iterations_per_loop_in_seconds('1m', 60, 'seconds') self._evaluate_iterations_per_loop_in_seconds('1h', 3600, 'seconds') class TPURunConfigMasterTest(tf.test.TestCase): def test_default_values(self): run_config = tpu_config_lib.RunConfig() self.assertEqual('', run_config.master) self.assertEqual('', run_config.evaluation_master) def test_user_provided_master_and_evaluation_master(self): run_config = tpu_config_lib.RunConfig( master='_master_123', evaluation_master='_eval_master_123') 
self.assertEqual('_master_123', run_config.master) self.assertEqual('_eval_master_123', run_config.evaluation_master) def test_evaluation_master_defaults_to_master(self): run_config = tpu_config_lib.RunConfig(master='_master_123') self.assertEqual('_master_123', run_config.master) self.assertEqual('_master_123', run_config.evaluation_master) def test_tf_config(self): tf_config = { 'session_master': '_master_123', 'eval_session_master': '_eval_master_123' } with _set_tf_config_env_variable(tf_config): run_config = tpu_config_lib.RunConfig() self.assertEqual('_master_123', run_config.master) self.assertEqual('_eval_master_123', run_config.evaluation_master) def test_evaluation_master_defaults_to_master_in_tf_config(self): tf_config = { 'session_master': '_master_123', } with _set_tf_config_env_variable(tf_config): run_config = tpu_config_lib.RunConfig() self.assertEqual('_master_123', run_config.master) self.assertEqual('_master_123', run_config.evaluation_master) def test_respect_evaluation_master_in_tf_config(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], }, 'task': { 'type': run_config_lib.TaskType.EVALUATOR, 'index': 0 }, } with _set_tf_config_env_variable(tf_config): run_config = tpu_config_lib.RunConfig(master='_something') self.assertEqual('', run_config.evaluation_master) def test_user_overwrites_tf_config(self): tf_config = { 'session_master': '_master_123', 'eval_session_master': '_eval_master_123' } with _set_tf_config_env_variable(tf_config): run_config = tpu_config_lib.RunConfig( master='_new_master_123', evaluation_master='_new_eval_master_123') self.assertEqual('_new_master_123', run_config.master) self.assertEqual('_new_eval_master_123', run_config.evaluation_master) def test_user_overwrites_master_in_tf_config(self): tf_config = { 'session_master': '_master_123', 'eval_session_master': '_eval_master_123' } with _set_tf_config_env_variable(tf_config): run_config = tpu_config_lib.RunConfig(master='_new_master_123') 
self.assertEqual('_new_master_123', run_config.master) self.assertEqual('_eval_master_123', run_config.evaluation_master) class TPUJobNameTest(tf.test.TestCase): def test_default_name(self): config = tpu_config_lib.RunConfig() self.assertIsNone(config.tpu_config.tpu_job_name) def test_with_tf_config(self): tf_config = {'service': {'tpu_worker_job_name': '_my_new_name',}} with _set_tf_config_env_variable(tf_config): config = tpu_config_lib.RunConfig() self.assertEqual('_my_new_name', config.tpu_config.tpu_job_name) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_context.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# =================================================================== """TPU system metadata and associated tooling.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from contextlib import contextmanager import copy import tensorflow as tf from tensorflow.python.distribute import distribute_lib from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.tpu import device_assignment as tpu_device_assignment from tensorflow.python.tpu import tpu_system_metadata as tpu_system_metadata_lib from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.tpu import _tpu_estimator_embedding from tensorflow_estimator.python.estimator.tpu import tpu_config _DEFAULT_JOB_NAME = 'tpu_worker' _DEFAULT_COORDINATOR_JOB_NAME = 'coordinator' _LOCAL_MASTERS = ('', 'local') # TODO(pgavin): support PF 3D mesh _NUM_CORES_TO_COMPUTATION_SHAPE = { 1: [1, 1, 1, 1], 2: [1, 1, 1, 2], 4: [1, 2, 1, 2], 8: [2, 2, 1, 2], 16: [4, 2, 1, 2], 32: [4, 4, 1, 2], 64: [8, 4, 1, 2], 128: [8, 8, 1, 2], } class TPUContext(object): """A context that holds the current configuration of the TPU computation. TPUContext was designed for getting TPU context information when calling input_fn. It can be called in model_fn as well. User is not expected to construct the instance from constructor. The only legitimate way to get the instance is either in `input_fn`: ``` def input_fn(params): batch_size = params['batch_size'] context = params['context'] # ... ``` or in `model_fn` ``` def model_fn(params): batch_size = params['batch_size'] context = params['context'] # ... ``` Most of the fields of TPUContext are useful for both `input_fn` and `model_fn`. Exceptions are: 1. `input_fn` only: current_input_fn_deployment current_host 2. 
`model_fn` only: device_assignment """ def __init__(self, internal_ctx, input_device=None, invocation_index=None, call_from_input_fn=True, host_id=None): self._internal_ctx = internal_ctx self._input_device = input_device self._invocation_index = invocation_index self._call_from_input_fn = call_from_input_fn self._host_id = host_id def current_input_fn_deployment(self): """The configuration of the current input_fn invocation. The configuration depends on `TPUConfig.per_host_input_for_training`. See `TPUConfig` for details. Only set in params dict of input_fn Returns: A tuple of 1. Device spec string: String, is the current CPU host where the input_fn is invoked. 2. Current invocation index: Int, 0-based index of the input_fn invocation. See next item for details. 3. Total invocation count: Int, the total number of times to invoke the input_fn on all CPU hosts. Each invocation will be passed with a new `TPUContext` instance with current invocation index set properly. 4. Total number of replicas consumed by current_invocation: Int, the number of replicas fed by the data returned by current input_fn. For example, for per_core input pipeline deployment and non-model-parallelism, total invocation count is equal to the number of cores in the system and num replicas consumed by current invocation is 1. For per-host v2 input pipeline deployment, total invocation count is equal to the number of hosts in the system and num replicas consumed by current invocation is equal to number of replicas per host. Raises: RuntimeError: If this method is not be called from input_fn. 
""" if not self._call_from_input_fn: raise RuntimeError('This TPUContext instance must not be called from' ' model_fn.') if self._internal_ctx.is_input_sharded_per_core(): total_invocation_count = ( self._internal_ctx.num_hosts * self._internal_ctx.num_of_replicas_per_host) replicas_consumed = 1 elif self._internal_ctx.is_input_broadcast_with_iterators(): total_invocation_count = 1 replicas_consumed = self._internal_ctx.num_replicas elif self._internal_ctx.is_replica_across_hosts(): total_invocation_count = self._internal_ctx.num_replicas replicas_consumed = 1 else: total_invocation_count = self._internal_ctx.num_hosts replicas_consumed = self._internal_ctx.num_of_replicas_per_host return (self._input_device, self._invocation_index, total_invocation_count, replicas_consumed) @property def num_replicas(self): """The total number of replicas. For non-model-parallelism, num_replicas should be the total num of TPU cores in the system. Returns: The number of replicas. """ return self._internal_ctx.num_replicas @property def num_hosts(self): """The number of hosts for the TPU system.""" return self._internal_ctx.num_hosts @property def current_host(self): """The current host index for the TPU system. Returns: The host index (int). Raises: RuntimeError: If this method is not be called from input_fn. """ if not self._call_from_input_fn: raise RuntimeError('This TPUContext instance must not be called from' ' model_fn.') return self._host_id @property def num_of_replicas_per_host(self): """The number of replicas for each host.""" if self._internal_ctx.model_parallelism_enabled: raise ValueError( 'num_of_replicas_per_host is not supported for model_parallelism') return self._internal_ctx.num_of_replicas_per_host @property def device_assignment(self): """Returns device_assignment object. Raises: RuntimeError: If this method is not be called from model_fn. 
""" if self._call_from_input_fn: raise RuntimeError('This TPUContext instance must not be called from' ' input_fn.') return self._internal_ctx.device_assignment def device_for_replica(self, replica_id): """Returns the tuple of (CPU device and device ordinal) for replica. This should be used for full replicate for non-model-parallelism. Args: replica_id: Int, the replica index. Returns: A tuple of device spec for CPU device and int device ordinal. """ # Note that: For the non-model parallelism, the mapping could be # a random permutation. The order should not matter in most cases # as far as model is replicated to all cores in the system. return self._internal_ctx.device_for_replica(replica_id) @property def tpu_host_placement_function(self): """Returns the TPU host place function. The place function takes host_id as the input and returns the TF device for the correspoding host. """ def _placement_function(host_id): """Return the host device given host_id.""" return self._internal_ctx.tpu_host_placement_function(host_id=host_id) return _placement_function class _InternalTPUContext(object): """A context holds immutable states of TPU computation. This immutable object holds TPUEstimator config, train/eval batch size, and `TPUEstimator.use_tpu`, which is expected to be passed around. It also provides utility functions, based on the current state, to determine other information commonly required by TPU computation, such as TPU device names, TPU hosts, shard batch size, etc. if eval_on_tpu is False, then execution of eval on TPU is disabled. if eval_on_tpu is True, but use_tpu is False, a warning is issued, and TPU execution is disabled for all modes. N.B. As `mode` is not immutable state in Estimator, but essential to distinguish between TPU training and evaluation, a common usage for _InternalTPUContext with `mode` is as follows: ``` with _ctx.with_mode(mode) as ctx: if ctx.is_running_on_cpu(): ... 
``` """ def __init__(self, config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu, eval_on_tpu=True, embedding_config_spec=None): self._config = config self._train_batch_size = train_batch_size self._eval_batch_size = eval_batch_size self._predict_batch_size = predict_batch_size self._use_tpu = use_tpu tf.compat.v1.logging.info('_TPUContext: eval_on_tpu %s', eval_on_tpu) if not use_tpu and eval_on_tpu: tf.compat.v1.logging.warn('eval_on_tpu ignored because use_tpu is False.') self._eval_on_tpu = eval_on_tpu self._model_parallelism_enabled = ( use_tpu and config.tpu_config.num_cores_per_replica) self._mode = None num_cores_per_replica = config.tpu_config.num_cores_per_replica if self._model_parallelism_enabled: self._computation_shape = _NUM_CORES_TO_COMPUTATION_SHAPE[ num_cores_per_replica] else: self._computation_shape = None self._lazy_tpu_system_metadata_dict = {} # key by master address self._lazy_device_assignment_dict = {} # key by master address self._lazy_validation_dict = {} # key by ModeKeys self._embedding_config_spec = embedding_config_spec self._lazy_embedding_config_dict = {} # key by master address def _assert_mode(self): if self._mode is None: raise RuntimeError( '`mode` needs to be set via contextmanager `with_mode`.') return self._mode @contextmanager def with_mode(self, mode): # NOTE(xiejw): Shallow copy is enough. It will share he lazy dictionaries, # such as _lazy_tpu_system_metadata_dict between new copy and the original # one. Note that all lazy states stored in properties _lazy_foo are sort of # immutable as they should be same for the process lifetime. 
new_ctx = copy.copy(self) new_ctx._mode = mode # pylint: disable=protected-access yield new_ctx @property def mode(self): return self._assert_mode() def _get_master_address(self): mode = self._assert_mode() config = self._config master = ( config.master if mode != model_fn_lib.ModeKeys.EVAL else config.evaluation_master) return master def _get_tpu_system_metadata(self): """Gets the (maybe cached) TPU system metadata.""" master = self._get_master_address() tpu_system_metadata = self._lazy_tpu_system_metadata_dict.get(master) if tpu_system_metadata is not None: return tpu_system_metadata cluster_def = None if (self._config.session_config and self._config.session_config.cluster_def.job): cluster_def = self._config.session_config.cluster_def # pylint: disable=protected-access tpu_system_metadata = ( tpu_system_metadata_lib._query_tpu_system_metadata( master, cluster_def=cluster_def, query_topology=self.model_parallelism_enabled)) self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata return tpu_system_metadata def _get_device_assignment(self): """Gets the (maybe cached) TPU device assignment.""" master = self._get_master_address() device_assignment = self._lazy_device_assignment_dict.get(master) if device_assignment is not None: return device_assignment tpu_system_metadata = self._get_tpu_system_metadata() device_assignment = tpu_device_assignment.device_assignment( tpu_system_metadata.topology, computation_shape=self._computation_shape, num_replicas=self.num_replicas) tf.compat.v1.logging.info( 'num_cores_per_replica: %s', str(self._config.tpu_config.num_cores_per_replica)) tf.compat.v1.logging.info('computation_shape: %s', str(self._computation_shape)) tf.compat.v1.logging.info('num_replicas: %d', self.num_replicas) tf.compat.v1.logging.info( 'device_assignment.topology.device_coordinates: %s', str(device_assignment.topology.device_coordinates)) tf.compat.v1.logging.info('device_assignment.core_assignment: %s', str(device_assignment.core_assignment)) 
self._lazy_device_assignment_dict[master] = device_assignment return device_assignment @property def tensor_core_embedding_columns(self): if self._embedding_config_spec: return self._embedding_config_spec.tensor_core_feature_columns return None @property def embedding_config(self): """Returns the embedding config based on current mode.""" master = self._get_master_address() if master in self._lazy_embedding_config_dict: embedding_config = self._lazy_embedding_config_dict[master] else: embedding_config = None if self._use_tpu and self._embedding_config_spec: embedding_config = _tpu_estimator_embedding.EmbeddingConfig( self._embedding_config_spec, self._train_batch_size, self._eval_batch_size, self.num_hosts, self.num_cores, self.config) if not embedding_config.has_embedding_tables(): embedding_config = None self._lazy_embedding_config_dict[master] = embedding_config if embedding_config is not None: mode = self._assert_mode() # Dynamically attach tpu_embedding based on mode. With # this, we could keep embedding_config immutable but call site always # accesses the unified API '.tpu_embedding'. 
embedding_config.tpu_embedding = embedding_config.get_tpu_embedding(mode) return embedding_config @property def allow_per_host_v2_parallel_get_next(self): return (self._config.tpu_config .experimental_allow_per_host_v2_parallel_get_next) @property def feed_hook(self): return (self._config.tpu_config.experimental_feed_hook) @property def model_parallelism_enabled(self): return self._model_parallelism_enabled @property def input_partition_dims(self): return self._config.tpu_config.input_partition_dims @property def device_assignment(self): return (self._get_device_assignment() if self._model_parallelism_enabled else None) @property def num_of_cores_per_host(self): metadata = self._get_tpu_system_metadata() return metadata.num_of_cores_per_host @property def num_cores(self): metadata = self._get_tpu_system_metadata() return metadata.num_cores @property def num_of_replicas_per_host(self): """Return the number of replicas per host.""" if self.model_parallelism_enabled: # There can be fewer replicas. This might return 0! return self.num_replicas // self.num_hosts else: return self.num_of_cores_per_host @property def num_replicas(self): """Compute the total number of replicas.""" num_cores_in_system = self.num_cores if self.model_parallelism_enabled: num_cores_per_replica = self._config.tpu_config.num_cores_per_replica if num_cores_per_replica > num_cores_in_system: raise ValueError( 'The num of cores required by the model parallelism, specified by ' 'TPUConfig.num_cores_per_replica, is larger than the total num of ' 'TPU cores in the system. num_cores_per_replica: {}, num cores ' 'in the system: {}'.format(num_cores_per_replica, num_cores_in_system)) if num_cores_in_system % num_cores_per_replica != 0: raise RuntimeError( 'The num of cores in the system ({}) is not divisible by the num ' 'of cores ({}) required by the model parallelism, specified by ' 'TPUConfig.num_cores_per_replica. 
This should never happen!'.format( num_cores_in_system, num_cores_per_replica)) return num_cores_in_system // num_cores_per_replica else: return num_cores_in_system @property def num_hosts(self): metadata = self._get_tpu_system_metadata() return metadata.num_hosts @property def config(self): return self._config def is_input_sharded_per_core(self): """Return true if input_fn is invoked per-core (other than per-host).""" mode = self._assert_mode() return (mode == model_fn_lib.ModeKeys.TRAIN and (self._config.tpu_config.per_host_input_for_training is tpu_config.InputPipelineConfig.PER_SHARD_V1)) def is_input_per_host_with_iterators(self): """Return true if input_fn should be run in the per-host v2 config.""" return (self._config.tpu_config.per_host_input_for_training is tpu_config.InputPipelineConfig.PER_HOST_V2) def is_input_broadcast_with_iterators(self): """Return true if input_fn should be run in the full_replicae config.""" return ((self._config.tpu_config.per_host_input_for_training is tpu_config.InputPipelineConfig.BROADCAST) or (self.is_input_slice_broadcast_to_all_cores())) def is_input_slice_broadcast_to_all_cores(self): """Return true if input_fn is invoked once and broadcast to other hosts.""" mode = self._assert_mode() return (mode != model_fn_lib.ModeKeys.TRAIN and self._config.tpu_config.eval_training_input_configuration is tpu_config.InputPipelineConfig.SLICED) def is_replica_across_hosts(self): """Return true if single replica is across multiple hosts.""" # For example, when num_cores_per_replica > num_cores_per_host. num_cores_per_replica = self._config.tpu_config.num_cores_per_replica num_cores_per_host = self._get_tpu_system_metadata().num_of_cores_per_host return (num_cores_per_replica is not None and num_cores_per_replica > num_cores_per_host) def is_running_on_cpu(self, is_export_mode=False): """Determines whether the input_fn and model_fn should be invoked on CPU. 
This API also validates user provided configuration, such as batch size, according the lazy initialized TPU system metadata. Args: is_export_mode: Indicates whether the current mode is for exporting the model, when mode == PREDICT. Only with this bool, we could tell whether user is calling the Estimator.predict or Estimator.export_savedmodel, which are running on TPU and CPU respectively. Parent class Estimator does not distinguish these two. Returns: bool, whether current input_fn or model_fn should be running on CPU. Raises: ValueError: any configuration is invalid. """ is_running_on_cpu = self._is_running_on_cpu(is_export_mode) if not is_running_on_cpu: self._validate_tpu_configuration() return is_running_on_cpu def _is_running_on_cpu(self, is_export_mode): """Determines whether the input_fn and model_fn should be invoked on CPU.""" mode = self._assert_mode() if not self._use_tpu: return True if mode == model_fn_lib.ModeKeys.EVAL and not self._eval_on_tpu: tf.compat.v1.logging.info('_is_running_on_cpu: eval_on_tpu disabled') return True if is_export_mode: return True return False @property def global_batch_size(self): mode = self._assert_mode() if mode == model_fn_lib.ModeKeys.TRAIN: return self._train_batch_size elif mode == model_fn_lib.ModeKeys.EVAL: return self._eval_batch_size elif mode == model_fn_lib.ModeKeys.PREDICT: return self._predict_batch_size else: return None @property def batch_size_for_input_fn(self): """Returns the shard batch size for `input_fn`.""" global_batch_size = self.global_batch_size if (self.is_running_on_cpu() or self.is_input_broadcast_with_iterators()): return global_batch_size # On TPU if self.is_input_sharded_per_core() or ( self.is_input_per_host_with_iterators()) or ( self.is_replica_across_hosts()): return global_batch_size // self.num_replicas else: return global_batch_size // self.num_hosts @property def batch_size_for_model_fn(self): """Returns the shard batch size for `model_fn`.""" global_batch_size = 
self.global_batch_size if (self.is_running_on_cpu() or self.is_input_broadcast_with_iterators() and not self.is_input_slice_broadcast_to_all_cores()): return global_batch_size # On TPU. always sharded per shard. return global_batch_size // self.num_replicas @property def master_job(self): """Returns the job name to use to place TPU computations on. Returns: A string containing the job name, or None if no job should be specified. Raises: ValueError: If the user needs to specify a tpu_job_name, because we are unable to infer the job name automatically, or if the user-specified job names are inappropriate. """ run_config = self._config # If the user specifies the tpu_job_name, use that. if run_config.tpu_config.tpu_job_name: return run_config.tpu_config.tpu_job_name # The tpu job is determined by the run_config. Right now, this method is # required as tpu_config is not part of the RunConfig. mode = self._assert_mode() master = ( run_config.evaluation_master if mode == model_fn_lib.ModeKeys.EVAL else run_config.master) cluster_def = ( run_config.session_config.cluster_def if run_config.session_config else None) try: master_job = tpu_system_metadata_lib.master_job(master, cluster_def) except ValueError as e: raise ValueError( str(e) + ' Please specify a tpu_job_name as part of ' 'your TPUConfig.') return master_job @property def tpu_host_placement_function(self): """Returns the TPU host place function.""" master = self.master_job def _placement_function(_sentinal=None, replica_id=None, host_id=None): # pylint: disable=invalid-name """Return the host device given replica_id or host_id.""" assert _sentinal is None if replica_id is not None and host_id is not None: raise RuntimeError( 'replica_id and host_id can have only one non-None value.') if master is None: return '/replica:0/task:0/device:CPU:0' else: if replica_id is not None: if self.model_parallelism_enabled: return self.device_assignment.host_device( replica=replica_id, job=master) else: host_id = replica_id / 
self.num_of_cores_per_host return '/job:%s/task:%d/device:CPU:0' % (master, host_id) return _placement_function @property def tpu_device_placement_function(self): """Returns a TPU device placement Fn.""" master = self.master_job job_device = '' if master is None else ('/job:%s' % master) def _placement_function(i): if self.model_parallelism_enabled: return self.device_assignment.tpu_device(replica=i, job=master) else: num_of_cores_per_host = self.num_of_cores_per_host host_id = i / num_of_cores_per_host ordinal_id = i % num_of_cores_per_host return '%s/task:%d/device:TPU:%d' % (job_device, host_id, ordinal_id) return _placement_function def tpu_ordinal_function(self, host_id): """Returns the TPU ordinal fn.""" def _tpu_ordinal_function(shard_index_in_host): """Return the TPU ordinal associated with a shard. Required because the enqueue ops are placed on CPU. Args: shard_index_in_host: the shard index Returns: The ordinal of the TPU device the shard's infeed should be placed on. """ if self.model_parallelism_enabled: # We put both enqueue/dequeue ops at tpu.core(0) in each replica. replica = self.device_assignment.lookup_replicas(host_id, 0)[shard_index_in_host] return self.device_assignment.tpu_ordinal(replica=replica) else: return shard_index_in_host % self.num_of_cores_per_host return _tpu_ordinal_function def _validate_tpu_configuration(self): """Validates the configuration based on the TPU system metadata.""" mode = self._assert_mode() if self._lazy_validation_dict.get(mode): return # All following information is obtained from TPU system metadata. num_cores = self.num_cores num_replicas = self.num_replicas num_hosts = self.num_hosts if not num_cores: tpu_system_metadata = self._get_tpu_system_metadata() raise RuntimeError( 'Cannot find any TPU cores in the system. Please double check ' 'Tensorflow master address and TPU worker(s). 
Available devices ' 'are {}.'.format(tpu_system_metadata.devices)) if self._config.tpu_config.num_shards: user_provided_num_replicas = self._config.tpu_config.num_shards if user_provided_num_replicas != num_replicas: message = ( 'TPUConfig.num_shards is not set correctly. According to TPU ' 'system metadata for Tensorflow master ({}): num_replicas should ' 'be ({}), got ({}). For non-model-parallelism, num_replicas should ' 'be the total num of TPU cores in the system. For ' 'model-parallelism, the total number of TPU cores should be ' 'num_cores_per_replica * num_replicas. Please set it ' 'accordingly or leave it as `None`'.format( self._get_master_address(), num_replicas, user_provided_num_replicas)) raise ValueError(message) if self._config.tpu_config.num_cores_per_replica and ( not self.is_input_per_host_with_iterators()): num_cores_per_replica = self._config.tpu_config.num_cores_per_replica num_cores_per_host = self._get_tpu_system_metadata().num_of_cores_per_host if num_cores_per_replica > num_cores_per_host: raise ValueError( 'Except the PER_HOST_V2 mode, the num of cores required by ' 'model parallelism specified by TPUConfig.num_cores_per_replica ' 'should be less than or equal to the num_cores_per_host. 
' 'num_cores_per_replica: {}, num_cores_per_host: {}'.format( num_cores_per_replica, num_cores_per_host)) if mode == model_fn_lib.ModeKeys.TRAIN: if (self._train_batch_size % num_replicas != 0 and not self.is_input_broadcast_with_iterators()): raise ValueError( 'train batch size {} must be divisible by number of replicas {}' .format(self._train_batch_size, num_replicas)) elif mode == model_fn_lib.ModeKeys.EVAL: if self._eval_batch_size is None: raise ValueError( 'eval_batch_size in TPUEstimator constructor cannot be `None` ' 'if .evaluate is running on TPU.') if (self._eval_batch_size % num_replicas != 0 and not self.is_input_broadcast_with_iterators()): raise ValueError( 'eval batch size {} must be divisible by number of replicas {}' .format(self._eval_batch_size, num_replicas)) if (num_hosts != 1 and not self.is_input_broadcast_with_iterators() and not self.is_input_per_host_with_iterators()): raise ValueError( 'TPUEstimator.evaluate is only supported under three conditions: ' '1. num_hosts=1; 2. BROADCAST mode; ' '3. PER_HOST_V2 mode. 
' 'mode: {}; num_hosts: {}; num_replicas=1:{}'.format( self._config.tpu_config.per_host_input_for_training, num_hosts, num_replicas)) if num_hosts > 1 and self.is_input_per_host_with_iterators(): tf.compat.v1.logging.warn('Running TPUEstimator.evaluate for input mode' ' PER_HOST_V2 and num_hosts %d', num_hosts) else: assert mode == model_fn_lib.ModeKeys.PREDICT if self._predict_batch_size is None: raise ValueError( 'predict_batch_size in TPUEstimator constructor cannot be `None` ' 'if .predict is running on TPU.') if (self._predict_batch_size % num_replicas != 0 and not self.is_input_broadcast_with_iterators()): raise ValueError( 'predict batch size {} must be divisible by number of replicas {}' .format(self._predict_batch_size, num_replicas)) if num_hosts != 1 and not ( self.is_input_broadcast_with_iterators()) and not ( num_replicas == 1 and self.is_input_per_host_with_iterators()): raise ValueError( 'TPUEstimator.predict is only supported under three conditions: ' '1. num_hosts=1; 2. BROADCAST mode; ' '3. PER_HOST_V2 mode with num_replicas=1. ' 'mode: {}; num_hosts: {}; num_replicas=1:{}'.format( self._config.tpu_config.per_host_input_for_training, num_hosts, num_replicas)) # Record the state "validated" into lazy dictionary. self._lazy_validation_dict[mode] = True def device_for_replica(self, replica_id): """Returns the tuple of (CPU device and device ordinal) for replica. This should be used for full replicate for non-model-parallelism. Args: replica_id: Int, the replica index. Returns: A tuple of device spec for CPU device and int device ordinal. 
""" master = self.master_job if self.model_parallelism_enabled: return (self.device_assignment.host_device( replica=replica_id, job=master), self.device_assignment.tpu_ordinal(replica=replica_id)) job_device = '' if master is None else ('/job:%s' % master) num_of_replicas_per_host = self.num_of_replicas_per_host assert num_of_replicas_per_host > 0, ( 'Got num_of_replicas_per_host: {}'.format(num_of_replicas_per_host)) host_id = replica_id / num_of_replicas_per_host ordinal_id = replica_id % num_of_replicas_per_host host_device = '%s/task:%d/device:CPU:0' % (job_device, host_id) return (host_device, ordinal_id) class _OneCoreTPUContext(_InternalTPUContext): """Special _InternalTPUContext for one core usage.""" def __init__(self, config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu): super(_OneCoreTPUContext, self).__init__(config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu) def _get_tpu_system_metadata(self): """Gets the (maybe cached) TPU system metadata.""" master = self._get_master_address() tpu_system_metadata = self._lazy_tpu_system_metadata_dict.get(master) if tpu_system_metadata is not None: return tpu_system_metadata tpu_system_metadata = ( tf.tpu.experimental.TPUSystemMetadata( # pylint: disable=protected-access num_cores=1, num_hosts=1, num_of_cores_per_host=1, topology=None, devices=[])) self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata return tpu_system_metadata class _TPUEstimatorReplicaContext(tf.distribute.ReplicaContext): """Internal context for storing replica id. This is to set eager.context.Context() so that only summary ops from 0th replica is executed. """ def __init__(self, replica_id_in_sync): """Creates internal replica context for TPUEstimator. Args: replica_id_in_sync: Zero indexed integer id of replica that is running the TPU compuation. 
""" super(_TPUEstimatorReplicaContext, self).__init__(None, replica_id_in_sync) # Use default strategy and replica context when variables are # accessed/watched for backpropagation. # pylint: disable=protected-access self._thread_context = distribute_lib._DefaultReplicaThreadMode( ) self._strategy = self._thread_context.strategy # pylint: enable=protected-access def __enter__(self): def replica_id_is_zero(): return tf.math.equal(self.replica_id_in_sync_group, tf.constant(0)) if hasattr(summary_ops_v2, '_summary_state'): summary_state = summary_ops_v2._summary_state # pylint: disable=protected-access self._summary_recording_distribution_strategy = ( summary_state.is_recording_distribution_strategy) summary_state.is_recording_distribution_strategy = replica_id_is_zero def __exit__(self, exception_type, exception_value, traceback): if hasattr(summary_ops_v2, '_summary_state'): summary_state = summary_ops_v2._summary_state # pylint: disable=protected-access summary_state.is_recording_distribution_strategy = ( self._summary_recording_distribution_strategy) def _get_tpu_context(config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu, eval_on_tpu, embedding_config_spec): """Returns an instance of `_InternalTPUContext`.""" if (config.tpu_config.num_shards == 1 and config.tpu_config.num_cores_per_replica is None): if embedding_config_spec is not None: raise ValueError('Setting TPUConfig.num_shards==1 is unsupported ' 'when embedding_config_spec is not None.') tf.compat.v1.logging.warn( 'Setting TPUConfig.num_shards==1 is an unsupported behavior. 
' 'Please fix as soon as possible (leaving num_shards as None.)') return _OneCoreTPUContext(config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu) return _InternalTPUContext(config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu, eval_on_tpu, embedding_config_spec) ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_enqueue_sequence_test.py ================================================ # Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for sequence embedding features using TPU and TPUEstimator."""

import os
from typing import Dict, List, Text, Tuple

from absl import flags
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.contrib import summary as contrib_summary
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.tpu import tpu_config as tpu_config_lib
from tensorflow_estimator.python.estimator.tpu import tpu_estimator

FLAGS = flags.FLAGS


class TPUEnqueueSequenceTest(tf.test.TestCase, parameterized.TestCase):
  """End-to-end tests of sequence embedding lookups through TPUEstimator."""

  def setUp(self):
    super().setUp()
    # Separate directories for checkpoints and for the summaries that the
    # host call writes; both live under the test's temp dir.
    temp_dir = self.get_temp_dir()
    self._model_dir = os.path.join(temp_dir, 'model_dir')
    self._summary_dir = os.path.join(temp_dir, 'summaries')
    os.mkdir(self._model_dir)
    os.mkdir(self._summary_dir)

  # The key in the dataset which holds the sparse IDs. TPUEstimator will pass
  # the embeddings in the features dictionary arg of model_fn after performing
  # the embedding lookups.
  _KEY = 'SparseIDs'

  # The names of the summaries which hold the activations/sequence lengths.
  _SUMMARY_ACTIVATIONS = 'summary_activations'
  _SUMMARY_SEQUENCE_LENGTHS = 'summary_sequence_lengths'

  def get_activations_and_sequence_lengths(
      self,
      embedding_weights: List[List[float]],
      sparse_ids: tf.SparseTensorValue,
      batch_size: int,
      max_sequence_length: int,
      dimension: int,
      combiner: Text = 'mean',
  ) -> Tuple[tf.Tensor, tf.Tensor]:
    """Gets the activations and seq lengths for a batch of sparse IDs.

    This method uses TPUEstimator and the Feature Column API to get embedding
    activations for a batch of sparse IDs using a specified set of embedding
    weights.

    Args:
      embedding_weights: The embedding weights as a 2D list of floats.  The
        outer list length is the vocabulary size of the embedding table. The
        inner list length is the dimension of the embedding weights.
      sparse_ids: The embedding IDs to lookup. This is a 2D SparseTensorValue
        of shape [batch_size, max_sequence_length].
      batch_size: The size of the first dimension of sparse_ids.
      max_sequence_length: The size of the second dimension of sparse_ids.
      dimension: The embedding dimension size (number of floats for each
        embedding ID).
      combiner: The embedding column combiner (used for multivalent features).

    Returns:
      A tuple containing:
        activations: The activations for the specified sparse_ids.
          type=float32, shape=[batch_size, max_sequence_length, dimension]
        sequence_lengths: The sequence length of each example.
          type=int64. shape=[batch_size].
    """
    vocab_size = len(embedding_weights)

    categorical_column = (
        tf.feature_column.sequence_categorical_column_with_identity(
            key=self._KEY,
            num_buckets=vocab_size,
        ))

    # Create embedding column initialized with weights provided by caller.
    embedding_column = tf.tpu.experimental.embedding_column(
        categorical_column,
        dimension=dimension,
        max_sequence_length=max_sequence_length,
        initializer=tf.constant_initializer(embedding_weights),
        combiner=combiner,
    )

    # Add an SGD optimizer. This choice is arbitrary for computing activations.
    # It's only required to avoid an undefined gradients error.
    embedding_opt = tf.tpu.experimental.StochasticGradientDescentParameters(.1)
    embedding_config_spec = tpu_estimator.EmbeddingConfigSpec(
        feature_columns=[embedding_column],
        optimization_parameters=embedding_opt,
    )

    def _input_fn(params: Dict[Text, int]) -> tf.data.Dataset:
      """Creates a batched dataset containing the sparse_ids as a feature."""
      # Convert sparse IDs to batched dataset.
      sparse_ids_dataset = tf.data.Dataset.range(1).map(
          lambda x: {self._KEY: tf.SparseTensor.from_value(sparse_ids)})

      # Unbatch and rebatch the dataset based on the batch_size param from
      # TPUEstimator. This is necessary for shape validation performed internal
      # to TPUEstimator.
      return sparse_ids_dataset.unbatch().repeat().batch(params['batch_size'])

    def _host_call(
        concat_activations: tf.Tensor,
        concat_sequence_lengths: tf.Tensor,
    ) -> List[tf.Operation]:
      """Stores the activations and sequence lengths into a summary.

      TPUEstimator will concat the activations and sequence lengths from the
      minibatches on each core along axis=0 and pass them to this host call.
      This host call writes them to a file using the TF summary APIs.

      Args:
        concat_activations: The activations for the global batch. 2D
          Tensor(type=float32, shape=[batch_size, max_sequence_length]).
        concat_sequence_lengths: The sequence lengths for the global batch. 2D
          Tensor(type=int64, shape=[batch_size, max_sequence_length]).

      Returns:
        A list of summary ops for TPUEstimator to run on the host.
      """
      with contrib_summary.create_file_writer(self._summary_dir).as_default():
        with contrib_summary.always_record_summaries():
          contrib_summary.generic(
              self._SUMMARY_ACTIVATIONS,
              concat_activations,
          )
          contrib_summary.generic(self._SUMMARY_SEQUENCE_LENGTHS,
                                  concat_sequence_lengths)
          return contrib_summary.all_summary_ops()

    def _model_fn(
        features: Dict[Text, tf.Tensor],
        params: Dict[Text, int],
        mode: model_fn_lib.ModeKeys,
    ) -> tpu_estimator.TPUEstimatorSpec:
      """A model which writes activations and sequence lengths to a file.

      This method creates a model to extract the activations and sequence
      lengths on each TPU core and pass them to a host call which writes them
      to a file.

      The model also applies an optimizer to the activations simply to avoid an
      undefined gradients error.

      Args:
        features: A dictionary mapping keys to tensor inputs.
        params: Parameters passed by TPUEstimator.
        mode: Mode can be (TRAIN, EVAL, PREDICT).

      Returns:
        A TPUEstimatorSpec which holds the training_op that TPUEstimator will
        run on TPU and the host_call that TPUEstimator will run on the host.
      """
      del params

      input_layer = tf_keras_v1.experimental.SequenceFeatures(
          [embedding_column])
      activations, sequence_lengths = input_layer(features)

      opt = tf.tpu.CrossShardOptimizer(tf.train.GradientDescentOptimizer(0.1))
      loss = tf.reduce_sum(activations)
      train_op = opt.minimize(loss, global_step=tf.train.get_global_step())

      return tpu_estimator.TPUEstimatorSpec(
          mode=mode,
          loss=loss,
          train_op=train_op,
          host_call=(_host_call, [activations, sequence_lengths]),
      )

    tpu_config = tpu_config_lib.TPUConfig(
        per_host_input_for_training=(
            tpu_config_lib.InputPipelineConfig.PER_HOST_V2),)

    run_config = tpu_config_lib.RunConfig(
        session_config=tf.ConfigProto(isolate_session_state=True),
        tpu_config=tpu_config,
    )

    estimator = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        model_dir=self._model_dir,
        use_tpu=True,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        config=run_config,
        embedding_config_spec=embedding_config_spec,
    )

    # Train for 1 step and store the activations as summaries.
    estimator.train(_input_fn, steps=1)

    # Read the event summaries and decode the activation tensors.
    output = {}
    for filename in tf.io.gfile.listdir(self._summary_dir):
      # NOTE(review): the outer os.path.join is redundant (single argument) —
      # harmless, kept byte-identical.
      filepath = os.path.join(os.path.join(self._summary_dir, filename))
      for event in tf.train.summary_iterator(filepath):
        for v in event.summary.value:
          decoded = tf.io.decode_raw(v.tensor.tensor_content, v.tensor.dtype)
          shape = tf.TensorShape(v.tensor.tensor_shape)
          output[v.tag] = tf.reshape(decoded, shape)
    return (output[self._SUMMARY_ACTIVATIONS],
            output[self._SUMMARY_SEQUENCE_LENGTHS])

  def test_non_contiguous_sequence(self):
    """Tests embedding lookups for non-contiguous sparse IDs.

    A "non-contiguous sequence" is a sequence which has missing values followed
    by actual values.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 2

    embedding_weights = np.float32([
        [-5., -5.],  # embedding ID = 0
        [10., 11.],  # embedding ID = 1
        [20., 21.],  # embedding ID = 2
        [30., 31.],  # embedding ID = 3
        [40., 41.],  # embedding ID = 4
        [50., 51.],  # embedding ID = 5
    ])

    # The sparse_ids are indexes into the embedding_weights for each
    # (example, sequence_index).
    sparse_ids = tf.SparseTensorValue(
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [2, 2]],
        values=[
            1,  # Example 0, sequence_index 0
            2,  # Example 1, sequence_index 0
            3,  # Example 1, sequence_index 1
            4,  # Example 2, sequence_index 0
            5,  # Example 2, sequence_index 2
        ],
        dense_shape=[batch_size, max_sequence_length],
    )

    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
    )

    self.assertAllEqual(
        [
            [  # Example 0
                [10, 11],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
            [  # Example 1
                [20, 21],  # Sequence Index = 0
                [30, 31],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
            [  # Example 2
                [40, 41],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1 (Missing value mid-sequence)
                [50, 51],  # Sequence Index = 2
            ],
            [  # Example 3
                [0., 0.],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
        ], activations)

    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            3,  # Example 2
            0,  # Example 3
        ],
        sequence_lengths,
    )

  def test_non_contiguous_sequence_with_length_gt_max_sequence_length(self):
    """Tests non contiguous sequence which has length > max_sequence_length.

    A "non-contiguous sequence" is a sequence which has missing values followed
    by actual values. Additionally, this test has a sequence with length >
    max_sequence_length. In this case, we expect the sequence to be truncated
    from the right.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 1

    embedding_weights = np.float32([
        [-5.],  # embedding ID = 0
        [10.],  # embedding ID = 1
        [20.],  # embedding ID = 2
        [30.],  # embedding ID = 3
        [40.],  # embedding ID = 4
        [50.],  # embedding ID = 5
    ])

    # The sparse_ids are indexes into the embedding_weights for each
    # (example, sequence_index). Sequence indexes larger than max_sequence
    # length will be truncated.
    sparse_ids = tf.SparseTensorValue(
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [2, 2], [2, 3]],
        values=[
            1,  # Example 0, sequence_index 0
            2,  # Example 1, sequence_index 0
            3,  # Example 1, sequence_index 1
            4,  # Example 2, sequence_index 0
            5,  # Example 2, sequence_index 2
            6,  # Example 2, sequence_index 3
        ],
        dense_shape=[batch_size, max_sequence_length + 1],
    )

    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
    )

    self.assertAllEqual(
        [
            [  # Example 0
                [10],  # Sequence Index = 0
                [0.],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
            [  # Example 1
                [20],  # Sequence Index = 0
                [30],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
            [  # Example 2 (Truncated)
                [40],  # Sequence Index = 0
                [0.],  # Sequence Index = 1 (Missing value mid-sequence)
                [50],  # Sequence Index = 2
            ],
            [  # Example 3
                [0.],  # Sequence Index = 0
                [0.],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
        ], activations)

    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            3,  # Example 2
            0,  # Example 3
        ],
        sequence_lengths,
    )

  @parameterized.named_parameters(
      ('sum_combiner', 'sum'),
      ('mean_combiner', 'mean'),
  )
  def test_multivalent_sequence_features(self, combiner: Text):
    """Tests multivalent sequence embedding features.

    Args:
      combiner: The combiner used to reduce multivalent features.

    A multivalent sequence can have many IDs per sequence index. The input for
    multivalent sequence features is a 3D SparseTensor (instead of a 2D
    SparseTensor for univalent sequence features). The last dimension
    represents the index that will be reduced (using the combiner).
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 1

    embedding_weights = np.float32([
        [-5.],  # embedding ID = 0
        [10.],  # embedding ID = 1
        [20.],  # embedding ID = 2
        [30.],  # embedding ID = 3
        [40.],  # embedding ID = 4
        [50.],  # embedding ID = 5
    ])

    # For multivalent sequence features, IDs are a 3D sparse tensor.
    # The outer dimension is batch, the middle dimension is sequence, and the
    # last dimension is the index.
    sparse_ids = tf.SparseTensorValue(
        indices=[
            [0, 0, 0],
            [0, 0, 1],
            [1, 0, 0],
            [1, 1, 0],
            [3, 0, 0],
            [3, 2, 0],
            [3, 2, 1],
            [3, 3, 0],
        ],
        values=[
            1,  # Example 0, sequence_index 0, id_index 0.
            0,  # Example 0, sequence_index 0, id_index 1.
            2,  # Example 1, sequence_index 0, id_index 0.
            3,  # Example 1, sequence_index 1, id_index 0.
            4,  # Example 3, sequence_index 0, id_index 0.
            5,  # Example 3, sequence_index 2. id_index 0.
            2,  # Example 3, sequence_index 2. id_index 1.
            5,  # Example 3, sequence_index 3, id_index 0.
        ],
        dense_shape=[batch_size, max_sequence_length + 1, 2],
    )

    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
        combiner=combiner,
    )

    self.assertAllEqual(
        [
            [  # Example 0
                [5 if combiner == 'sum' else 2.5],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 1
                [20],  # Sequence Index = 0.
                [30],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 2
                [0.],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 3
                [40],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [70 if combiner == 'sum' else 35],  # Sequence Index = 2.
            ],
        ],
        activations,
    )

    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            0,  # Example 2
            3,  # Example 3
        ],
        sequence_lengths,
    )


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===================================================================
"""TPUEstimator class."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import enum
import math
import os
import signal
import sys
import threading
import time

import tensorflow as tf

import numpy as np
import six
from six.moves import queue as Queue  # pylint: disable=redefined-builtin
from six.moves import xrange  # pylint: disable=redefined-builtin
from tensorflow.core.framework import variable_pb2
from tensorflow.core.framework.summary_pb2 import Summary
from tensorflow.core.protobuf.tpu import compilation_result_pb2 as tpu_compilation_result
from tensorflow.python.data.util import nest as data_nest
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
from tensorflow.python.framework import function
from tensorflow.python.framework import ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import control_flow_util
from tensorflow.python.ops import ref_variable
from tensorflow.python.ops import summary_ops_v2
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.tpu import functional as tpu_functional
from tensorflow.python.tpu import preempted_hook
from tensorflow.python.tpu import session_support
from tensorflow.python.tpu import tensor_tracer
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu import tpu_embedding_gradient
from tensorflow.python.tpu import tpu_feed
from tensorflow.python.tpu import tpu_function
from tensorflow.python.tpu import tpu_replication
from tensorflow.python.tpu import training_loop
from tensorflow.python.tpu.ops import tpu_ops
from tensorflow.python.training import evaluation
from tensorflow.python.util import function_utils
from tensorflow.python.util import tf_inspect
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.estimator_export import estimator_export
from tensorflow_estimator.python.estimator.export import export_output as export_output_lib
from tensorflow_estimator.python.estimator.tpu import _tpu_estimator_embedding
from tensorflow_estimator.python.estimator.tpu import error_handling
from tensorflow_estimator.python.estimator.tpu import iteration_count_estimator
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_context
from tensorflow_estimator.python.estimator.tpu import util as util_lib
from tensorflow_estimator.python.estimator.tpu._tpu_estimator_embedding import AdagradParameters  # pylint: disable=unused-import
from tensorflow_estimator.python.estimator.tpu._tpu_estimator_embedding import AdamParameters  # pylint: disable=unused-import
from tensorflow_estimator.python.estimator.tpu._tpu_estimator_embedding import EmbeddingConfigSpec  # pylint: disable=unused-import
from tensorflow_estimator.python.estimator.tpu._tpu_estimator_embedding import StochasticGradientDescentParameters  # pylint: disable=unused-import

_INITIAL_LOSS = 1e7
_ZERO_LOSS = 0.
_TPU_ESTIMATOR = 'tpu_estimator'
_ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop'
_BATCH_SIZE_KEY = 'batch_size'
_CTX_KEY = 'context'
_USE_TPU_KEY = 'use_tpu'
_CROSS_REPLICA_SUM_OP = 'CrossReplicaSum'
_ONE_GIGABYTE = 1024 * 1024 * 1024
_TPU_ENQUEUE_OPS = '_tpu_enqueue_ops'
_TPU_TRAIN_OP = '_tpu_train_op'
_INFERENCE_ON_TPU_MODE = '_inference_on_tpu'
_KEY_WHEN_PREDICTIONS_IS_A_TENSOR = '_key_when_predictions_is_a_tensor'
_TENSOR_PACKER_SMALL_FEATURE_DIM_SIZE = 1
_TENSOR_PACKER_MINIMUM_NUM_SMALL_FEATURES_TO_GROUP = 5
_TENSOR_PACKER_CONCATENATED_SMALL_FEATURES_KEY = '_concatenated_small_features'

# Ideally _USE_TPU_KEY should be reserved as well. However there are already
# models that make use of this key, thus it can not be reserved now to prevent
# breakage. In the long run, we would like to mitigate this by migrating models
# off of using _USE_TPU_KEY.
_RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY]

# TODO(b/65703635): Flip the value and remove all dead code. Currently, this is
# only used for per-core based deployments. For per-host based pipelines, if a
# user returns a Dataset instance it will be automatically wrapped in a
# tf.while_loop (This can be disabled by returning features and labels
# explicitly).
_WRAP_INPUT_FN_INTO_WHILE_LOOP = False

# Track the adoption of TPUEstimator
_tpu_estimator_gauge = tf.compat.v2.__internal__.monitoring.BoolGauge(
    '/tensorflow/api/tpu_estimator',
    'Whether the program uses tpu estimator or not.')

# Register (de)serialization for the iterations_per_loop variable's
# collection so it survives MetaGraph export/import; guard against double
# registration on repeated module import.
if ops.get_to_proto_function('{}_{}'.format(_TPU_ESTIMATOR,
                                            _ITERATIONS_PER_LOOP_VAR)) is None:
  ops.register_proto_function(
      '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR),
      proto_type=variable_pb2.VariableDef,
      to_proto=ref_variable._to_proto_fn,  # pylint: disable=protected-access
      from_proto=ref_variable._from_proto_fn)  # pylint: disable=protected-access


def _is_iterable(obj):
  """A Python 2 and 3 compatible util to check whether `obj` is iterable."""
  try:
    iter(obj)
    return True
  except TypeError:
    return False


class CatchInvalidHostcallFunctions(control_flow_ops.XLAControlFlowContext):
  """Control-flow context that rejects v1 summary ops inside host_calls."""

  def AddOp(self, op):
    # v1 summary ops cannot be lowered for TPU host calls; direct users to
    # the contrib (v2-style) summary API instead.
    if op.type in [
        'AudioSummary', 'AudioSummaryV2', 'HistogramSummary', 'ImageSummary',
        'MergeSummary', 'ScalarSummary', 'TensorSummary', 'TensorSummaryV2'
    ]:
      raise ValueError('Please use tf.contrib.summary instead of tf.summary '
                       'inside of host_calls.')


def _create_global_step(graph):
  """Creates a resource-variable global step in `graph`.

  Raises:
    ValueError: If a global step already exists in the graph.
  """
  graph = graph or tf.compat.v1.get_default_graph()
  if tf.compat.v1.train.get_global_step(graph) is not None:
    raise ValueError('"global_step" already exists.')
  # Create in proper graph and base name_scope.
  with graph.as_default() as g, g.name_scope(None):
    return tf.compat.v1.get_variable(
        tf.compat.v1.GraphKeys.GLOBAL_STEP,
        shape=[],
        dtype=tf.dtypes.int64,
        initializer=tf.compat.v1.initializers.zeros(),
        trainable=False,
        use_resource=True,
        collections=[
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
            tf.compat.v1.GraphKeys.GLOBAL_STEP
        ])


def _create_or_get_iterations_per_loop():
  """Creates or gets the iterations_per_loop variable.

  In TPUEstimator, the user provided computation, the model_fn, is wrapped
  inside a tf.while_loop for peak performance. The iterations of the loop are
  specified by this variable, which adjusts its value on the CPU after each TPU
  program execution and before the next TPU execution.

  The purpose of using a variable, rather than a constant, is to allow
  TPUEstimator adapt the TPU training iterations according to the final steps
  specified by users. For example, if the user sets the iterations_per_loop as
  4 in TPUConfig and steps as 10 in TPUEstimator.train(), the
  iterations_per_loop variable will have the following value before each TPU
  training.

      - 1-th TPU execution: iterations_per_loop = 4
      - 2-th TPU execution: iterations_per_loop = 4
      - 3-th TPU execution: iterations_per_loop = 2

  As model_fn increases the global step once per train_op invocation, the
  global step is 10 after all TPU executions, matching the steps=10 inputs
  passed in by users.

  Returns:
    A TF non-trainable resource variable.

  Raises:
    RuntimeError: If multi iterations_per_loop variables were found.
  """
  graph = tf.compat.v1.get_default_graph()
  collection_name = '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR)
  iter_vars = graph.get_collection(collection_name)
  if len(iter_vars) == 1:
    return iter_vars[0]
  elif len(iter_vars) > 1:
    raise RuntimeError('Multiple iterations_per_loop_var in collection.')

  # Colocate with the global step so the CPU-side bookkeeping lives on the
  # same device.
  with ops.colocate_with(tf.compat.v1.train.get_global_step()):
    with tf.compat.v1.variable_scope(
        _TPU_ESTIMATOR, reuse=tf.compat.v1.AUTO_REUSE):
      return tf.compat.v1.get_variable(
          _ITERATIONS_PER_LOOP_VAR,
          initializer=tf.compat.v1.initializers.zeros(),
          shape=[],
          dtype=tf.dtypes.int32,
          trainable=False,
          collections=[collection_name, tf.compat.v1.GraphKeys.LOCAL_VARIABLES],
          use_resource=True)


def _sync_variables_ops(ctx):
  """Create variables synchronization ops.

  Gets the variables back from TPU nodes. This means the variables updated
  by TPU will now be *synced* to host memory.
  In BROADCAST mode, we skip this sync since the variables are usually too
  big to transmit via RPC.

  Args:
    ctx: A `_InternalTPUContext` instance with mode.

  Returns:
    A list of sync ops.
  """
  if not ctx.is_input_broadcast_with_iterators():
    return [
        tf.debugging.check_numerics(v.read_value(),
                                    'Gradient for %s is NaN' % v.name).op
        for v in tf.compat.v1.trainable_variables()
    ]
  else:
    return [tf.no_op()]


def _increase_eval_step_op(iterations_per_loop):
  """Returns an op to increase the eval step for TPU evaluation.

  Args:
    iterations_per_loop: Tensor. The number of eval steps running in TPU system
      before returning to CPU host for each `Session.run`.

  Returns:
    An operation
  """
  eval_step = evaluation._get_or_create_eval_step()  # pylint: disable=protected-access
  # Estimator evaluate increases 1 by default. So, we increase the difference.
  return tf.compat.v1.assign_add(
      eval_step,
      tf.cast(iterations_per_loop - 1, dtype=eval_step.dtype),
      use_locking=True)


def _extract_key_names(tensor_or_dict):
  # Returns sorted dict keys, or [] for a plain tensor/list input.
  if isinstance(tensor_or_dict, dict):
    return sorted(tensor_or_dict.keys())
  return []


class PeriodicLogger(object):
  """Rate-limits info logging to at most once every `seconds`."""

  def __init__(self, seconds):
    self._log_every_n_seconds = seconds
    self._last_log_time = 0

  def log(self, msg, *args, **kw):
    if time.time() - self._last_log_time > self._log_every_n_seconds:
      self._last_log_time = time.time()
      tf.compat.v1.logging.info(msg, *args, **kw)


class _SIGNAL(object):
  """Signal used to control the thread of infeed/outfeed.

  All preserved signals must be negative numbers. Positive numbers are used to
  indicate the number of iterations for next training/evaluation loop.
  """
  NEXT_BATCH = -1
  STOP = -2


@estimator_export(v1=['estimator.tpu.TPUEstimatorSpec'])
class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
  """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.

  See `EstimatorSpec` for `mode`, `predictions`, `loss`, `train_op`, and
  `export_outputs`.

  For evaluation, `eval_metrics` is a tuple of `metric_fn` and `tensors`, where
  `metric_fn` runs on CPU to generate metrics and `tensors` represents the
  `Tensor`s transferred from TPU system to CPU host and passed to `metric_fn`.
  To be precise, TPU evaluation expects a slightly different signature from the
  `tf.estimator.Estimator`. While `EstimatorSpec.eval_metric_ops` expects a
  dict, `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and
  `tensors`. The `tensors` could be a list of `Tensor`s or dict of names to
  `Tensor`s. The `tensors` usually specify the model logits, which are
  transferred back from TPU system to CPU host. All tensors must be batch-major,
  i.e., the batch size is the first dimension. Once all tensors are available
  at CPU host from all shards, they are concatenated (on CPU) and passed as
  positional arguments to the `metric_fn` if `tensors` is list or keyword
  arguments if `tensors` is a dict. `metric_fn` takes the `tensors` and returns
  a dict from metric string name to the result of calling a metric function,
  namely a `(metric_tensor, update_op)` tuple. See `TPUEstimator` for MNIST
  example how to specify the `eval_metrics`.

  `scaffold_fn` is a function running on CPU to generate the `Scaffold`. This
  function should not capture any Tensors in `model_fn`.

  `host_call` is a tuple of a `function` and a list or dictionary of `tensors`
  to pass to that function and returns a list of Tensors. `host_call` currently
  works for train() and evaluate(). The Tensors returned by the function is
  executed on the CPU on every step, so there is communication overhead when
  sending tensors from TPU to CPU. To reduce the overhead, try reducing the
  size of the tensors. The `tensors` are concatenated along their major (batch)
  dimension, and so must be >= rank 1. The `host_call` is useful for writing
  summaries with `tf.summary.create_file_writer`.

  @compatibility(TF2)
  TPU Estimator manages its own TensorFlow graph and session, so it is not
  compatible with TF2 behaviors. We recommend that you migrate to the newer
  `tf.distribute.TPUStrategy`. See the
  [TPU guide](https://www.tensorflow.org/guide/tpu) for details.
  @end_compatibility
  """

  def __new__(cls,
              mode,
              predictions=None,
              loss=None,
              train_op=None,
              eval_metrics=None,
              export_outputs=None,
              scaffold_fn=None,
              host_call=None,
              training_hooks=None,
              evaluation_hooks=None,
              prediction_hooks=None):
    """Creates a validated `TPUEstimatorSpec` instance."""
    # NOTE(review): host calls are stored on the *class*, not the instance, so
    # constructing a second spec overwrites the first's host calls. Appears to
    # be a long-standing pattern here — verify before relying on multiple
    # concurrently-live specs.
    cls._host_calls = {}
    if eval_metrics is not None:
      cls._host_calls['eval_metrics'] = eval_metrics
    if host_call is not None:
      cls._host_calls['host_call'] = host_call
    _OutfeedHostCall.validate(cls._host_calls)

    training_hooks = tuple(training_hooks or [])
    evaluation_hooks = tuple(evaluation_hooks or [])
    prediction_hooks = tuple(prediction_hooks or [])

    for hook in training_hooks + evaluation_hooks + prediction_hooks:
      if not isinstance(hook, tf.compat.v1.train.SessionRunHook):
        raise TypeError(
            'All hooks must be SessionRunHook instances, given: {}'.format(
                hook))

    return super(TPUEstimatorSpec, cls).__new__(
        cls,
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metrics=eval_metrics,
        export_outputs=export_outputs,
        scaffold_fn=scaffold_fn,
        host_call=host_call,
        training_hooks=training_hooks,
        evaluation_hooks=evaluation_hooks,
        prediction_hooks=prediction_hooks)

  def as_estimator_spec(self):
    """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
    host_call_ret = _OutfeedHostCall.create_cpu_hostcall(self._host_calls)
    eval_metric_ops = None
    if self.eval_metrics is not None:
      eval_metric_ops = host_call_ret['eval_metrics']
    hooks = None
    if self.host_call is not None:
      hooks = [_OutfeedHostCallHook(host_call_ret['host_call'])]
    loss = self.loss
    if tensor_tracer.TensorTracer.is_enabled() \
       and self.train_op is not None:
      tt = tensor_tracer.TensorTracer()
      loss = tt.trace_cpu(tf.compat.v1.get_default_graph(), loss, self.train_op)

    hooks = tuple(hooks or [])
    scaffold = self.scaffold_fn() if self.scaffold_fn else None
    return model_fn_lib.EstimatorSpec(
        mode=self.mode,
        predictions=self.predictions,
        loss=loss,
        train_op=self.train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=self.export_outputs,
        scaffold=scaffold,
        training_hooks=self.training_hooks + hooks,
        evaluation_hooks=self.evaluation_hooks + hooks,
        prediction_hooks=self.prediction_hooks + hooks)


class _OpQueueContext(object):
  """Manages work queue and thread for a infeed/outfeed thread."""

  def __init__(self, name, target, args):
    self._name = name
    self._queue = Queue.Queue()
    args = (self,) + args
    # Daemon thread so a hung infeed/outfeed never blocks interpreter exit.
    self._thread = threading.Thread(name=name, target=target, args=args)
    self._thread.daemon = True
    self._thread.start()

  def stop(self):
    self._queue.put(_SIGNAL.STOP)

  def send_next_batch_signal(self, iterations):
    self._queue.put(iterations)

  def read_iteration_counts(self):
    # Generator: yields iteration counts until a STOP signal arrives.
    while True:
      iterations = self._queue.get(block=True)
      tf.compat.v1.logging.debug('%s read iterations %s', self._name,
                                 iterations)
      if iterations == _SIGNAL.STOP:
        tf.compat.v1.logging.info('%s received shutdown signal, stopping.',
                                  self._name)
        return
      yield iterations

  def join(self):
    tf.compat.v1.logging.info('Shutting down %s thread.', self._name)
    self.stop()
    self._thread.join()


class _OpSignalOnceQueueContext(_OpQueueContext):
  """Manages work queue and thread for a infeed/outfeed thread.

  This subclass only signals once.
  """

  def __init__(self, name, target, args):
    super(_OpSignalOnceQueueContext, self).__init__(name, target, args)
    self._has_signaled = False

  def send_next_batch_signal(self, iterations):
    # Subsequent signals are dropped after the first.
    if not self._has_signaled:
      self._queue.put(iterations)
      self._has_signaled = True


class TPUInfeedOutfeedSessionHook(tf.compat.v1.train.SessionRunHook):
  """A Session hook setting up the TPU initialization, infeed, and outfeed.

  This hook does two major things:
  1. initialize and shutdown TPU system.
  2.
  launch and join the threads for infeed enqueue and (optional) outfeed
  dequeue.
  """

  def __init__(self,
               ctx,
               enqueue_ops,
               dequeue_ops,
               tpu_compile_op,
               run_infeed_loop_on_coordinator=True,
               rendezvous=None,
               master=None,
               session_config=None,
               tpu_init_ops=None,
               outfeed_every_n_steps=1):
    self._master_job = ctx.master_job
    self._enqueue_ops = enqueue_ops
    self._dequeue_ops = dequeue_ops
    self._rendezvous = rendezvous
    self._master = master
    self._session_config = session_config
    self._init_ops = list(tpu_init_ops or [])
    if ctx.embedding_config is None:
      self._embedding_layer_config = None
    else:
      self._embedding_layer_config = (
          ctx.embedding_config.tpu_embedding.config_proto)
    self._run_infeed_loop_on_coordinator = run_infeed_loop_on_coordinator
    self._initial_infeed_sleep_secs = (
        ctx.config.tpu_config.initial_infeed_sleep_secs)
    self._tpu_compile_op = tpu_compile_op

    # When using model parallelism, the TPU is pre-initialized at startup to
    # fetch mesh information. We skip re-initializing it here for
    # MeshTensorFlow since it places variables on TPU directly. Reinitialize
    # tpu is causing the variable corruption since the previous allocated
    # memory might be overwritten for other purpose.
    if (ctx.model_parallelism_enabled and
        (ctx.config.tpu_config.per_host_input_for_training is
         tpu_config.InputPipelineConfig.BROADCAST)):
      self._should_initialize_tpu = False
    else:
      self._should_initialize_tpu = True
    self._outfeed_every_n_steps = outfeed_every_n_steps

  def begin(self):
    """Builds init/finalize ops before the session is created."""
    tf.compat.v1.logging.info('TPU job name %s', self._master_job)
    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()
    if self._should_initialize_tpu:
      self._finalize_ops = [
          tf.compat.v1.tpu.shutdown_system(job=self._master_job)
      ]
    else:
      self._finalize_ops = []

    summary_writer_init_ops = summary_ops_v2.summary_writer_initializer_op()
    self._init_ops.extend(summary_writer_init_ops)
    # Get all the writer resources from the initializer, so we know what to
    # flush.
    for op in summary_writer_init_ops:
      self._finalize_ops.append(
          summary_ops_v2.legacy_raw_flush(writer=op.inputs[0]))

  def _run_infeed(self, queue_ctx, session):
    """Infeed thread body: enqueues batches as iteration signals arrive."""
    tf.compat.v1.logging.info('Starting infeed thread controller.')
    if self._initial_infeed_sleep_secs:
      tf.compat.v1.logging.info('Infeed thread sleeping for %d seconds.',
                                self._initial_infeed_sleep_secs)
      time.sleep(self._initial_infeed_sleep_secs)
      tf.compat.v1.logging.info('Infeed thread starting after sleep')

    with self._rendezvous.catch_errors(source='infeed', session=session):
      if self._run_infeed_loop_on_coordinator:
        for count, steps in enumerate(queue_ctx.read_iteration_counts()):
          for i in xrange(steps):
            tf.compat.v1.logging.debug('Infeed enqueue for iteration (%d, %d)',
                                       count, i)
            session.run(self._enqueue_ops)
      else:
        # Workers run the loop themselves; one session.run per signal.
        for _ in queue_ctx.read_iteration_counts():
          session.run(self._enqueue_ops)
    tf.compat.v1.logging.info('Infeed thread finished, shutting down.')

  def _run_outfeed(self, queue_ctx, session):
    """Outfeed thread body: dequeues every `outfeed_every_n_steps` steps."""
    tf.compat.v1.logging.info('Starting outfeed thread controller.')
    status_logger = PeriodicLogger(seconds=60)
    with self._rendezvous.catch_errors(source='outfeed', session=session):
      for count, steps in enumerate(queue_ctx.read_iteration_counts()):
        step_counter = 0
        for i in xrange(steps):
          tf.compat.v1.logging.debug('Outfeed dequeue for iteration (%d, %d)',
                                     count, i)
          if step_counter % self._outfeed_every_n_steps == 0:
            session.run(self._dequeue_ops)
          step_counter += 1
        status_logger.log('Outfeed finished for iteration (%d, %d)', count, i)
    tf.compat.v1.logging.info('Outfeed thread finished, shutting down.')

  def _create_infeed_controller(self, name, target, args):
    # Overridden in the prediction subclass to signal only once.
    return _OpQueueContext(name=name, target=target, args=args)

  def _assertCompilationSucceeded(self, result, coord):
    """Parses the compile-op result; requests stop on compilation failure."""
    proto = tpu_compilation_result.CompilationResultProto()
    proto.ParseFromString(result)
    if proto.status_error_message:
      tf.compat.v1.logging.error('Compilation failed: {}'.format(
          proto.status_error_message))
      coord.request_stop()
    else:
      tf.compat.v1.logging.info('Compilation succeeded')

  def after_create_session(self, session, coord):
    """Initializes the TPU system and starts the infeed/outfeed threads."""
    if self._should_initialize_tpu:
      tf.compat.v1.logging.info('Init TPU system')
      start = time.time()
      # Use a fresh graph/session so initialize_system does not pollute the
      # training graph.
      with tf.Graph().as_default():
        with tf.compat.v1.Session(
            self._master, config=self._session_config) as sess:
          sess.run(
              tf.compat.v1.tpu.initialize_system(
                  job=self._master_job,
                  embedding_config=self._embedding_layer_config))
      tf.compat.v1.logging.info('Initialized TPU in %d seconds',
                                time.time() - start)

    session.run(
        self._init_ops,
        options=tf.compat.v1.RunOptions(timeout_in_ms=30 * 60 * 1000))

    if os.environ.get('TPU_SPLIT_COMPILE_AND_EXECUTE', '') == '1':
      tf.compat.v1.logging.info(
          'Compiling user program: this may take a while...')
      self._assertCompilationSucceeded(session.run(self._tpu_compile_op), coord)

    self._infeed_controller = self._create_infeed_controller(
        name='InfeedController', target=self._run_infeed, args=(session,))

    self._outfeed_controller = _OpQueueContext(
        name='OutfeedController', target=self._run_outfeed, args=(session,))

    # Enable the worker watchdog to terminate workers on coordinator exit.
    watchdog_timeout = int(os.environ.get('TF_TPU_WATCHDOG_TIMEOUT', '0'))
    if watchdog_timeout > 0:
      session_support.start_worker_watchdog(
          session, shutdown_timeout=watchdog_timeout)

  def before_run(self, run_context):
    """Signals both threads to process the next `iterations` batches."""
    iterations = run_context.session.run(self._iterations_per_loop_var)
    tf.compat.v1.logging.info('Enqueue next (%d) batch(es) of data to infeed.',
                              iterations)
    self._infeed_controller.send_next_batch_signal(iterations)

    tf.compat.v1.logging.info(
        'Dequeue next (%d) batch(es) of data from outfeed.', iterations)
    self._outfeed_controller.send_next_batch_signal(iterations)

  def end(self, session):
    """Joins both threads and shuts down the TPU system."""
    tf.compat.v1.logging.info('Stop infeed thread controller')
    self._infeed_controller.join()
    self._rendezvous.record_done('infeed')

    tf.compat.v1.logging.info('Stop output thread controller')
    self._outfeed_controller.join()
    self._rendezvous.record_done('outfeed')

    tf.compat.v1.logging.info('Shutdown TPU system.')
    session.run(self._finalize_ops)


class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook):
  """Infeed/outfeed hook variant for prediction (single-shot infeed signal)."""

  def __init__(self,
               ctx,
               enqueue_ops,
               dequeue_ops,
               tpu_compile_op,
               rendezvous=None,
               master=None,
               session_config=None):
    super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__(
        ctx,
        enqueue_ops,
        dequeue_ops,
        tpu_compile_op=tpu_compile_op,
        run_infeed_loop_on_coordinator=False,
        rendezvous=rendezvous,
        master=master,
        session_config=session_config)

  def _create_infeed_controller(self, name, target, args):
    # Prediction only needs one infeed signal; see _OpSignalOnceQueueContext.
    return _OpSignalOnceQueueContext(name=name, target=target, args=args)


class _TPUStopAtStepHook(tf.compat.v1.train.SessionRunHook):
  """Hook that requests stop at a specified step.

  This hook is similar to the `session_run_hook._StopAfterNEvalsHook` with
  following differences for TPU training:

  1. This hook sets the variable for `iterations_per_loop`, which is used by
     `TPUInfeedOutfeedSessionHook` to control the iterations for
     infeed/outfeed.
     If the `iterations_per_loop` value is specified as time in seconds, the
     number of iterations per `Session.run` will be estimated automatically
     based on per iteration runtime.

     As the hook execution order is not guaranteed, the variable update is
     handled in `after_create_session` and `after_run` as
     `TPUInfeedOutfeedSessionHook` reads the variable value in `before_run`.

  2. For each training loop (session.run), the global step could be increased
     multiple times on TPU. The global step tensor value will be explicitly
     read again in `after_run` to ensure the latest value is retrieved to
     avoid race condition.
  """

  def __init__(self,
               iterations_per_loop_counter,
               num_steps=None,
               final_step=None):
    """Initializes a `TPUStopAtStepHook`.

    Args:
      iterations_per_loop_counter: A namedtuple of [`value',`unit`] that
        represents the number of 'iterations count' or 'time in seconds' to run
        optimizer per loop, based on the `unit` specified, `count` or `seconds`
        respectively.
      num_steps: Number of steps to execute.
      final_step: Step after which to stop.

    Raises:
      ValueError: If one of the arguments is invalid.
    """
    # Exactly one of num_steps / final_step must be given.
    if num_steps is None and final_step is None:
      raise ValueError('One of `num_steps` or `final_step` must be specified.')
    if num_steps is not None and final_step is not None:
      raise ValueError(
          'Only one of `num_steps` or `final_step` can be specified.')
    self._iterations_per_loop_counter = iterations_per_loop_counter
    if self._iterations_per_loop_counter.unit not in ['seconds', 'count']:
      raise ValueError('Only `count` or `seconds` are accepted as the '
                       '`iterations_per_loop_counter.unit')
    self._num_steps = num_steps
    self._final_step = final_step
    self._next_iteration_count = 1
    self._iteration_count_estimator = None
    # In 'seconds' mode, estimate how many iterations fit in the time budget.
    if self._iterations_per_loop_counter.unit == 'seconds':
      self._iteration_count_estimator = (
          iteration_count_estimator.IterationCountEstimator())
    self._start_time = time.time()

  def _next_iterations(self, global_step, final_step):
    """Computes the next iterations count.

    The next iterations count is computed by choosing the smaller of the
    remaining step count (`final_step` - `global_step`) and the estimated
    iterations count returned by the estimator.

    Args:
      global_step: The current step.
      final_step: Step after which to stop.

    Returns:
      The number of iterations count to run per loop.
    """
    remaining_steps = final_step - global_step
    if self._iteration_count_estimator is not None:
      estimated_iterations = self._iteration_count_estimator.get(
          self._iterations_per_loop_counter.value)
    else:
      estimated_iterations = self._iterations_per_loop_counter.value
    # Never run past final_step.
    self._next_iteration_count = min(remaining_steps, estimated_iterations)
    return self._next_iteration_count

  def begin(self):
    """Initializes variables.

    Initializes the global step and iterations per loop variables.

    Raises:
      RuntimeError: An error occurred if global step variable does not exist.
    """
    self._global_step_tensor = tf.compat.v1.train.get_global_step()
    if self._global_step_tensor is None:
      raise RuntimeError('Global step should be created.')

    self._iterations_per_loop_var = _create_or_get_iterations_per_loop()

  def after_create_session(self, session, coord):
    """Computes and updates the first time iterations count.

    The iterations are computed by choosing the smaller of the
    (`final step` - `global step`), and the initial estimated iterations
    returned by the estimator (by default is 1).

    Args:
      session: A TensorFlow Session that has been created.
      coord: A Coordinator object which keeps track of all threads.
    """
    global_step = session.run(self._global_step_tensor)
    if self._final_step is None:
      self._final_step = global_step + self._num_steps

    iterations = self._next_iterations(global_step, self._final_step)
    self._iterations_per_loop_var.load(iterations, session=session)

  def before_run(self, run_context):
    """Reset the timer."""
    if self._iteration_count_estimator is not None:
      self._start_time = time.time()

  def after_run(self, run_context, run_values):
    """Computes the next iterations per loop value or terminates.

    Computes the elapsed time to run the last optimizer loop and if the
    `IterationCountEstimator` is used, records the elapsed time and iterations
    count. If the final step count has been reached, terminates. Otherwise,
    computes and updates the number of iterations to run the optimizer per
    loop.

    Args:
      run_context: A `SessionRunContext` object.
      run_values: A SessionRunValues object.
    """
    if self._iteration_count_estimator is not None:
      elapsed_time = time.time() - self._start_time
      tf.compat.v1.logging.info('ElapsedTime: %.3f', elapsed_time)
      self._iteration_count_estimator.update(elapsed_time,
                                             self._next_iteration_count)

    # Global step cannot be retrieved via SessionRunArgs and before_run due to
    # race condition.
    global_step = run_context.session.run(self._global_step_tensor)
    if global_step >= self._final_step:
      run_context.request_stop()
    else:
      iterations = self._next_iterations(global_step, self._final_step)
      self._iterations_per_loop_var.load(
          iterations, session=run_context.session)


class _SetEvalIterationsHook(tf.compat.v1.train.SessionRunHook):
  """Hook that sets the number of eval iterations to run per TPU loop."""

  def __init__(self, num_steps):
    """Initializes a `_SetEvalIterationsHook`.

    Args:
      num_steps: Number of steps to execute.
""" self._num_steps = num_steps def begin(self): self._iterations_per_loop_var = _create_or_get_iterations_per_loop() def after_create_session(self, session, coord): self._iterations_per_loop_var.load(self._num_steps, session=session) class _StoppingPredictHook(tf.compat.v1.train.SessionRunHook): """Hook that requests stop according to the stopping signal in prediction.""" def __init__(self, scalar_stopping_signal): self._scalar_stopping_signal = scalar_stopping_signal def begin(self): self._iterations_per_loop_var = _create_or_get_iterations_per_loop() def after_create_session(self, session, coord): # This is not necessary as we do not run infeed enqueue and outfeed dequeue # in side threads for prediction model. But it makes the # TPUInfeedOutfeedSessionHook prints nice message. self._iterations_per_loop_var.load(1, session=session) def before_run(self, run_context): return tf.compat.v1.train.SessionRunArgs(self._scalar_stopping_signal) def after_run(self, run_context, run_values): _ = run_context scalar_stopping_signal = run_values.results if _StopSignals.should_stop(scalar_stopping_signal): # NOTE(xiejw): In prediction, stopping signals are inserted for each # batch. And we append one more batch to signal the system it should stop. # The data flow might look like # # batch 0: images, labels, stop = 0 (user provided) # batch 1: images, labels, stop = 0 (user provided) # ... # batch 99: images, labels, stop = 0 (user provided) # batch 100: images, labels, stop = 1 (TPUEstimator appended) # # where the final batch (id = 100) is appended by TPUEstimator, so we # should drop it before returning the predictions to user. # To achieve that, we throw the OutOfRangeError in after_run. 
Once # Monitored Session sees this error in SessionRunHook.after_run, the # "current" prediction, i.e., batch with id=100, will be discarded # immediately raise tf.errors.OutOfRangeError(None, None, 'Stopped by stopping signal.') def generate_per_core_enqueue_ops_fn_for_host(ctx, input_fn, inputs_structure_recorder, host_device, host_id): """Generates infeed enqueue ops for per-core input_fn on a single host.""" captured_infeed_queue = _CapturedObject() tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id) def enqueue_ops_fn(): """A fn returns enqueue_ops.""" num_cores_per_host = ctx.num_of_cores_per_host per_host_sharded_inputs = [] for core_ordinal in range(num_cores_per_host): with ops.name_scope('ordinal_%d' % (core_ordinal)): user_context = tpu_context.TPUContext( internal_ctx=ctx, input_device=host_device, invocation_index=host_id * ctx.num_of_cores_per_host + core_ordinal, host_id=host_id) inputs = _Inputs.from_input_fn(input_fn(user_context)) if inputs.is_dataset: raise TypeError( '`input_fn` returning `Dataset` is not yet supported in ' 'per-Core input pipeline deployment yet. 
Please set ' 'TPUConfig.per_host_input_for_training to True or return ' '`features` and `labels` from `input_fn`') features, labels = inputs.features_and_labels() inputs_structure_recorder.validate_and_record_structure( features, labels) flattened_inputs = ( inputs_structure_recorder.flatten_features_and_labels( features, labels)) per_host_sharded_inputs.append(flattened_inputs) infeed_queue = tpu_feed.InfeedQueue( number_of_tuple_elements=len(per_host_sharded_inputs[0])) captured_infeed_queue.capture(infeed_queue) per_host_enqueue_ops = infeed_queue.generate_enqueue_ops( per_host_sharded_inputs, tpu_ordinal_function=tpu_ordinal_function_impl) return per_host_enqueue_ops return enqueue_ops_fn, captured_infeed_queue def generate_per_host_enqueue_ops_fn_for_host(ctx, input_fn, inputs_structure_recorder, batch_axis, device, host_id): """Generates infeed enqueue ops for per-host input_fn on a single host.""" captured_infeed_queue = _CapturedObject() dataset_initializer = None with tf.compat.v1.device(device): user_context = tpu_context.TPUContext( internal_ctx=ctx, input_device=device, invocation_index=host_id, host_id=host_id) inputs = _Inputs.from_input_fn(input_fn(user_context)) is_dataset = inputs.is_dataset if ctx.mode == model_fn_lib.ModeKeys.PREDICT: if not is_dataset: raise TypeError( 'For mode PREDICT, `input_fn` must return `Dataset` instead of ' '`features` and `labels`.') if batch_axis is not None: raise TypeError('For mode PREDICT, batch_axis is not supported yet.') inputs = _InputsWithStoppingSignals( dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn, add_padding=True) if is_dataset: dataset_initializer = inputs.dataset_initializer() tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id) def enqueue_ops_fn(): """A Fn returning the TPU infeed enqueue ops. By providing as a Fn, it can be invoked inside the tf.while_loop such that the input pipeline for multiple iterations can be executed by one Session.run call. 
    Returns:
      list of dict of ops.
    """
    with tf.compat.v1.device(device):
      num_of_replicas_per_host = ctx.num_of_replicas_per_host
      # Convert user input to features and labels. If the user returns a
      # dataset, it is initialized and the features and labels extracted via
      # `dataset.iterator.get_next()`
      features, labels = inputs.features_and_labels()
      signals = inputs.signals()

      features, labels, enqueue_datas_list = (
          _tpu_estimator_embedding.split_inputs(
              ctx,
              features,
              labels,
              num_cores_per_batch=num_of_replicas_per_host))

      inputs_structure_recorder.validate_and_record_structure(features, labels)
      unsharded_tensor_list = (
          inputs_structure_recorder.flatten_features_and_labels(
              features, labels, signals))

      infeed_queue = tpu_feed.InfeedQueue(
          tuple_types=[t.dtype for t in unsharded_tensor_list],
          tuple_shapes=[t.shape for t in unsharded_tensor_list],
          shard_dimensions=batch_axis)
      captured_infeed_queue.capture(infeed_queue)
      infeed_queue.set_number_of_shards(num_of_replicas_per_host)
      # The host splits the unsharded batch along batch_axis into one shard
      # per replica on this host.
      per_host_enqueue_ops = (
          infeed_queue.split_inputs_and_generate_enqueue_ops(
              unsharded_tensor_list,
              placement_function=lambda x: device,
              tpu_ordinal_function=tpu_ordinal_function_impl))

      if ctx.embedding_config:
        per_host_enqueue_ops.extend(
            ctx.embedding_config.tpu_embedding.generate_enqueue_ops(
                enqueue_datas_list))

      if signals is None:
        return per_host_enqueue_ops
      else:
        return {
            'ops': per_host_enqueue_ops,
            'signals': signals,
        }

  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer


def generate_per_host_v2_enqueue_ops_fn_for_host(ctx, input_fn,
                                                 inputs_structure_recorder,
                                                 device, host_id,
                                                 invocation_index):
  """Generates infeed enqueue ops for per-host input_fn on a single host."""
  captured_infeed_queue = _CapturedObject()
  dataset_initializer = None
  with tf.compat.v1.device(device):
    user_context = tpu_context.TPUContext(
        internal_ctx=ctx,
        input_device=device,
        invocation_index=invocation_index,
        host_id=host_id)
    inputs = _Inputs.from_input_fn(input_fn(user_context))
    is_dataset = inputs.is_dataset
    if not is_dataset:
      raise TypeError('`input_fn` must return a `Dataset` for the PER_HOST_V2 '
                      'input pipeline configuration.')

    # Be aware that when num_cores_per_replica > num_cores_per_host,
    # ctx.num_of_replicas_per_host is 0.
    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
      inputs = _InputsWithStoppingSignals(
          dataset=inputs.dataset,
          batch_size=ctx.batch_size_for_input_fn,
          add_padding=True,
          num_invocations_per_step=max(1, ctx.num_of_replicas_per_host))

    dataset_initializer = inputs.dataset_initializer()

    tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)

  def device_function_impl(shard_id):
    if ctx.device_assignment is not None:
      # Find the replica_id of the host's logical core 0.
      # The current host_id is guaranteed to contain the logical core 0,
      # even when num_cores_per_replica > num_cores_per_host -- the function
      # caller makes sure that this host_id will must be receiving data (calls
      # input_fn).
      replica_id = ctx.device_assignment.lookup_replicas(
          task_id=host_id, logical_core=0)[shard_id]
      return ctx.tpu_host_placement_function(replica_id=replica_id)
    else:
      return None

  def enqueue_ops_fn():
    """Generates the per_host enqueue ops."""
    control_deps = []
    per_host_sharded_inputs = []
    enqueue_datas_list = []
    # Be aware that when num_cores_per_replica > num_cores_per_host,
    # ctx.num_of_replicas_per_host is 0.
    num_replicas_per_host = max(1, ctx.num_of_replicas_per_host)
    cached_signals = None
    with tf.compat.v1.device(device):
      if not inputs.is_dataset:
        raise TypeError('`input_fn` must return a `Dataset` for this mode.')
      for host in range(num_replicas_per_host):
        # Use control dependencies to ensure a deterministic ordering.
        if ctx.allow_per_host_v2_parallel_get_next:
          features, labels = inputs.features_and_labels()  # Calls get_next()
        with tf.control_dependencies(control_deps):
          if not ctx.allow_per_host_v2_parallel_get_next:
            features, labels = inputs.features_and_labels()  # Calls get_next()
          signals = inputs.signals()

          # All the replicas share the replica 0's stopping signal.
          # This avoids inconsistent state among different model replcias.
          if cached_signals:
            signals['stopping'] = cached_signals['stopping']
          else:
            cached_signals = signals

        features, labels, enqueue_data = (
            _tpu_estimator_embedding.split_inputs(ctx, features, labels))
        if len(enqueue_data) != 1:
          raise RuntimeError(('Missing or extra enqueue_data for host {}. '
                              'len(enqueue_data) = {}.').format(
                                  host, len(enqueue_data)))
        enqueue_datas_list.append(enqueue_data[0])

        inputs_structure_recorder.validate_and_record_structure(
            features, labels)
        flattened_inputs = (
            inputs_structure_recorder.flatten_features_and_labels(
                features, labels, signals))
        control_deps.extend(flattened_inputs)
        per_host_sharded_inputs.append(flattened_inputs)

      if inputs_structure_recorder.flattened_input_dims:
        input_partition_dims = inputs_structure_recorder.flattened_input_dims
        if signals:
          input_partition_dims += [None] * len(signals)
        # pylint: disable=protected-access
        infeed_queue = tpu_feed._PartitionedInfeedQueue(
            number_of_tuple_elements=len(per_host_sharded_inputs[0]),
            host_id=host_id,
            input_partition_dims=input_partition_dims,
            device_assignment=ctx.device_assignment)
        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
            per_host_sharded_inputs)
      else:
        infeed_queue = tpu_feed.InfeedQueue(
            number_of_tuple_elements=len(per_host_sharded_inputs[0]))
        per_host_enqueue_ops = infeed_queue.generate_enqueue_ops(
            per_host_sharded_inputs,
            tpu_ordinal_function=tpu_ordinal_function_impl,
            placement_function=device_function_impl)
      captured_infeed_queue.capture(infeed_queue)

    if ctx.embedding_config:
      per_host_enqueue_ops.extend(
          ctx.embedding_config.tpu_embedding.generate_enqueue_ops(
              enqueue_datas_list))

    if signals is None:
      return per_host_enqueue_ops
    else:
      return {
          'ops': per_host_enqueue_ops,
          'signals': signals,
      }

  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer


def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
                                      num_hosts):
  """Generates infeed enqueue ops for one input_fn on all the hosts."""
  captured_infeed_queue = _CapturedObject()
  dataset_initializer = None
  device_0 = ctx.tpu_host_placement_function(host_id=0)
  with tf.compat.v1.device(device_0):
    user_context = tpu_context.TPUContext(
        internal_ctx=ctx, input_device=device_0, invocation_index=0, host_id=0)
    inputs = _Inputs.from_input_fn(input_fn(user_context))
    is_dataset = inputs.is_dataset

    if ctx.mode == model_fn_lib.ModeKeys.PREDICT:
      if not is_dataset:
        raise TypeError(
            'For mode PREDICT, `input_fn` must return `Dataset` instead of '
            '`features` and `labels`.')
      inputs = _InputsWithStoppingSignals(
          dataset=inputs.dataset,
          batch_size=ctx.batch_size_for_input_fn,
          add_padding=True)

    if is_dataset:
      dataset_initializer = inputs.dataset_initializer()
    num_replicas_per_host = ctx.num_of_replicas_per_host

  def tpu_ordinal_function_impl(shard_id):
    if ctx.device_assignment:
      return ctx.device_assignment.tpu_ordinal(replica=shard_id)
    else:
      return shard_id % num_replicas_per_host

  def device_function_impl(shard_id):
    # shard_id ranges from 0 to num_of_replicas_per_host - 1.
    # A shard is a replica inside a host.
    # In broadcast mode (generate_broadcast_enqueue_ops_fn), the enqueue ops
    # are always executed on the first host. Thus shard_id equals to
    # replica_id.
    return ctx.tpu_host_placement_function(replica_id=shard_id)

  def enqueue_ops_fn():
    """Generates enqueue ops for all the hosts."""
    broadcasted_inputs = []
    flattened_inputs = None  # Cache result from input_fn.
    signals = None
    num_replicas = ctx.num_replicas
    core_id = 0
    for host_id in xrange(num_hosts):
      with tf.compat.v1.device(
          ctx.tpu_host_placement_function(host_id=host_id)):
        for _ in xrange(ctx.num_of_replicas_per_host):
          # Note: input_fn is only called once at host 0 for the first replica.
          # The features and labels returned from that invocation are
          # broadcasted to other replicas(including the replicas on other
          # hosts).
          if flattened_inputs is None:
            features, labels = inputs.features_and_labels()  # Calls get_next()
            signals = inputs.signals()

            inputs_structure_recorder.validate_and_record_structure(
                features, labels)
            flattened_inputs = (
                inputs_structure_recorder.flatten_features_and_labels(
                    features, labels, signals))
            if (ctx.config.tpu_config.eval_training_input_configuration is
                tpu_config.InputPipelineConfig.SLICED):
              input_slices = [
                  tf.split(x, num_replicas) for x in flattened_inputs
              ]
          if (ctx.config.tpu_config.eval_training_input_configuration is
              tpu_config.InputPipelineConfig.SLICED):
            # for each core, slice out the flattened_inputs for each core.
            broadcasted_inputs.append([x[core_id] for x in input_slices])
            core_id += 1
          else:
            broadcasted_inputs.append(flattened_inputs)

    infeed_queue = tpu_feed.InfeedQueue(
        number_of_tuple_elements=len(broadcasted_inputs[0]))
    captured_infeed_queue.capture(infeed_queue)
    enqueue_ops = infeed_queue.generate_enqueue_ops(
        broadcasted_inputs,
        tpu_ordinal_function=tpu_ordinal_function_impl,
        placement_function=device_function_impl)
    if signals is None:
      return enqueue_ops
    else:
      return {
          'ops': enqueue_ops,
          'signals': signals,
      }

  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer


class TensorPacker(object):
  """Pack and unpack small tensors into a big one for efficiency."""

  def __init__(self, small_feature_dim_size,
               minimum_num_small_features_to_group):
    # Max size of the second dimension for a feature to count as "small".
    self._small_feature_dim_size = small_feature_dim_size
    # Minimum number of small features (per dtype) worth concatenating.
    self._minimum_num_small_features_to_group = (
        minimum_num_small_features_to_group)

  def maybe_concatenate_features(self, features):
    """If there are enough small tensors, concat them for performance."""
    self._small_feature_names = {}
    self._small_feature_sizes = {}
    feature_names = _extract_key_names(features)
    if feature_names:  # Not a single tensor.
      # First pass: see if it is worth concatenating the small features.
      for name in feature_names:
        tensor = features[name]
        # We do not handle nested inputs here.
        if not isinstance(tensor, tf.Tensor):
          return
        shape = tensor.get_shape().as_list()
        dtype = tensor.dtype
        if (len(shape) == 2 and shape[1] is not None and
            shape[1] <= self._small_feature_dim_size):
          tf.compat.v1.logging.log_first_n(
              tf.compat.v1.logging.INFO, 'Found small feature: %s %s', 1, name,
              shape)
          if tensor.dtype not in self._small_feature_names:
            self._small_feature_names[dtype] = []
            self._small_feature_sizes[dtype] = []
          self._small_feature_names[dtype].append(name)
          self._small_feature_sizes[dtype].append(shape[1])

      dtypes_ = list(self._small_feature_names.keys())
      for dtype in dtypes_:
        # If we could find 5 (or more) [batch_size, 1] dense features,
        # we will group them.
if (len(self._small_feature_names[dtype]) < self._minimum_num_small_features_to_group): self._small_feature_names.pop(dtype) # reset self._small_feature_sizes.pop(dtype) # reset # Second pass: separate small features out small_feature_tensors = {} for dtype in self._small_feature_names: small_feature_tensors[dtype] = [] for name in self._small_feature_names[dtype]: small_feature_tensors[dtype].append(features.pop(name)) # Add the concat Tensor to features with a special key. for dtype in self._small_feature_names: key = self._get_small_feature_key(dtype) if key in features: raise ValueError('{} is reserved as feature key for concatenated' 'small features.') features[key] = (tf.concat(small_feature_tensors[dtype], axis=1)) def maybe_split_features(self, maybe_concatenated_features): for dtype in self._small_feature_names: key = self._get_small_feature_key(dtype) concatenated_small_features = maybe_concatenated_features.pop(key) splits = tf.split( concatenated_small_features, self._small_feature_sizes[dtype], axis=1) for name, split in zip(self._small_feature_names[dtype], splits): maybe_concatenated_features[name] = split def _get_small_feature_key(self, dtype): return _TENSOR_PACKER_CONCATENATED_SMALL_FEATURES_KEY + '_' + str(dtype) class _InputPipeline(object): """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue. `_InputPipeline` abstracts the per-core/per-host `input_fn` invocation from call site. To be precise, based on the configuration in `_InternalTPUContext`, it invokes `input_fn` for all cores (usually multi-host TPU training) or for one host (usually for single-host TPU evaluation), and sends all `features` and `labels` returned by `input_fn` to TPU infeed. For per-core invocation, `features` and `labels` are piped to infeed directly, one tuple for each core. For per-host invocation, `features` and `labels` are split at host (with respect to `batch_axis`) and piped to all cores accordingly. 
  In addition, flatten/unflatten are handled by `_InputPipeline` also.  Model
  inputs returned by the `input_fn` can have one of the following forms:
  1. features
  2. (features, labels)
  3. ((arbitrarily nested structure of features), labels)

  Internally, form 1 is reformed to `(features, None)` as features and labels
  are passed separately to underlying methods. For TPU training, TPUEstimator
  may expect multiple `features` and `labels` tuples one for each core.

  TPUEstimator allows various different structures for inputs (namely
  `features` and `labels`). Both `features` and `labels` can be any nested
  structure supported by TF nest (namely, dict, tuples, namedtuples or any
  nested structure of such of Tensors). `labels` could be `None` as well.

  These are flattened before they are passed to the infeed/outfeed library
  as that expects flattened lists.
  """

  class InputsStructureRecorder(object):
    """The recorder to record inputs structure."""

    def __init__(self, input_partition_dims=None):
      """Constructor.

      Args:
        input_partition_dims: an optional 1- or 2-element list; element 0 is
          the partition dims for features and element 1 (if present) for
          labels.
      """
      # Holds the structure of inputs
      self._feature_structure = {}
      self._flattened_input_dims = None

      if input_partition_dims:
        # This should have been validated in TPUConfig.
        assert len(input_partition_dims) <= 2, 'must have 1 or 2 elements.'
        if len(input_partition_dims) == 2:
          self._feature_dims, self._label_dims = input_partition_dims
        else:
          self._feature_dims = input_partition_dims[0]
          self._label_dims = None

        assert self._feature_dims is not None, ('input_partition_dims[0] must '
                                                'not be None')
      else:
        self._feature_dims = None
        self._label_dims = None

      # Internal state. Set to True once the structure of the first batch of
      # inputs has been recorded; subsequent calls are no-ops.
      self._initialized = False

    @property
    def flattened_input_dims(self):
      assert self._initialized, 'InputsStructureRecorder is not initialized.'
      return self._flattened_input_dims

    def has_labels(self):
      # True iff a labels entry was recorded by flatten_features_and_labels.
      return 'labels' in self._feature_structure

    def _flatten_input_dims(self, features, labels, feature_dims, label_dims):
      """Flatten input dims with the same order as flattened input tensors.

      Args:
        features: the (possibly nested) features structure.
        labels: the (possibly nested) labels structure, or None.
        feature_dims: partition dims matching the structure of `features`.
        label_dims: partition dims matching the structure of `labels`, or
          None (in which case labels are not partitioned).

      Returns:
        A flat list of partition dims aligned with the flattened inputs.

      Raises:
        ValueError: if the dims structure does not match features/labels.
      """
      try:
        flattened_input_dims = data_nest.flatten_up_to(features, feature_dims)
      except TypeError as e:
        raise ValueError(
            'TPUConfig.input_partition_dims[0] mismatched the structure of'
            ' features. input_partition_dims[0]: {}, features {}. {}'.format(
                feature_dims, features, e))

      if labels is not None:
        if label_dims is not None:
          try:
            flattened_input_dims.extend(
                data_nest.flatten_up_to(labels, self._label_dims))
          except TypeError as e:
            raise ValueError(
                'TPUConfig.input_partition_dims[1] mismatched the structure of'
                ' labels. input_partition_dims[1]: {}, labels: {}. {}'.format(
                    label_dims, labels, e))
        else:
          # Labels present but not partitioned: pad with None placeholders so
          # the flat dims list stays aligned with the flat tensor list.
          num_label_tensors = len(data_nest.flatten(labels))
          flattened_input_dims.extend([None] * num_label_tensors)
      return flattened_input_dims

    def validate_and_record_structure(self, features, labels):
      """Validates and records the structure of `features` and `labels`."""
      # Extract structure.
      feature_names = _extract_key_names(features)
      label_names = _extract_key_names(labels)

      if not self._initialized:
        # Record structure.
        self._initialized = True
        if self._feature_dims is not None:
          feature_dims_names = _extract_key_names(self._feature_dims)
          if feature_dims_names != feature_names:
            raise ValueError(
                'TPUConfig.input_partition_dims[0] mismatched feature'
                ' keys. Expected {}, got {}'.format(feature_names,
                                                    feature_dims_names))
          label_dims_names = _extract_key_names(self._label_dims)
          if self._label_dims is not None and label_dims_names != label_names:
            raise ValueError(
                'TPUConfig.input_partition_dims[1] mismatched label'
                ' keys. Expected {}, got {}'.format(label_names,
                                                    label_dims_names))
          self._flattened_input_dims = self._flatten_input_dims(
              features, labels, self._feature_dims, self._label_dims)

    def flatten_features_and_labels(self, features, labels, signals=None):
      """Flattens the `features` and `labels` to a single tensor list."""
      # Small features may be packed into one tensor per dtype before
      # flattening; the same packer instance is reused to undo this later.
      self.tensor_packer = TensorPacker(
          _TENSOR_PACKER_SMALL_FEATURE_DIM_SIZE,
          _TENSOR_PACKER_MINIMUM_NUM_SMALL_FEATURES_TO_GROUP)
      self.tensor_packer.maybe_concatenate_features(features)
      self._feature_structure['features'] = features
      if labels is not None:
        self._feature_structure['labels'] = labels
      if signals is not None:
        self._feature_structure['signals'] = signals
      return data_nest.flatten(self._feature_structure)

    def unflatten_features_and_labels(self, flattened_inputs):
      """Restores the flattened inputs to original features and labels form.

      Args:
        flattened_inputs: Flattened inputs for each shard.

      Returns:
        A tuple of (`features`, `labels`), where `labels` could be None.
        Each one, if present, should have identical structure (single tensor
        vs dict) as the one returned by input_fn.

      Raises:
        ValueError: If the number of expected tensors from `flattened_inputs`
          mismatches the recorded structure.
      """
      unflattened_inputs = data_nest.pack_sequence_as(self._feature_structure,
                                                      flattened_inputs)
      features = unflattened_inputs['features']
      # Undo the small-feature packing done in flatten_features_and_labels.
      self.tensor_packer.maybe_split_features(features)
      return _Inputs(
          features,
          unflattened_inputs.get('labels'),
          signals=unflattened_inputs.get('signals'))

  def __init__(self, input_fn, batch_axis, ctx):
    """Constructor.

    Args:
      input_fn: input fn for train or eval.
      batch_axis: A python tuple of int values describing how each tensor
        produced by the Estimator `input_fn` should be split across the TPU
        compute shards.
      ctx: A `_InternalTPUContext` instance with mode.

    Raises:
      ValueError: If both `sharded_features` and `num_cores` are `None`.
""" self._inputs_structure_recorder = _InputPipeline.InputsStructureRecorder( ctx.input_partition_dims) self._sharded_per_core = ctx.is_input_sharded_per_core() self._input_fn = input_fn self._infeed_queue = None self._ctx = ctx self._batch_axis = batch_axis def generate_infeed_enqueue_ops_and_dequeue_fn(self): """Generates infeed enqueue ops and dequeue_fn.""" # While tf.while_loop is called, the body function, which invokes # `enqueue_fn` passed in, is called to construct the graph. So, input_fn # structure is recorded. enqueue_ops, all_hooks, run_infeed_loop_on_coordinator = ( self._invoke_input_fn_and_record_structure()) self._validate_input_pipeline() def dequeue_fn(): """dequeue_fn is used by TPU to retrieve the tensors.""" # In the model-parallel case, both the host-side and device-side # computations must agree on the core on which infeed takes place. We # choose to perform infeed on logical core 0 of each replica. values = self._infeed_queue.generate_dequeue_op(tpu_device=0) # The unflatten process uses the structure information recorded above. return self._inputs_structure_recorder.unflatten_features_and_labels( values) return (enqueue_ops, dequeue_fn, all_hooks, run_infeed_loop_on_coordinator) def _invoke_input_fn_and_record_structure(self): """Deploys the input pipeline and record input structure.""" enqueue_ops = [] infeed_queues = [] all_dataset_initializers = [] num_hosts = self._ctx.num_hosts tpu_host_placement_fn = self._ctx.tpu_host_placement_function run_infeed_loop_on_coordinator = True if self._sharded_per_core: # Per-Core input pipeline deployment. # Invoke input pipeline for each core and placed on the corresponding # host. 
for host_id in range(num_hosts): host_device = tpu_host_placement_fn(host_id=host_id) with tf.compat.v1.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): enqueue_ops_fn, captured_infeed_queue = ( generate_per_core_enqueue_ops_fn_for_host( self._ctx, self._input_fn, self._inputs_structure_recorder, host_device, host_id)) if _WRAP_INPUT_FN_INTO_WHILE_LOOP: run_infeed_loop_on_coordinator = False enqueue_ops.append( _wrap_computation_in_while_loop( device=host_device, op_fn=enqueue_ops_fn)) else: enqueue_ops.append(enqueue_ops_fn()) # Infeed_queue_getter must be called after enqueue_ops_fn is called. infeed_queues.append(captured_infeed_queue.get()) elif self._ctx.is_input_broadcast_with_iterators(): # Only calls input_fn in host 0. host_device = tpu_host_placement_fn(host_id=0) enqueue_ops_fn, captured_infeed_queue, dataset_initializer = ( generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn, self._inputs_structure_recorder, num_hosts)) if dataset_initializer: all_dataset_initializers.append(dataset_initializer) run_infeed_loop_on_coordinator = False wrap_fn = ( _wrap_computation_in_while_loop if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else _wrap_computation_in_while_loop_with_stopping_signals) enqueue_ops.append(wrap_fn(device=host_device, op_fn=enqueue_ops_fn)) else: enqueue_ops.append(enqueue_ops_fn()) infeed_queues.append(captured_infeed_queue.get()) else: # This branch handles two senarios: # num_cores_per_replica > num_cores_per_host # and num_cores_per_replica <= num_cores_per_host # First, get the set of host_ids, by iterating replicas. # We only want and will get the set of *unique* host_ids # *that will call input_fn*. For each replica, we only call the input_fn # from the CPU host that contains logical core 0. # Use a list here to ensure deterministic order. 
host_id_with_invocation_id_pair = [] if not self._ctx.is_replica_across_hosts(): for host_id in range(num_hosts): invocation_index = host_id host_id_with_invocation_id_pair.append((host_id, invocation_index)) else: for replica_id in xrange(self._ctx.num_replicas): invocation_index = replica_id host_device, _ = self._ctx.device_for_replica(replica_id) # TODO(lehou): Get host_id in a better way. host_id = int(host_device.split('/task:')[1].split('/device:')[0]) host_id_with_invocation_id_pair.append((host_id, invocation_index)) for (host_id, invocation_index) in host_id_with_invocation_id_pair: host_device = tpu_host_placement_fn(host_id=host_id) with tf.compat.v1.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): if self._ctx.is_input_per_host_with_iterators(): enqueue_ops_fn, captured_infeed_queue, dataset_initializer = ( generate_per_host_v2_enqueue_ops_fn_for_host( self._ctx, self._input_fn, self._inputs_structure_recorder, host_device, host_id, invocation_index)) else: enqueue_ops_fn, captured_infeed_queue, dataset_initializer = ( generate_per_host_enqueue_ops_fn_for_host( self._ctx, self._input_fn, self._inputs_structure_recorder, self._batch_axis, host_device, host_id)) # NOTE(xiejw): We dispatch here based on the return type of the # users `input_fn`. # # 1. If input_fn returns a Dataset instance, we initialize the # iterator outside of tf.while_loop, and call the iterator.get_next # inside tf.while_loop. This should be always safe. # # 2. If input_fn returns (features, labels), it is too late to wrap # them inside tf.while_loop, as resource initialization cannot be # handled in TF control flow properly. In this case, we will use # python loop to enqueue the data into TPU system. This may be # slow compared to the previous case. 
if dataset_initializer: all_dataset_initializers.append(dataset_initializer) run_infeed_loop_on_coordinator = False wrap_fn = ( _wrap_computation_in_while_loop if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else _wrap_computation_in_while_loop_with_stopping_signals) enqueue_ops.append( wrap_fn(device=host_device, op_fn=enqueue_ops_fn)) else: enqueue_ops.append(enqueue_ops_fn()) infeed_queues.append(captured_infeed_queue.get()) # infeed_queue is used to generate dequeue ops. The only thing it uses for # dequeue is dtypes and types. So, any one can be used. Here, grab the # first one. self._infeed_queue = infeed_queues[0] return enqueue_ops, [ util_lib.MultiHostDatasetInitializerHook(all_dataset_initializers) ], run_infeed_loop_on_coordinator def _validate_input_pipeline(self): """Validates the input pipeline. Perform some sanity checks to log user friendly information. We should error out to give users better error message. But, if _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break user code, so, log a warning. Raises: RuntimeError: If the validation failed. """ if tf.compat.v1.get_default_graph().get_collection( tf.compat.v1.GraphKeys.QUEUE_RUNNERS): err_msg = ('Input pipeline contains one or more QueueRunners. ' 'It could be slow and not scalable. Please consider ' 'converting your input pipeline to use `tf.data` instead (see ' 'https://www.tensorflow.org/guide/datasets for ' 'instructions.') if _WRAP_INPUT_FN_INTO_WHILE_LOOP: raise RuntimeError(err_msg) else: logging.warn(err_msg) def call_computation(computation_inputs, computation, batch_config=None): """Call computation. Args: computation_inputs: A tensor or dict of tensors, the inputs to the computation. computation: A Python function that takes no inputs and builds computation graph. If `computation` returns m outputs, this function will return a list of m Tensors. batch_config: A BatchConfig named tuple specifying the batching configuration to use for inference batching. 
  Returns:
    A list of output tensors.
  """

  # Using `TPUPartitionedCall` makes it possible to target a different
  # TPU core with every `Session.run()` call. Note that the entire inference
  # graph executes on a single core, and that invocations of this graph
  # will round-robin among the cores attached to a host.
  def tpu_partitioned_call(partition_inputs):

    # capture_resource_var_by_value enables variables to be mirrored on TPU
    # to avoid fetching from CPU, since variables do not change during
    # inference.
    @function.Defun(capture_resource_var_by_value=False)
    def tpu_subgraph():
      return computation(partition_inputs)

    return tpu_functional.TPUPartitionedCall(
        args=tpu_subgraph.captured_inputs,
        device_ordinal=tpu_ops.tpu_ordinal_selector(),
        Tout=[o.type for o in tpu_subgraph.definition.signature.output_arg],
        f=tpu_subgraph)

  # Not using Batching Function but use TPUPartitionedCall/all cores.
  if not batch_config:
    return tpu_partitioned_call(computation_inputs)

  # Use Batching Function and TPUPartitionedCall/all cores.

  # Note that BatchingFunction requires a list of tensors and doesn't support
  # a dict of tensors. So we preserve the structure by deterministically
  # flattening the dict before batching and then recomposing it after batching
  # to feed into the computation.
  ordered_inputs_list = tf.nest.flatten(computation_inputs)

  @tf.nondifferentiable_batch_function(
      num_batch_threads=batch_config.num_batch_threads,
      max_batch_size=batch_config.max_batch_size,
      batch_timeout_micros=batch_config.batch_timeout_micros,
      allowed_batch_sizes=batch_config.allowed_batch_sizes,
      max_enqueued_batches=batch_config.max_enqueued_batches,
      autograph=False)
  def batched_tpu_computation(*tensor_args):
    """Recompose the input feature dict and calls the TPU computation."""
    computation_feature_input = tf.nest.pack_sequence_as(
        computation_inputs, tensor_args)
    return tpu_partitioned_call(computation_feature_input)

  return batched_tpu_computation(*ordered_inputs_list)


class _ModelFnWrapper(object):
  """A `model_fn` wrapper.

  This makes calling model_fn on CPU and TPU easier and more consistent and
  performs necessary check and mutation required by TPU training and
  evaluation.

  In addition, this wrapper manages converting the `model_fn` to a single TPU
  train and eval step.
  """

  def __init__(self, model_fn, config, params, ctx):
    self._model_fn = model_fn
    self._config = config
    self._params = params
    self._ctx = ctx

  def call_without_tpu(self, features, labels, is_export_mode):
    # Thin passthrough used when the model runs on CPU (use_tpu=False or
    # export mode).
    return self._call_model_fn(features, labels, is_export_mode=is_export_mode)

  def _add_embedding_features(self, features, hook_dummy_table_variables):
    """Add embedding features, optionally add hook to intercept gradient."""
    if self._ctx.embedding_config:
      tpu_embedding_ = self._ctx.embedding_config.tpu_embedding
      embedding_activations = tpu_embedding_.get_activations()
      if hook_dummy_table_variables:
        # Training path: route activations through dummy table variables so
        # gradients w.r.t. the embedding can be intercepted later.
        new_embedding_activations = (
            tpu_embedding_gradient.hook_dummy_table_variables_to_activations(
                tpu_embedding_, embedding_activations,
                self._ctx.embedding_config.dummy_table_variables))
        features.update(new_embedding_activations)
      else:
        features.update(embedding_activations)

  def convert_to_single_tpu_train_step(self, dequeue_fn):
    """Converts the user provided `model_fn` as a single train step on TPU.
    The user provided `model_fn` takes input tuple
    (features, labels) and produces the EstimatorSpec with train_op and loss
    for train `mode`. This usually represents a single train computation on
    CPU.

    For TPU training, a train (computation) step is first wrapped in a
    tf.while_loop control flow to repeat for many times and then replicated
    to all TPU shards. Besides the input should be taken from TPU infeed
    rather than input pipeline (input_fn) directly. To fit TPU loop and
    replicate pattern, the original train computation should be reformed,
    which is the returned `train_step`.

    Args:
      dequeue_fn: The function to retrieve inputs, features and labels, from
        TPU infeed dequeue channel.

    Returns:
      A tuple of train_fn, host_calls, and captured scaffold_fn. The train_fn
      representing the train step for TPU.
    """
    host_call = _OutfeedHostCall(
        self._ctx,
        outfeed_every_n_steps=self._config.tpu_config
        .experimental_host_call_every_n_steps)
    captured_scaffold_fn = _CapturedObject()
    captured_training_hooks = _CapturedObject()

    def train_step(step):
      """Training step function for use inside a while loop."""
      inputs = dequeue_fn()
      features, labels = inputs.features_and_labels()
      self._add_embedding_features(features, True)

      estimator_spec = self._verify_estimator_spec(
          self._call_model_fn(features, labels))
      loss, train_op = estimator_spec.loss, estimator_spec.train_op

      if tensor_tracer.TensorTracer.is_enabled():
        # Optionally instrument the graph for tensor tracing; the traced loss
        # replaces the original one.
        tt = tensor_tracer.TensorTracer()
        loss = tt.trace_tpu(tf.compat.v1.get_default_graph(), loss, train_op,
                            self._ctx.num_replicas)
        tracer_host_call = tt.host_call_deps_and_fn()
      else:
        tracer_host_call = {}

      if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec):  # pylint: disable=protected-access
        captured_scaffold_fn.capture(estimator_spec.scaffold_fn)
      else:
        captured_scaffold_fn.capture(None)

      captured_training_hooks.capture(estimator_spec.training_hooks)

      if self._ctx.embedding_config is None:
        apply_sparse_grads = []
      else:
        tpu_embedding_ = self._ctx.embedding_config.tpu_embedding
        gradients = (
            tpu_embedding_gradient.get_gradients_through_dummy_table_variables(
                tpu_embedding_))
        grad_multiplier = self._ctx.embedding_config.get_grad_multiplier()
        if grad_multiplier is not None:
          scaled_gradients = collections.OrderedDict(
              (k, v * grad_multiplier) for k, v in six.iteritems(gradients))
        else:
          scaled_gradients = gradients
        apply_sparse_grads = [
            tpu_embedding_.generate_send_gradients_op(
                scaled_gradients, tf.compat.v1.train.get_global_step())
        ]

      stopping_signals = None
      user_provided_stopping_signals_name = None
      if self._ctx.feed_hook is not None:
        stopping_signals, user_provided_stopping_signals_name = \
          self._ctx.feed_hook.get_stopping_signals_and_name(features)

      # We must run train_op to update the variables prior to running the
      # outfeed.
      with tf.control_dependencies([train_op] + apply_sparse_grads):
        host_call_outfeed_ops = []
        host_call_fn, host_call_args = None, []

        if (isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)  # pylint: disable=protected-access
            and estimator_spec.host_call is not None):
          host_call_fn, host_call_args = estimator_spec.host_call

        if stopping_signals is not None:
          identity_fn = lambda **kwargs: kwargs
          tracer_host_call[user_provided_stopping_signals_name] = [
              identity_fn, stopping_signals
          ]

        if host_call_fn:
          # Ignore dummy hostcalls (no arguments)
          if host_call_args:
            tracer_host_call.update({'host_call': estimator_spec.host_call})
            host_call.record(tracer_host_call)
            host_call_outfeed_ops = host_call.create_enqueue_op(step)
        elif tracer_host_call:
          host_call.record(tracer_host_call)
          host_call_outfeed_ops = host_call.create_enqueue_op(step)
        else:
          # Create a host call for the loss to track execution progress
          # Without this, we don't have any indication of the state of the
          # TPU program.
          tracer_host_call.update(
              {'host_call': (lambda loss_t: loss_t, [tf.reshape(loss, [1])])})
          host_call.record(tracer_host_call)
          host_call_outfeed_ops = host_call.create_enqueue_op(step)

        with tf.control_dependencies(host_call_outfeed_ops):
          return tf.identity(loss)

    return (train_step, host_call, captured_scaffold_fn,
            captured_training_hooks)

  def convert_to_single_tpu_eval_step(self, dequeue_fn):
    """Converts the user provided `model_fn` as a single eval step on TPU.

    Similar to training, the user provided `model_fn` takes input tuple
    (features, labels) and produces the TPUEstimatorSpec with eval_metrics
    for eval `mode`. This usually represents a single evaluation computation
    on CPU.

    For TPU evaluation, a eval (computation) step is first wrapped in a
    tf.while_loop control flow to repeat for many times and then replicated
    to all TPU shards. Besides the input and output are slightly different.
    Input, features and labels, should be taken from TPU infeed rather than
    input pipeline (input_fn) directly. Output is managed in two stages.
    First, the model outputs as the result of evaluation computation, usually
    model logits, should be transferred from TPU system to CPU. Then, all
    model outputs are concatenated first on CPU and sent to the metric_fn for
    metrics computation. To fit TPU evaluation pattern, the original eval
    computation should be reformed, which is the returned `eval_step`.

    Args:
      dequeue_fn: The function to retrieve inputs, features and labels, from
        TPU infeed dequeue channel.

    Returns:
      A tuple of eval_fn, host_calls, and captured scaffold_fn. The eval_fn
      representing the eval step for TPU.
""" host_calls = _OutfeedHostCall(self._ctx) captured_scaffold_fn = _CapturedObject() captured_eval_hooks = _CapturedObject() def eval_step(total_loss): """Evaluation step function for use inside a while loop.""" inputs = dequeue_fn() features, labels = inputs.features_and_labels() self._add_embedding_features(features, False) tpu_estimator_spec = self._call_model_fn(features, labels) if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access raise RuntimeError( 'estimator_spec used by TPU evaluation must have type' '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) loss = tpu_estimator_spec.loss captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) captured_eval_hooks.capture(tpu_estimator_spec.evaluation_hooks) to_record = {} if tpu_estimator_spec.eval_metrics: to_record['eval_metrics'] = tpu_estimator_spec.eval_metrics if tpu_estimator_spec.host_call is not None: # We assume that evaluate won't update global step, so we don't wrap # this host_call. to_record['host_call'] = tpu_estimator_spec.host_call host_calls.record(to_record) with tf.control_dependencies(host_calls.create_enqueue_op()): return tf.math.add(total_loss, loss) return eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks def convert_to_single_tpu_predict_step(self, dequeue_fn): """Converts user provided model_fn` as a single predict step on TPU. Args: dequeue_fn: The function to retrieve inputs, features and labels, from TPU infeed dequeue channel. Returns: A tuple of predict_fn, host_calls, and captured scaffold_fn. The predict_fn representing the predict step for TPU. 
""" host_calls = _OutfeedHostCall(self._ctx) captured_scaffold_fn = _CapturedObject() captured_predict_hooks = _CapturedObject() def predict_step(unused_scalar_stopping_signal): """Evaluation step function for use inside a while loop.""" inputs = dequeue_fn() features, labels = inputs.features_and_labels() stopping_signals = inputs.signals() assert stopping_signals is not None, ( 'Internal Error: `signals` is missing.') tpu_estimator_spec = self._call_model_fn( features, labels, is_export_mode=False) if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access raise RuntimeError( 'estimator_spec used by TPU prediction must have type' '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) self._verify_tpu_spec_predictions(tpu_estimator_spec.predictions) captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) captured_predict_hooks.capture(tpu_estimator_spec.prediction_hooks) to_record = {} identity_fn = lambda **kwargs: kwargs to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions] to_record['signals'] = [identity_fn, stopping_signals] if tpu_estimator_spec.host_call is not None: to_record['host_call'] = tpu_estimator_spec.host_call host_calls.record(to_record) with tf.control_dependencies(host_calls.create_enqueue_op()): return _StopSignals.as_scalar_stopping_signal(stopping_signals) return (predict_step, host_calls, captured_scaffold_fn, captured_predict_hooks) def _verify_tpu_spec_predictions(self, predictions): """Validates TPUEstimatorSpec.predictions dict.""" # TODO(xiejw): Adds validation for prediction dictionrary. # TODO(xiejw): Adds support for single tensor as predictions. if not isinstance(predictions, dict): raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.') for (key, tensor) in predictions.items(): if tensor.shape.dims[0].value is None: raise ValueError( 'The tensor with key ({}) in TPUEstimatorSpec.predictions has ' 'dynamic shape (should be static). 
Tensor: {}'.format(key, tensor)) return predictions def _validate_model_features_and_labels(self, features, labels, is_export_mode): """Validates that the features and labels for the model function are valid. A valid features/labels object is the one with: - Type: A tensor or any nested structure of tensors supported by TF nest, namely nested dictionary, tuple, namedtuple, or sequence of tensors. - Static shape if is_export_mode is False. Args: features: the features that would be input to the model function. labels: the labels that would be input to the model function. is_export_mode: boolean value specifying if in export mode. Raises: TypeError: If features/labels are not of the correct type. ValueError: If features/labels have dynamic shape. """ def validate(obj, obj_name): """Helper validate function.""" if is_export_mode or self._ctx.is_running_on_cpu(is_export_mode): return if isinstance(obj, tf.Tensor): if not obj.get_shape().is_fully_defined(): raise ValueError( 'The {} to the model returned by input_fn must have static shape.' ' Tensor: {}'.format(obj_name, obj)) else: for tensor in data_nest.flatten(obj): if not tensor.get_shape().is_fully_defined(): raise ValueError( ('The {} to the model returned by input_fn must have static ' 'shape. Tensor: {}').format(obj_name, tensor)) validate(features, 'features') if labels is not None: validate(labels, 'labels') def _call_model_fn(self, features, labels, is_export_mode=False): """Calls the model_fn with required parameters.""" self._validate_model_features_and_labels(features, labels, is_export_mode) model_fn_args = function_utils.fn_args(self._model_fn) kwargs = {} # Makes deep copy with `config` and params` in case user mutates them. 
config = copy.deepcopy(self._config) params = copy.deepcopy(self._params) if 'labels' in model_fn_args: kwargs['labels'] = labels elif labels is not None: raise ValueError( 'model_fn does not take labels, but input_fn returns labels.') if 'mode' in model_fn_args: kwargs['mode'] = self._ctx.mode if 'config' in model_fn_args: kwargs['config'] = config if 'params' in model_fn_args: kwargs['params'] = params if 'params' not in model_fn_args: raise ValueError('model_fn ({}) does not include params argument, ' 'required by TPUEstimator to pass batch size as ' 'params[\'batch_size\']'.format(self._model_fn)) if is_export_mode: batch_size_for_model_fn = None else: batch_size_for_model_fn = self._ctx.batch_size_for_model_fn if batch_size_for_model_fn is not None: _add_item_to_params(params, _BATCH_SIZE_KEY, batch_size_for_model_fn) running_on_cpu = self._ctx.is_running_on_cpu(is_export_mode) # In export mode, params['use_tpu'] has already been set based on mode # (i.e. True for _REWRITE_FOR_INFERENCE_MODE, False otherwise). if not is_export_mode: _add_item_to_params(params, _USE_TPU_KEY, not running_on_cpu) if not running_on_cpu: user_context = tpu_context.TPUContext( internal_ctx=self._ctx, call_from_input_fn=False) _add_item_to_params(params, _CTX_KEY, user_context) estimator_spec = self._model_fn(features=features, **kwargs) if (running_on_cpu and isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)): # pylint: disable=protected-access # The estimator_spec will be passed to `Estimator` directly, which expects # type `EstimatorSpec`. As we are running on the CPU, escape # the TPUInferenceContext. 
graph_context = tf.compat.v1.get_default_graph( )._get_control_flow_context() try: if isinstance(graph_context, tpu._TPUInferenceContext): tf.compat.v1.get_default_graph()._set_control_flow_context( graph_context.outer_context) return estimator_spec.as_estimator_spec() finally: tf.compat.v1.get_default_graph()._set_control_flow_context( graph_context) else: return estimator_spec def _verify_estimator_spec(self, estimator_spec): """Validates the estimator_spec.""" if isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access return estimator_spec err_msg = '{} returned by EstimatorSpec is not supported in TPUEstimator.' if estimator_spec.training_chief_hooks: raise ValueError( err_msg.format('training_chief_hooks') + 'If you want' + ' to pass training hooks, please pass via training_hooks.') if estimator_spec.scaffold: tf.compat.v1.logging.warn( 'EstimatorSpec.Scaffold is ignored by TPU train/eval. ' 'Please use TPUEstimatorSpec.') return estimator_spec class _OutfeedHostCall(object): """Support for `eval_metrics` and `host_call` in TPUEstimatorSpec.""" def __init__(self, ctx, outfeed_every_n_steps=1): self._ctx = ctx self._names = [] # All of these are dictionaries of lists keyed on the name. 
    self._host_fns = {}
    self._tensor_keys = collections.defaultdict(list)
    self._tensors = collections.defaultdict(list)
    self._tensor_dtypes = collections.defaultdict(list)
    self._tensor_shapes = collections.defaultdict(list)
    self._outfeed_every_n_steps = outfeed_every_n_steps

  @staticmethod
  def validate(host_calls):
    """Validates the `eval_metrics` and `host_call` in `TPUEstimatorSpec`."""

    for name, host_call in host_calls.items():
      if not isinstance(host_call, (tuple, list)):
        raise ValueError('{} should be tuple or list'.format(name))
      if len(host_call) != 2:
        raise ValueError('{} should have two elements.'.format(name))
      if not callable(host_call[0]):
        raise TypeError('{}[0] should be callable.'.format(name))
      if not isinstance(host_call[1], (tuple, list, dict)):
        raise ValueError('{}[1] should be tuple or list, or dict.'.format(name))

      if isinstance(host_call[1], (tuple, list)):
        fullargspec = tf_inspect.getfullargspec(host_call[0])
        fn_args = function_utils.fn_args(host_call[0])
        # wrapped_hostcall_with_global_step uses varargs, so we allow that.
        if fullargspec.varargs is None and len(host_call[1]) != len(fn_args):
          raise RuntimeError(
              'In TPUEstimatorSpec.{}, length of tensors {} does not match '
              'method args of the function, which takes {}.'.format(
                  name, len(host_call[1]), len(fn_args)))

  @staticmethod
  def create_cpu_hostcall(host_calls):
    """Runs on the host_call on CPU instead of TPU when use_tpu=False."""

    _OutfeedHostCall.validate(host_calls)
    ret = {}
    for name, host_call in host_calls.items():
      host_fn, tensors = host_call
      if isinstance(tensors, (tuple, list)):
        ret[name] = host_fn(*tensors)
      else:
        # Must be dict.
        try:
          ret[name] = host_fn(**tensors)
        except TypeError as e:
          tf.compat.v1.logging.warn(
              'Exception while calling %s: %s. It is likely the tensors '
              '(%s[1]) do not match the '
              'function\'s arguments', name, e, name)
          raise
    return ret

  def record(self, host_calls):
    """Records the host_call structure.

    Stores the host function and, per tensor, the key (for dicts), dtype and
    shape so outfeed dequeue ops can be reconstructed on the CPU side.
    """

    for name, host_call in host_calls.items():
      host_fn, tensor_list_or_dict = host_call
      self._names.append(name)
      self._host_fns[name] = host_fn

      if isinstance(tensor_list_or_dict, dict):
        for (key, tensor) in six.iteritems(tensor_list_or_dict):
          self._tensor_keys[name].append(key)
          self._tensors[name].append(tensor)
          self._tensor_dtypes[name].append(tensor.dtype)
          self._tensor_shapes[name].append(tensor.shape)
      else:
        # List or tuple.
        self._tensor_keys[name] = None
        for tensor in tensor_list_or_dict:
          self._tensors[name].append(tensor)
          self._tensor_dtypes[name].append(tensor.dtype)
          self._tensor_shapes[name].append(tensor.shape)

  def create_enqueue_op(self, step=None):
    """Create the op to enqueue the recorded host_calls.

    Args:
      step: optional tensor holding the step number within the current
        training loop; required when outfeed happens every n > 1 steps.

    Returns:
      A list of enqueue ops, which is empty if there are no host calls.
    """
    if not self._names:
      return []

    tensors = []
    # TODO(jhseu): Consider deduping tensors.
    for name in self._names:
      tensors.extend(self._tensors[name])

    if self._outfeed_every_n_steps > 1 and step is None:
      raise ValueError('If outfeed is requested every n steps, you must pass '
                       'a tensor whose value is the step number within the '
                       'current training loop.')
    with tf.compat.v1.device(tf.compat.v1.tpu.core(0)):
      if self._outfeed_every_n_steps == 1:
        return [tpu_ops.outfeed_enqueue_tuple(tensors)]
      else:
        # Only enqueue on steps that are multiples of the outfeed period.
        return [
            tf.compat.v1.cond(
                tf.math.equal(
                    tf.math.floormod(step, self._outfeed_every_n_steps), 0),
                lambda: tpu_ops.outfeed_enqueue_tuple(tensors),
                lambda: tf.no_op())
        ]

  def create_tpu_hostcall(self):
    """Sends the tensors through outfeed and runs the host_fn on CPU.

    The tensors are concatenated along dimension 0 to form a global tensor
    across all shards. The concatenated function is passed to the host_fn and
    executed on the first host.

    Returns:
      A dictionary mapping name to the return type of the host_call by that
      name.
    Raises:
      RuntimeError: If outfeed tensor is scalar.
    """
    if not self._names:
      return {}

    ret = {}
    # For each i, dequeue_ops[i] is a list containing the tensors from all
    # shards. This list is concatenated later.
    dequeue_ops = []
    tensor_dtypes = []
    tensor_shapes = []
    for name in self._names:
      for _ in self._tensors[name]:
        dequeue_ops.append([])
      for dtype in self._tensor_dtypes[name]:
        tensor_dtypes.append(dtype)
      for shape in self._tensor_shapes[name]:
        tensor_shapes.append(shape)

    # Outfeed ops execute on each replica's first logical core. Note: we must
    # constraint it such that we have at most one outfeed dequeue and enqueue
    # per replica.
    for i in xrange(self._ctx.num_replicas):
      host_device, ordinal_id = self._ctx.device_for_replica(i)
      with tf.compat.v1.device(host_device):
        outfeed_tensors = tpu_ops.outfeed_dequeue_tuple(
            dtypes=tensor_dtypes,
            shapes=tensor_shapes,
            device_ordinal=ordinal_id)
        for j, item in enumerate(outfeed_tensors):
          dequeue_ops[j].append(item)

    # Deconstruct dequeue ops.
    flat_dequeue_ops = []
    for l in dequeue_ops:
      flat_dequeue_ops.extend(l)

    # Slice the flat per-tensor list back into per-host_call groups, in the
    # same order the tensors were recorded.
    dequeue_ops_by_name = {}
    pos = 0
    for name in self._names:
      dequeue_ops_by_name[name] = dequeue_ops[pos:pos +
                                              len(self._tensors[name])]
      pos += len(self._tensors[name])

    def _call_host_fn(fn, *args, **kw):
      # Run the user's host_fn inside a context that flags invalid hostcall
      # constructs (see CatchInvalidHostcallFunctions).
      context = CatchInvalidHostcallFunctions()
      context.Enter()
      result = fn(*args, **kw)
      context.Exit()
      context.ExitResult(result)
      return result

    # It is assumed evaluation always happens on single host TPU system. So,
    # place all ops on tpu host if possible.
    #
    # TODO(jhseu): Evaluate whether this is right for summaries.
    with tf.compat.v1.device(
        self._ctx.tpu_host_placement_function(replica_id=0)):
      for name in self._names:
        dequeue_ops = dequeue_ops_by_name[name]
        for i, item in enumerate(dequeue_ops):
          # TODO(xiejw): Make the specification of the outfeed combination
          # function more explicit and well-documented. We may want to give the
          # user the option of concatenating along any axis.
          if (self._ctx.config.tpu_config.per_host_input_for_training is
              tpu_config.InputPipelineConfig.BROADCAST):
            # If the infeed is in BROADCAST mode (each core receiving the same
            # input), then we assume that the cores also produce identical
            # copies of the same output, and we simply take the output from
            # the first core. This mode is used by Mesh-TensorFlow.
            with tf.control_dependencies(dequeue_ops[i]):
              dequeue_ops[i] = tf.identity(dequeue_ops[i][0])
          else:
            if dequeue_ops[i][0].shape.ndims == 0:
              raise RuntimeError(
                  'All tensors outfed from TPU should preserve batch size '
                  'dimension, but got scalar {}'.format(dequeue_ops[i][0]))
            # Assume that the input has been batch-split and that axis 0 of the
            # output tensors represents the batch size. Concatenate along
            # the axis 0 to re-combine the batch.
            dequeue_ops[i] = tf.concat(dequeue_ops[i], axis=0)

        if self._tensor_keys[name] is not None:
          # The user-provided eval_metrics[1] is a dict.
          dequeue_ops = dict(zip(self._tensor_keys[name], dequeue_ops))
          try:
            ret[name] = _call_host_fn(self._host_fns[name], **dequeue_ops)
          except TypeError as e:
            tf.compat.v1.logging.warn(
                'Exception while calling %s: %s. It is likely the tensors '
                '(%s[1]) do not match the '
                'function\'s arguments', name, e, name)
            raise
        else:
          ret[name] = _call_host_fn(self._host_fns[name], *dequeue_ops)

    # force all dequeue operations to be run if not consumed by the host calls
    ret['__force_dequeue'] = tf.group(*flat_dequeue_ops)
    return ret


class _OutfeedHostCallHook(tf.compat.v1.train.SessionRunHook):
  """Hook to run host calls when use_tpu=False."""

  def __init__(self, tensors):
    # `tensors` are the already-materialized CPU host_call results to fetch
    # on every step (see create_cpu_hostcall).
    self._tensors = tensors

  def begin(self):
    # We duplicate this code from the TPUInfeedOutfeedSessionHook rather than
    # create a separate hook to guarantee execution order, because summaries
    # need to be initialized before the outfeed thread starts.
    # TODO(jhseu): Make a wrapper hook instead?
self._init_ops = summary_ops_v2.summary_writer_initializer_op() # Get all the writer resources from the initializer, so we know what to # flush. self._finalize_ops = [] for op in self._init_ops: self._finalize_ops.append( summary_ops_v2.legacy_raw_flush(writer=op.inputs[0])) def after_create_session(self, session, coord): session.run(self._init_ops) def before_run(self, run_context): return tf.compat.v1.train.SessionRunArgs(self._tensors) def end(self, session): session.run(self._finalize_ops) class _NotSaver(object): """What to pass instead of a saver object if you don't want saving.""" def __init__(self, message): self._message = message def save(self, *args, **kwargs): del args, kwargs tf.compat.v1.logging.info(self._message) class ExamplesPerSecondHook(tf.compat.v1.train.StepCounterHook): """Calculate and report global_step/sec and examples/sec during runtime.""" def __init__(self, batch_size, every_n_steps=100, every_n_secs=None, output_dir=None, summary_writer=None): self._batch_size = batch_size super(ExamplesPerSecondHook, self).__init__( every_n_steps=every_n_steps, every_n_secs=every_n_secs, output_dir=output_dir, summary_writer=summary_writer) def _log_and_record(self, elapsed_steps, elapsed_time, global_step): global_step_per_sec = elapsed_steps / elapsed_time examples_per_sec = self._batch_size * global_step_per_sec if self._summary_writer is not None: global_step_summary = Summary(value=[ Summary.Value( tag='global_step/sec', simple_value=global_step_per_sec) ]) example_summary = Summary(value=[ Summary.Value(tag='examples/sec', simple_value=examples_per_sec) ]) self._summary_writer.add_summary(global_step_summary, global_step) self._summary_writer.add_summary(example_summary, global_step) tf.compat.v1.logging.info('global_step/sec: %g', global_step_per_sec) tf.compat.v1.logging.info('examples/sec: %g', examples_per_sec) class InstallSignalHandlerHook(tf.compat.v1.train.SessionRunHook): """Change SIGINT (CTRL^C) handler to force quit the process. 
  The default behavior often results in hanging processes.
  The original handler is restored after training/evaluation.
  """

  def __init__(self):
    # Remember the handler installed at construction time so `end` can
    # restore it.
    self._signal_fn = signal.getsignal(signal.SIGINT)

  def before_run(self, run_context):
    # SIG_DFL terminates the process immediately on CTRL^C instead of the
    # Python KeyboardInterrupt machinery, which can hang TPU sessions.
    signal.signal(signal.SIGINT, signal.SIG_DFL)

  def end(self, session):
    signal.signal(signal.SIGINT, self._signal_fn)


class ExportSavedModelApiVersion(enum.Enum):
  # Version of the export_saved_model API; see the TPUEstimator constructor's
  # `export_saved_model_api_version` argument.
  V1 = 1
  V2 = 2


class BatchConfig(
    collections.namedtuple('BatchConfig', [
        'num_batch_threads', 'max_batch_size', 'batch_timeout_micros',
        'allowed_batch_sizes', 'max_enqueued_batches'
    ])):
  """Class to handle config inputs into the batching function."""

  def __new__(cls,
              num_batch_threads,
              max_batch_size,
              batch_timeout_micros,
              allowed_batch_sizes,
              max_enqueued_batches=100):
    """Creates a BatchConfig instance.

    Args:
      num_batch_threads: Number of scheduling threads for processing batches of
        work. Determines the number of batches processed in parallel.
      max_batch_size: Batch sizes will never be bigger than this.
      batch_timeout_micros: Maximum number of microseconds to wait before
        outputting an incomplete batch.
      allowed_batch_sizes: Optional list of allowed batch sizes. If left empty,
        does nothing. Otherwise, supplies a list of batch sizes, causing the op
        to pad batches up to one of those sizes. The entries must increase
        monotonically, and the final entry must equal max_batch_size.
      max_enqueued_batches: The maximum depth of the batch queue. Defaults to
        100.

    Returns:
      A BatchConfig instance.
    """
    return super(BatchConfig, cls).__new__(
        cls,
        num_batch_threads=num_batch_threads,
        max_batch_size=max_batch_size,
        batch_timeout_micros=batch_timeout_micros,
        allowed_batch_sizes=allowed_batch_sizes,
        max_enqueued_batches=max_enqueued_batches)


@estimator_export(v1=['estimator.tpu.TPUEstimator'])
class TPUEstimator(estimator_lib.Estimator):
  """Estimator with TPU support.

  TPUEstimator also supports training on CPU and GPU. You don't need to define
  a separate `tf.estimator.Estimator`.
TPUEstimator handles many of the details of running on TPU devices, such as replicating inputs and models for each core, and returning to host periodically to run hooks. TPUEstimator transforms a global batch size in params to a per-shard batch size when calling the `input_fn` and `model_fn`. Users should specify global batch size in constructor, and then get the batch size for each shard in `input_fn` and `model_fn` by `params['batch_size']`. - For training, `model_fn` gets per-core batch size; `input_fn` may get per-core or per-host batch size depending on `per_host_input_for_training` in `TPUConfig` (See docstring for TPUConfig for details). - For evaluation and prediction, `model_fn` gets per-core batch size and `input_fn` get per-host batch size. Evaluation ========== `model_fn` should return `TPUEstimatorSpec`, which expects the `eval_metrics` for TPU evaluation. If eval_on_tpu is False, the evaluation will execute on CPU or GPU; in this case the following discussion on TPU evaluation does not apply. `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`, where `tensors` could be a list of any nested structure of `Tensor`s (See `TPUEstimatorSpec` for details). `metric_fn` takes the `tensors` and returns a dict from metric string name to the result of calling a metric function, namely a `(metric_tensor, update_op)` tuple. One can set `use_tpu` to `False` for testing. All training, evaluation, and predict will be executed on CPU. `input_fn` and `model_fn` will receive `train_batch_size` or `eval_batch_size` unmodified as `params['batch_size']`. Current limitations: -------------------- 1. TPU evaluation only works on a single host (one TPU worker) except BROADCAST mode. 2. `input_fn` for evaluation should **NOT** raise an end-of-input exception (`OutOfRangeError` or `StopIteration`). And all evaluation steps and all batches should have the same size. Example (MNIST): ---------------- ``` # The metric Fn which runs on CPU. 
def metric_fn(labels, logits): predictions = tf.argmax(logits, 1) return { 'accuracy': tf.compat.v1.metrics.precision( labels=labels, predictions=predictions), } # Your model Fn which runs on TPU (eval_metrics is list in this example) def model_fn(features, labels, mode, config, params): ... logits = ... if mode = tf.estimator.ModeKeys.EVAL: return tpu_estimator.TPUEstimatorSpec( mode=mode, loss=loss, eval_metrics=(metric_fn, [labels, logits])) # or specify the eval_metrics tensors as dict. def model_fn(features, labels, mode, config, params): ... final_layer_output = ... if mode = tf.estimator.ModeKeys.EVAL: return tpu_estimator.TPUEstimatorSpec( mode=mode, loss=loss, eval_metrics=(metric_fn, { 'labels': labels, 'logits': final_layer_output, })) ``` Prediction ========== Prediction on TPU is an experimental feature to support large batch inference. It is not designed for latency-critical system. In addition, due to some usability issues, for prediction with small dataset, CPU `.predict`, i.e., creating a new `TPUEstimator` instance with `use_tpu=False`, might be more convenient. Note: In contrast to TPU training/evaluation, the `input_fn` for prediction *should* raise an end-of-input exception (`OutOfRangeError` or `StopIteration`), which serves as the stopping signal to `TPUEstimator`. To be precise, the ops created by `input_fn` produce one batch of the data. The `predict()` API processes one batch at a time. When reaching the end of the data source, an end-of-input exception should be raised by one of these operations. The user usually does not need to do this manually. As long as the dataset is not repeated forever, the `tf.data` API will raise an end-of-input exception automatically after the last batch has been produced. Note: Estimator.predict returns a Python generator. Please consume all the data from the generator so that TPUEstimator can shutdown the TPU system properly for user. Current limitations: -------------------- 1. 
TPU prediction only works on a single host (one TPU worker). 2. `input_fn` must return a `Dataset` instance rather than `features`. In fact, .train() and .evaluate() also support Dataset as return value. Example (MNIST): ---------------- ``` height = 32 width = 32 total_examples = 100 def predict_input_fn(params): batch_size = params['batch_size'] images = tf.random.uniform( [total_examples, height, width, 3], minval=-1, maxval=1) dataset = tf.data.Dataset.from_tensor_slices(images) dataset = dataset.map(lambda images: {'image': images}) dataset = dataset.batch(batch_size) return dataset def model_fn(features, labels, params, mode): # Generate predictions, called 'output', from features['image'] if mode == tf.estimator.ModeKeys.PREDICT: return tf.contrib.tpu.TPUEstimatorSpec( mode=mode, predictions={ 'predictions': output, 'is_padding': features['is_padding'] }) tpu_est = TPUEstimator( model_fn=model_fn, ..., predict_batch_size=16) # Fully consume the generator so that TPUEstimator can shutdown the TPU # system. for item in tpu_est.predict(input_fn=input_fn): # Filter out item if the `is_padding` is 1. # Process the 'predictions' ``` Exporting ========= `export_saved_model` exports 2 metagraphs, one with `saved_model.SERVING`, and another with `saved_model.SERVING` and `saved_model.TPU` tags. At serving time, these tags are used to select the appropriate metagraph to load. Before running the graph on TPU, the TPU system needs to be initialized. If TensorFlow Serving model-server is used, this is done automatically. If not, please use `session.run(tpu.initialize_system())`. There are two versions of the API: 1 or 2. In V1, the exported CPU graph is `model_fn` as it is. The exported TPU graph wraps `tpu.rewrite()` and `TPUPartitionedCallOp` around `model_fn` so `model_fn` is on TPU by default. To place ops on CPU, `tpu_replication.outside_compilation(host_call, logits)` can be used. 
Example: ---------------- ``` def model_fn(features, labels, mode, config, params): ... logits = ... export_outputs = { 'logits': export_output_lib.PredictOutput( {'logits': logits}) } def host_call(logits): class_ids = math_ops.argmax(logits) classes = string_ops.as_string(class_ids) export_outputs['classes'] = export_output_lib.ClassificationOutput(classes=classes) tpu_replication.outside_compilation(host_call, logits) ... ``` In V2, `export_saved_model()` sets up `params['use_tpu']` flag to let the user know if the code is exporting to TPU (or not). When `params['use_tpu']` is `True`, users need to call `tpu.rewrite()`, `TPUPartitionedCallOp` and/or `batch_function()`. TIP: V2 is recommended as it is more flexible (eg: batching, etc). @compatibility(TF2) TPU Estimator manages its own TensorFlow graph and session, so it is not compatible with TF2 behaviors. We recommend that you migrate to the newer `tf.distribute.TPUStrategy`. See the [TPU guide](https://www.tensorflow.org/guide/tpu) for details. @end_compatibility """ def __init__(self, model_fn=None, model_dir=None, config=None, params=None, use_tpu=True, train_batch_size=None, eval_batch_size=None, predict_batch_size=None, batch_axis=None, eval_on_tpu=True, export_to_tpu=True, export_to_cpu=True, warm_start_from=None, embedding_config_spec=None, export_saved_model_api_version=ExportSavedModelApiVersion.V1): """Constructs an `TPUEstimator` instance. Args: model_fn: Model function as required by `Estimator` which returns EstimatorSpec or TPUEstimatorSpec. `training_hooks`, 'evaluation_hooks', and `prediction_hooks` must not capure any TPU Tensor inside the model_fn. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. If `None`, the model_dir in `config` will be used if set. If both are set, they must be same. If both are `None`, a temporary directory will be used. 
config: An `tpu_config.RunConfig` configuration object. Cannot be `None`. params: An optional `dict` of hyper parameters that will be passed into `input_fn` and `model_fn`. Keys are names of parameters, values are basic python types. There are reserved keys for `TPUEstimator`, including 'batch_size'. use_tpu: A bool indicating whether TPU support is enabled. Currently, - TPU training and evaluation respect this bit, but eval_on_tpu can override execution of eval. See below. train_batch_size: An int representing the global training batch size. TPUEstimator transforms this global batch size to a per-shard batch size, as params['batch_size'], when calling `input_fn` and `model_fn`. Cannot be `None` if `use_tpu` is `True`. Must be divisible by total number of replicas. eval_batch_size: An int representing evaluation batch size. Must be divisible by total number of replicas. predict_batch_size: An int representing the prediction batch size. Must be divisible by total number of replicas. batch_axis: A python tuple of int values describing how each tensor produced by the Estimator `input_fn` should be split across the TPU compute shards. For example, if your input_fn produced (images, labels) where the images tensor is in `HWCN` format, your shard dimensions would be [3, 0], where 3 corresponds to the `N` dimension of your images Tensor, and 0 corresponds to the dimension along which to split the labels to match up with the corresponding images. If None is supplied, and per_host_input_for_training is True, batches will be sharded based on the major dimension. If tpu_config.per_host_input_for_training is False or `PER_HOST_V2`, batch_axis is ignored. eval_on_tpu: If False, evaluation runs on CPU or GPU. In this case, the model_fn must return `EstimatorSpec` when called with `mode` as `EVAL`. export_to_tpu: If True, `export_saved_model()` exports a metagraph for serving on TPU. Note that unsupported export modes such as EVAL will be ignored. 
        For those modes, only a CPU model will be exported.
        Currently, export_to_tpu only supports PREDICT.
      export_to_cpu: If True, `export_saved_model()` exports a metagraph for
        serving on CPU.
      warm_start_from: Optional string filepath to a checkpoint or SavedModel
        to warm-start from, or a `tf.estimator.WarmStartSettings` object to
        fully configure warm-starting. If the string filepath is provided
        instead of a `WarmStartSettings`, then all variables are warm-started,
        and it is assumed that vocabularies and Tensor names are unchanged.
      embedding_config_spec: Optional EmbeddingConfigSpec instance to support
        using TPU embedding.
      export_saved_model_api_version: an integer: 1 or 2. 1 corresponds to V1,
        2 corresponds to V2. (Defaults to V1). With V1, `export_saved_model()`
        adds rewrite() and TPUPartitionedCallOp() for user; while in v2, user
        is expected to add rewrite(), TPUPartitionedCallOp() etc in their
        model_fn.

    Raises:
      ValueError: `params` has reserved keys already.
    """
    # `config` is mandatory here (unlike core Estimator) because TPU-specific
    # settings live on tpu_config.RunConfig.
    if config is None or not isinstance(config, tpu_config.RunConfig):
      raise ValueError(
          '`config` must be provided with type `tpu_config.RunConfig`')

    if params is not None and any(k in params for k in _RESERVED_PARAMS_KEYS):
      raise ValueError('{} are reserved keys but existed in params {}.'.format(
          _RESERVED_PARAMS_KEYS, params))

    if use_tpu:
      # Perform some very basic validations. More validations will be found in
      # _InternalTPUContext.
      if train_batch_size is None:
        raise ValueError('`train_batch_size` cannot be `None`')
      util_lib.check_positive_integer(train_batch_size, 'train_batch_size')

      if (config.tpu_config.per_host_input_for_training is
          tpu_config.InputPipelineConfig.PER_SHARD_V1 and
          config.tpu_config.num_cores_per_replica):
        raise ValueError(
            'Model parallelism only supports per host input for training. '
            'Please adjust TPURunconfig.per_host_input_for_training.')

      if eval_batch_size is not None:
        util_lib.check_positive_integer(eval_batch_size, 'eval_batch_size')

      if predict_batch_size is not None:
        util_lib.check_positive_integer(predict_batch_size,
                                        'predict_batch_size')

      if embedding_config_spec:
        if (config.tpu_config.per_host_input_for_training not in (
            tpu_config.InputPipelineConfig.PER_HOST_V1,
            tpu_config.InputPipelineConfig.PER_HOST_V2)):
          raise ValueError('Only PER_HOST_V1 and PER_HOST_V2 is supported when '
                           'using TPU Embedding; got {}.'.format(
                               config.tpu_config.per_host_input_for_training))
        # NOTE(review): this attribute is only set when use_tpu and
        # embedding_config_spec are both given; code elsewhere that reads it
        # presumably only runs under the same conditions — verify.
        self._embedding_from_feature_columns = (
            embedding_config_spec.feature_columns is not None)

    if (not (use_tpu and eval_on_tpu) and embedding_config_spec and
        embedding_config_spec.partition_strategy == 'mod'):
      raise ValueError('Mod sharding of embedding tables not supported on '
                       'CPU.')
    _tpu_estimator_gauge.get_cell().set(True)
    # Verifies the model_fn signature according to Estimator framework.
    estimator_lib._verify_model_fn_args(model_fn, params)  # pylint: disable=protected-access
    # We cannot store config and params in this constructor as parent
    # constructor might change them, such as assigning a temp dir for
    # config.model_dir.
    model_function = self._augment_model_fn(model_fn, batch_axis)

    # Overwrite log_step_count_steps to disable TensorLoggingHook and
    # StepCounterHook from being created in Estimator. TPUEstimator already
    # added equivalent hooks in _augment_model_fn above.
    self._log_every_n_steps = config.log_step_count_steps
    config = config.replace(log_step_count_steps=None)

    # Passing non-None params as wrapped model_fn has it.
    params = params or {}
    super(TPUEstimator, self).__init__(
        model_fn=model_function,
        model_dir=model_dir,
        config=config,
        params=params,
        warm_start_from=warm_start_from)
    self._iterations_per_training_loop = util_lib.parse_iterations_per_loop(
        self._config.tpu_config.iterations_per_loop)
    # In absence of an explicit `log_every_n_secs` config, if the
    # `iterations_per_loop` value is specified as time in seconds, enable
    # logging every n secs based on the `iterations_per_loop` value. A trade-off
    # avoiding API change on the current release.
    # TODO(henrytan): add `log_every_n_secs` to RunConfig.
    if self._iterations_per_training_loop.unit == 'seconds':
      self._log_every_n_secs = self._iterations_per_training_loop.value
      self._log_every_n_steps = None
    elif self._iterations_per_training_loop.unit == 'count':
      if self._log_every_n_steps is not None:
        # Each session.run() lasts for iterations_per_loop. We can't log
        # in-between a session.run(), and we can only log after the
        # `iterations_per_loop` steps, so we can only approximate. If a user
        # requests to log every N steps, we actually want to roughly log every
        # N / `iterations_per_loop` steps to match the original intention.
        self._log_every_n_steps = (
            int(
                math.ceil(
                    float(self._log_every_n_steps) /
                    self._iterations_per_training_loop.value)))
      self._log_every_n_secs = None
    else:
      # parse_iterations_per_loop only produces 'seconds' or 'count'; reaching
      # this branch indicates a bug in the parser, not bad user input.
      assert False, ('Invalid TPUConfig `iterations_per_loop` value. '
                     'Indicates a bug in `iterations_per_loop` '
                     'parsing.')

    # All properties passed to _InternalTPUContext are immutable.
# pylint: disable=protected-access self._ctx = tpu_context._get_tpu_context(self._config, train_batch_size, eval_batch_size, predict_batch_size, use_tpu, eval_on_tpu, embedding_config_spec) self._export_to_cpu = export_to_cpu self._export_to_tpu = export_to_tpu if not (isinstance(export_saved_model_api_version, ExportSavedModelApiVersion) or export_saved_model_api_version == 1 or export_saved_model_api_version == 2): raise ValueError('export_saved_model_api_version should be 1 or 2; ' 'got {}.'.format( export_saved_model_api_version)) self._export_saved_model_api_version = export_saved_model_api_version self._is_input_fn_invoked = None self._rendezvous = {} def _add_meta_graph_for_mode(self, builder, input_receiver_fn_map, checkpoint_path, save_variables=True, mode=model_fn_lib.ModeKeys.PREDICT, export_tags=None, check_variables=True, strip_default_attrs=True): if self._export_to_tpu and mode != model_fn_lib.ModeKeys.PREDICT: tf.compat.v1.logging.warn( 'TPUEstimator only handles mode PREDICT for exporting ' 'when `export_to_tpu` is `True`; Mode {} will be ignored ' 'for TPU.'.format(mode)) if not self._export_to_cpu and not self._export_to_tpu: raise ValueError('One of export_to_cpu and export_to_tpu must be true.') if self._export_to_cpu: (super(TPUEstimator, self)._add_meta_graph_for_mode( builder, input_receiver_fn_map, checkpoint_path, save_variables, mode=mode, export_tags=export_tags, check_variables=check_variables, strip_default_attrs=strip_default_attrs)) if self._export_to_tpu and mode == model_fn_lib.ModeKeys.PREDICT: input_receiver_fn_map = { _INFERENCE_ON_TPU_MODE: input_receiver_fn_map[mode] } export_tags = [tf.saved_model.SERVING, tf.saved_model.TPU] mode = _INFERENCE_ON_TPU_MODE # See b/110052256 for why `check_variables` is `False`. 
      # Variables are only saved/checked with the TPU metagraph when the CPU
      # export did not already do so.
      if not self._export_to_cpu:
        check_variables = save_variables = True
      else:
        check_variables = save_variables = False

      (super(TPUEstimator, self)._add_meta_graph_for_mode(
          builder,
          input_receiver_fn_map,
          checkpoint_path,
          save_variables=save_variables,
          mode=mode,
          export_tags=export_tags,
          check_variables=check_variables,
          strip_default_attrs=strip_default_attrs))

  def _call_model_fn(self, features, labels, mode, config):
    # For TPU inference export, wrap the model_fn call in a
    # _TPUInferenceContext; with API version 1 the rewrite is done for the
    # user via _call_model_fn_for_inference, with version 2 the user's
    # model_fn is expected to do it itself.
    if mode == _INFERENCE_ON_TPU_MODE:
      context = tpu._TPUInferenceContext('tpu_inference', check_ops=False)
      try:
        context.Enter()
        if ((self._export_saved_model_api_version ==
             ExportSavedModelApiVersion.V1) or
            self._export_saved_model_api_version == 1):
          result = self._call_model_fn_for_inference(features, labels, mode,
                                                     config)
        else:
          result = super(TPUEstimator, self)._call_model_fn(
              features, labels, mode, config)
      finally:
        context.Exit()
      return result
    else:
      return super(TPUEstimator, self)._call_model_fn(
          features, labels, mode, config)

  def _call_model_fn_for_inference(self, features, labels, mode, config):
    """Wraps `_call_model_fn` for `export_saved_model`."""
    if mode != _INFERENCE_ON_TPU_MODE:
      raise ValueError('mode must be {}; '
                       'got {}.'.format(_INFERENCE_ON_TPU_MODE, mode))
    return model_fn_inference_on_tpu(
        self._model_fn,
        features,
        labels,
        config,
        self._params,
        batch_config=None)

  def _create_global_step(self, graph):
    """Creates a global step suitable for TPUs.

    Args:
      graph: The graph in which to create the global step.

    Returns:
      A global step `Tensor`.

    Raises:
      ValueError: if the global step tensor is already defined.
    """
    return _create_global_step(graph)

  def _convert_train_steps_to_hooks(self, steps, max_steps):
    with self._ctx.with_mode(model_fn_lib.ModeKeys.TRAIN) as ctx:
      if ctx.is_running_on_cpu():
        return super(TPUEstimator,
                     self)._convert_train_steps_to_hooks(steps, max_steps)

    # On TPU.
    if steps is None and max_steps is None:
      raise ValueError(
          'For TPU training, one of `steps` or `max_steps` must be set. '
          'Cannot be both `None`.')

    # Estimator.train has explicit positiveness check.
    if steps is not None:
      util_lib.check_positive_integer(steps, 'Train steps')
    if max_steps is not None:
      util_lib.check_positive_integer(max_steps, 'Train max_steps')

    return [
        _TPUStopAtStepHook(self._iterations_per_training_loop, steps, max_steps)
    ]

  def _convert_eval_steps_to_hooks(self, steps):
    with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx:
      if ctx.is_running_on_cpu():
        return super(TPUEstimator, self)._convert_eval_steps_to_hooks(steps)

    # TPU evaluation cannot rely on end-of-input; a fixed step count is
    # required.
    if steps is None:
      raise ValueError('Evaluate `steps` must be set on TPU. Cannot be `None`.')

    util_lib.check_positive_integer(steps, 'Eval steps')

    return [
        evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
            num_evals=steps),
        _SetEvalIterationsHook(steps)
    ]

  def _call_input_fn(self, input_fn, mode, input_context=None):
    """Calls the input function.

    Args:
      input_fn: The input function.
      mode: ModeKeys
      input_context: Optional instance of `tf.distribute.InputContext`.

    Returns:
      In TPU mode, returns an input_fn to be called later in model_fn.
      Otherwise, calls the input_fn and returns either features or
      (features, labels).

    Raises:
      ValueError: if input_fn takes invalid arguments or does not have
        `params`.
    """
    input_fn_args = function_utils.fn_args(input_fn)
    config = self.config  # a deep copy.
    kwargs = {}
    if 'params' in input_fn_args:
      kwargs['params'] = self.params  # a deep copy.
    else:
      raise ValueError('input_fn ({}) does not include params argument, '
                       'required by TPUEstimator to pass batch size as '
                       'params["batch_size"]'.format(input_fn))
    if 'config' in input_fn_args:
      kwargs['config'] = config

    if 'mode' in input_fn_args:
      kwargs['mode'] = mode

    if 'input_context' in input_fn_args:
      kwargs['input_context'] = input_context

    # Records the fact input_fn has been invoked.
    self._is_input_fn_invoked = True

    with self._ctx.with_mode(mode) as ctx:
      if (ctx.is_running_on_cpu() and
          ctx.is_input_slice_broadcast_to_all_cores()):
        raise ValueError('Invalid TPUConfig `eval_training_input_configuration`'
                         ' value. SLICED mode only works on use_tpu = True.')
      # Setting the batch size in params first. This helps user to have same
      # input_fn for use_tpu=True/False.
      batch_size_for_input_fn = ctx.batch_size_for_input_fn
      if batch_size_for_input_fn is not None:
        _add_item_to_params(kwargs['params'], _BATCH_SIZE_KEY,
                            batch_size_for_input_fn)

      # For export_saved_model, input_fn is never passed to Estimator. So,
      # `is_export_mode` must be False.
      if ctx.is_running_on_cpu(is_export_mode=False):
        with tf.compat.v1.device('/device:CPU:0'):
          return input_fn(**kwargs)

      # For TPU computation, input_fn should be invoked in a tf.while_loop for
      # performance. While constructing the tf.while_loop, the structure of
      # inputs returned by the `input_fn` needs to be recorded. The structure
      # includes whether features or labels is dict or single Tensor, dict keys,
      # tensor shapes, and dtypes. The recorded structure is used to create the
      # infeed dequeue ops, which must be wrapped and passed as a Fn, called
      # inside the TPU computation, as the TPU computation is wrapped inside a
      # tf.while_loop also. So, we either pass input_fn to model_fn or pass
      # dequeue_fn to model_fn. Here, `input_fn` is passed directly as
      # `features` in `model_fn` signature.
      def _input_fn(ctx):
        # Deferred invocation: stashes the per-mode TPU context into params
        # and calls the user's input_fn with the prepared kwargs.
        _add_item_to_params(kwargs['params'], _CTX_KEY, ctx)
        return input_fn(**kwargs)

      return _input_fn

  def _validate_features_in_predict_input(self, result):
    """Skip the validation.

    For TPUEstimator, we do not need to check the result type. `_InputPipeline`
    has stronger check. Parent class's check generates confusing warning msg.

    Args:
      result: `features` returned by input_fn.
""" pass def train(self, input_fn, hooks=None, steps=None, max_steps=None, saving_listeners=None): rendezvous = error_handling.ErrorRendezvous(num_sources=3) self._rendezvous[model_fn_lib.ModeKeys.TRAIN] = rendezvous try: return super(TPUEstimator, self).train( input_fn=input_fn, hooks=hooks, steps=steps, max_steps=max_steps, saving_listeners=saving_listeners) except Exception: # pylint: disable=broad-except rendezvous.record_error('training_loop', sys.exc_info()) finally: rendezvous.record_done('training_loop') rendezvous.raise_errors() def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None, name=None): rendezvous = error_handling.ErrorRendezvous(num_sources=3) self._rendezvous[model_fn_lib.ModeKeys.EVAL] = rendezvous try: return super(TPUEstimator, self).evaluate( input_fn, steps=steps, hooks=hooks, checkpoint_path=checkpoint_path, name=name) except Exception: # pylint: disable=broad-except rendezvous.record_error('evaluation_loop', sys.exc_info()) finally: rendezvous.record_done('evaluation_loop') rendezvous.raise_errors() def predict(self, input_fn, predict_keys=None, hooks=None, checkpoint_path=None, yield_single_examples=True): rendezvous = error_handling.ErrorRendezvous(num_sources=3) self._rendezvous[model_fn_lib.ModeKeys.PREDICT] = rendezvous try: for result in super(TPUEstimator, self).predict( input_fn=input_fn, predict_keys=predict_keys, hooks=hooks, checkpoint_path=checkpoint_path, yield_single_examples=yield_single_examples): yield result except Exception: # pylint: disable=broad-except rendezvous.record_error('prediction_loop', sys.exc_info()) finally: rendezvous.record_done('prediction_loop') rendezvous.raise_errors() rendezvous.record_done('prediction_loop') rendezvous.raise_errors() def _augment_model_fn(self, model_fn, batch_axis): """Returns a new model_fn, which wraps the TPU support.""" def _model_fn(features, labels, mode, config, params): """A Estimator `model_fn` for TPUEstimator.""" # `input_fn` is called in `train()`, 
      # `evaluate()`, and `predict()`, but not in `export_saved_model()`.
      if self._is_input_fn_invoked:
        is_export_mode = False
      else:
        is_export_mode = True

      # Clear the bit.
      self._is_input_fn_invoked = None

      if is_export_mode:
        if mode == _INFERENCE_ON_TPU_MODE:
          _add_item_to_params(params, _USE_TPU_KEY, True)
          mode = model_fn_lib.ModeKeys.PREDICT
        else:
          _add_item_to_params(params, _USE_TPU_KEY, False)

      with self._ctx.with_mode(mode) as ctx:
        model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, ctx)

        # examples_hook is added to training_hooks for both CPU and TPU
        # execution.
        if (self._log_every_n_steps is not None or
            self._log_every_n_secs is not None):
          examples_hook = ExamplesPerSecondHook(
              ctx.global_batch_size,
              # pylint:disable=g-long-ternary
              output_dir=(self.model_dir
                          if not config or config.save_summary_steps else None),
              # pylint:enable=g-long-ternary
              every_n_steps=self._log_every_n_steps,
              every_n_secs=self._log_every_n_secs)

        # CPU/GPU path: no infeed/outfeed machinery, just call the user
        # model_fn directly.
        if ctx.is_running_on_cpu(is_export_mode=is_export_mode):
          tf.compat.v1.logging.info('Running %s on CPU/GPU', mode)
          estimator_spec = model_fn_wrapper.call_without_tpu(
              features, labels, is_export_mode=is_export_mode)
          if (self._log_every_n_steps is not None or
              self._log_every_n_secs is not None):
            estimator_spec = estimator_spec._replace(
                training_hooks=estimator_spec.training_hooks + (examples_hook,))
          return estimator_spec

        assert labels is None, '`labels` passed to `model_fn` must be `None`.'
        # TPUEstimator._call_input_fn passes `input_fn` as features to here.
        assert callable(features), '`input_fn` is not callable.'
        input_fn = features

        tpu_init_ops = []
        if ctx.embedding_config and mode == model_fn_lib.ModeKeys.TRAIN:
          dummy_table_variables, dummy_table_variables_init = (
              tpu_embedding_gradient.create_dummy_table_variables(
                  ctx.embedding_config.tpu_embedding))
          ctx.embedding_config.dummy_table_variables = dummy_table_variables
          tpu_init_ops.append(dummy_table_variables_init)

        # Build the infeed enqueue ops and the matching dequeue fn that the
        # replicated computation will read from.
        input_holders = _InputPipeline(input_fn, batch_axis, ctx)
        enqueue_ops, dequeue_fn, input_hooks, run_infeed_loop_on_coordinator = (
            input_holders.generate_infeed_enqueue_ops_and_dequeue_fn())

        graph = tf.compat.v1.get_default_graph()
        for enqueue_op in enqueue_ops:
          if isinstance(enqueue_op, list):
            graph.get_collection_ref(_TPU_ENQUEUE_OPS).extend(enqueue_op)
          else:
            graph.add_to_collection(_TPU_ENQUEUE_OPS, enqueue_op)

        if mode == model_fn_lib.ModeKeys.TRAIN:
          compile_op, loss, host_call, scaffold_fn, training_hooks = (
              _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
          # A user-supplied CheckpointSaverHook suppresses both the default
          # checkpoint hook and save-on-shutdown below.
          has_saver_hook = training_hooks and any(
              isinstance(hook, tf.compat.v1.train.CheckpointSaverHook)
              for hook in training_hooks)
          if ctx.embedding_config:
            g = tf.compat.v1.get_default_graph()
            table_to_config_dict = (
                ctx.embedding_config.tpu_embedding.table_to_config_dict)
            optimization_parameters = (
                ctx.embedding_config.tpu_embedding.optimization_parameters)
            if self._embedding_from_feature_columns:
              embedding_variable_name_by_table, slot_variable_names_by_table = (
                  _tpu_estimator_embedding.get_full_variable_names(
                      g, table_to_config_dict, optimization_parameters))
            else:
              embedding_variable_name_by_table = None
              slot_variable_names_by_table = None
            embedding_variables_and_ops = (
                ctx.embedding_config.tpu_embedding.create_variables_and_ops(
                    embedding_variable_name_by_table,
                    slot_variable_names_by_table))
            tpu_init_ops.extend(embedding_variables_and_ops.load_ops())
          # scaffold_fn must be called after variables for TPU embedding has
          # been created on CPU, as user might reinitialize those from some
          # checkpoint within scaffold_fn.
          scaffold = _get_scaffold(scaffold_fn)

          host_ops = host_call.create_tpu_hostcall()

          shutdown_hooks = []
          shutdown_mode = os.environ.get('TF_TPU_GRACEFUL_SHUTDOWN_MODE',
                                         'reset_computation')
          if shutdown_mode:
            if shutdown_mode == 'shutdown_worker':
              finalizer_hooks = [
                  session_support.ShutdownLameWorkers(),
              ]
            elif shutdown_mode == 'shutdown_all_workers':
              finalizer_hooks = [
                  session_support.ShutdownAllWorkers(),
              ]
            elif shutdown_mode == 'reset_computation':
              finalizer_hooks = [
                  session_support.ResetComputation(),
              ]
            elif not shutdown_mode:
              finalizer_hooks = []
            else:
              raise ValueError('Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE "%s"' %
                               shutdown_mode)

            if finalizer_hooks:
              if has_saver_hook:
                saver = _NotSaver(
                    'No save on shutdown when there are user-defined '
                    'CheckpointSaverHooks')
              else:
                saver = None  # Yes automatic save on shutdown.
              shutdown_hooks.append(
                  session_support.GracefulShutdownHook(
                      checkpoint_prefix=self.model_dir + '/model.ckpt',
                      on_shutdown_hooks=finalizer_hooks,
                      saver=saver))

          with tf.control_dependencies([loss]):
            global_step = tf.identity(tf.compat.v1.train.get_global_step())
          hooks = input_hooks + shutdown_hooks

          if ctx.feed_hook is not None:
            tf.compat.v1.logging.info(
                'Use user implemented tpu infeed outfeed session hook class.')
            infeed_outfeed_session_hook_class = ctx.feed_hook
          else:
            infeed_outfeed_session_hook_class = TPUInfeedOutfeedSessionHook

          hooks.extend([
              infeed_outfeed_session_hook_class(
                  ctx,
                  enqueue_ops,
                  host_ops,
                  tpu_compile_op=compile_op,
                  run_infeed_loop_on_coordinator=(
                      run_infeed_loop_on_coordinator),
                  rendezvous=self._rendezvous[mode],
                  master=self._config.master,
                  session_config=self._session_config,
                  tpu_init_ops=tpu_init_ops,
                  outfeed_every_n_steps=self._config.tpu_config
                  .experimental_host_call_every_n_steps),
              InstallSignalHandlerHook()
          ])
          if _check_add_preemption_hook(self._config.cluster):
            hooks.extend(
                [preempted_hook.CloudTPUPreemptedHook(self._config.cluster)])
          if (self._log_every_n_steps is not None or
              self._log_every_n_secs is not None):
            if self._iterations_per_training_loop.unit == 'count':
              examples_hook._set_steps_per_run(  # pylint: disable=protected-access
                  self._iterations_per_training_loop.value)
            hooks.append(
                tf.compat.v1.train.LoggingTensorHook(
                    {
                        'loss': tf.identity(loss),
                        'step': global_step,
                    },
                    every_n_iter=self._log_every_n_steps,
                    every_n_secs=self._log_every_n_secs))
            hooks.append(examples_hook)

          if training_hooks:
            hooks.extend(training_hooks)

          chief_hooks = []
          if (not has_saver_hook and
              (self._config.save_checkpoints_secs or
               self._config.save_checkpoints_steps)):
            checkpoint_hook = tf.compat.v1.train.CheckpointSaverHook(
                self.model_dir,
                save_secs=self._config.save_checkpoints_secs,
                save_steps=self._config.save_checkpoints_steps,
                scaffold=scaffold,
                save_graph_def=self._config.checkpoint_save_graph_def)
            if self._iterations_per_training_loop.unit == 'count':
              checkpoint_hook._set_steps_per_run(  # pylint: disable=protected-access
                  self._iterations_per_training_loop.value)
            chief_hooks.append(checkpoint_hook)
          else:
            tf.compat.v1.logging.info('Bypassing TPUEstimator hook')
          tf.compat.v1.summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
          with tf.control_dependencies([loss]):
            update_ops = _sync_variables_ops(ctx)
            if ctx.embedding_config:
              update_ops.extend(embedding_variables_and_ops.retrieve_ops())

          # Validate the TPU training graph to catch basic errors
          _validate_tpu_training_graph(ctx)

          train_op = tf.group(*update_ops)
          graph.add_to_collection(_TPU_TRAIN_OP, train_op)

          return model_fn_lib.EstimatorSpec(
              mode,
              loss=loss,
              training_chief_hooks=chief_hooks,
              training_hooks=hooks,
              train_op=train_op,
              scaffold=scaffold)

        if mode == model_fn_lib.ModeKeys.EVAL:
          compile_op, total_loss, host_calls, scaffold_fn, eval_hooks = (
              _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
          if ctx.embedding_config:
            g = tf.compat.v1.get_default_graph()
            table_to_config_dict = (
                ctx.embedding_config.tpu_embedding.table_to_config_dict)
            if self._embedding_from_feature_columns:
              embedding_variable_name_by_table, _ = (
                  _tpu_estimator_embedding.get_full_variable_names(
                      g, table_to_config_dict))
            else:
              embedding_variable_name_by_table = None
            embedding_variables_and_ops = (
                ctx.embedding_config.tpu_embedding.create_variables_and_ops(
                    embedding_variable_name_by_table))
            tpu_init_ops.extend(embedding_variables_and_ops.load_ops())
          # scaffold_fn must be called after variables for TPU embedding has
          # been created on CPU, as user might reinitialize those from some
          # checkpoint within scaffold_fn.
          scaffold = _get_scaffold(scaffold_fn)
          iterations_per_loop_var = _create_or_get_iterations_per_loop()
          # total_loss is accumulated over the loop; report the per-iteration
          # mean.
          mean_loss = tf.compat.v1.div(
              total_loss,
              tf.cast(iterations_per_loop_var, dtype=total_loss.dtype))

          with tf.control_dependencies([mean_loss]):
            # After TPU evaluation computation is done (the mean_loss tensor),
            # reads all variables back from TPU and updates the eval step
            # counter properly
            internal_ops_to_run = _sync_variables_ops(ctx)
            internal_ops_to_run.append(
                _increase_eval_step_op(iterations_per_loop_var))

          host_call_ret = host_calls.create_tpu_hostcall()
          eval_metric_ops = {}
          eval_update_ops = []

          eval_metrics = host_call_ret.get('eval_metrics', {})
          if eval_metrics:
            # Creates a dummy metric update_op for all metrics. Estimator
            # expects all metrics in `eval_metric_ops` have update_op and calls
            # them one by one. The real metric update_ops are invoked in a
            # separated thread. So, here give Estimator the dummy op for all
            # metrics.
            with tf.control_dependencies(internal_ops_to_run):
              dummy_update_op = tf.no_op()

            for k, v in eval_metrics.items():
              eval_metric_ops[k] = (v[0], dummy_update_op)
              eval_update_ops.append(v[1])
          else:
            # If no eval metrics are passed, create an identity node for the
            # loss and add `internal_ops_to_run` to its dependencies. So
            # `internal_ops_to_run` can be executed.
            with tf.control_dependencies(internal_ops_to_run):
              mean_loss = tf.identity(mean_loss)

          if 'host_call' not in host_call_ret:
            host_ops = []
          else:
            host_ops = host_call_ret['host_call']
          hooks = [
              TPUInfeedOutfeedSessionHook(
                  ctx,
                  enqueue_ops,
                  eval_update_ops + host_ops,
                  tpu_compile_op=compile_op,
                  run_infeed_loop_on_coordinator=(
                      run_infeed_loop_on_coordinator),
                  rendezvous=self._rendezvous[mode],
                  master=self._config.evaluation_master,
                  session_config=self._session_config,
                  tpu_init_ops=tpu_init_ops)
          ] + input_hooks

          if _check_add_preemption_hook(self._config.cluster):
            hooks.extend(
                [preempted_hook.CloudTPUPreemptedHook(self._config.cluster)])

          if eval_hooks:
            hooks.extend(eval_hooks)

          return model_fn_lib.EstimatorSpec(
              mode,
              loss=mean_loss,
              evaluation_hooks=hooks,
              eval_metric_ops=eval_metric_ops,
              scaffold=scaffold)

        # Predict
        assert mode == model_fn_lib.ModeKeys.PREDICT

        (compile_op, dummy_predict_op, host_calls, scaffold_fn,
         prediction_hooks) = _predict_on_tpu_system(ctx, model_fn_wrapper,
                                                    dequeue_fn)
        scaffold = _get_scaffold(scaffold_fn)
        with tf.control_dependencies([dummy_predict_op]):
          internal_ops_to_run = _sync_variables_ops(ctx)
        with tf.control_dependencies(internal_ops_to_run):
          dummy_predict_op = tf.no_op()

        # In train and evaluation, the main TPU program is passed to monitored
        # training session to run. Infeed enqueue and outfeed dequeue are
        # executed in side threads. This is not the configuration for
        # prediction mode.
        #
        # For prediction, the Estimator executes the EstimatorSpec.predictions
        # directly and yield the element (via generator) to call site. So, the
        # outfeed based prediction must be passed to MonitoredSession directly.
        # Other parts of the TPU execution are organized as follows.
        #
        # 1. All outfeed based Tensors must be grouped with predictions Tensors
        #    to form a single invocation. This avoid the issue we might trigger
        #    multiple outfeeds incorrectly. To achieve this, `host_call` is
        #    placed in control_dependencies of `stopping_signals`, and
        #    `stopping_signals` is passed into _StoppingPredictHook, which sets
        #    the `stopping_signals` as SessionRunArgs. MonitoredSession merges
        #    all SessionRunArgs with the fetch in session.run together.
        #
        # 2. The TPU program (dummy_predict_op) and enqueue_ops (infeed Enqueue)
        #    are grouped together. They will be launched once and only once in
        #    side threads and they quit naturally according to the SAME stopping
        #    condition.
        enqueue_ops.append(dummy_predict_op)

        host_call_ret = host_calls.create_tpu_hostcall()
        if 'host_call' not in host_call_ret:
          host_ops = []
        else:
          host_ops = host_call_ret['host_call']

        predictions = host_call_ret['predictions']
        _verify_cross_hosts_transfer_size(
            predictions,
            message=(
                'The estimated size for TPUEstimatorSpec.predictions is too '
                'large.'))
        signals = host_call_ret['signals']

        with tf.control_dependencies(host_ops):
          host_ops = []  # Empty, we do do not need it anymore.
          scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal(
              signals)
          # Drop the rows that were added only to pad the batch to full size.
          predictions = _PaddingSignals.slice_tensor_or_dict(
              predictions, signals)

        hooks = [
            _StoppingPredictHook(scalar_stopping_signal),
            TPUInfeedOutfeedSessionHookForPrediction(
                ctx,
                enqueue_ops,
                host_ops,
                rendezvous=self._rendezvous[mode],
                tpu_compile_op=compile_op,
                master=self._config.master,
                session_config=self._session_config),
        ] + input_hooks

        if prediction_hooks:
          hooks.extend(prediction_hooks)

        return model_fn_lib.EstimatorSpec(
            mode,
            prediction_hooks=hooks,
            predictions=predictions,
            scaffold=scaffold)

    return _model_fn


def _check_add_preemption_hook(cluster):
  """Returns True if a CloudTPUPreemptedHook should be added for `cluster`."""
  return (tpu_cluster_resolver.is_running_in_gce() and cluster and isinstance(
      cluster, tf.distribute.cluster_resolver.TPUClusterResolver) and
          cluster._cloud_tpu_client.api_available())  # pylint: disable=protected-access


def _export_output_to_tensors(export_output):
  """Get a list of `Tensors` used in `export_output`.
  Args:
    export_output: an `ExportOutput` object such as `ClassificationOutput`,
      `RegressionOutput`, or `PredictOutput`.

  Returns:
    a list of tensors used in export_output.

  Raises:
    ValueError: if `export_output` is not one of `ClassificationOutput`,
      `RegressionOutput`, or `PredictOutput`.
  """
  if isinstance(export_output, export_output_lib.ClassificationOutput):
    return [export_output.scores, export_output.classes]
  elif isinstance(export_output, export_output_lib.RegressionOutput):
    return [export_output.value]
  elif isinstance(export_output, export_output_lib.PredictOutput):
    return list(export_output.outputs.values())
  else:
    raise ValueError(
        '`export_output` must be have type `ClassificationOutput`, '
        '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output))


def _clone_export_output_with_tensors(export_output, tensors):
  """Clones `export_output` but with new `tensors`.

  The list layout must match `_export_output_to_tensors`: two tensors
  (scores, classes) for classification, one for regression, and one per
  output key (in `outputs` iteration order) for prediction.

  Args:
    export_output: an `ExportOutput` object such as `ClassificationOutput`,
      `RegressionOutput`, or `PredictOutput`.
    tensors: a list of `Tensors` used to construct a new `export_output`.

  Returns:
    A dict similar to `export_output` but with `tensors`.

  Raises:
    ValueError: if `export_output` is not one of `ClassificationOutput`,
      `RegressionOutput`, or `PredictOutput`.
  """
  if isinstance(export_output, export_output_lib.ClassificationOutput):
    if len(tensors) != 2:
      raise ValueError('tensors must be of length 2; '
                       'got {}.'.format(len(tensors)))
    return export_output_lib.ClassificationOutput(*tensors)
  elif isinstance(export_output, export_output_lib.RegressionOutput):
    if len(tensors) != 1:
      raise ValueError('tensors must be of length 1; '
                       'got {}'.format(len(tensors)))
    return export_output_lib.RegressionOutput(*tensors)
  elif isinstance(export_output, export_output_lib.PredictOutput):
    return export_output_lib.PredictOutput(
        dict(zip(export_output.outputs.keys(), tensors)))
  else:
    raise ValueError(
        '`export_output` must be have type `ClassificationOutput`, '
        '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output))


def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
  iterations_per_loop_var = _create_or_get_iterations_per_loop()

  (single_tpu_eval_step, host_calls, captured_scaffold_fn, captured_eval_hooks
  ) = model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn)

  @tpu_function.on_device_training_loop
  def multi_tpu_eval_steps_on_single_shard(replica_id):
    # `tpu.split_compile_and_shard()` splits and passes input for each
    # replica as an array. As so, correctly reshape the input to be a
    # scalar.
    replica_id = tf.reshape(replica_id, [])
    with tpu_context._TPUEstimatorReplicaContext(replica_id):  # pylint: disable=protected-access
      return training_loop.repeat(iterations_per_loop_var, single_tpu_eval_step,
                                  [_ZERO_LOSS])

  # Add input that represents id for each replica in sync so that
  # _TPUEstimatorReplicaContext can be correctly entered during
  # replicated computation.
  replica_id_inputs = []
  replica_id_inputs.append([tf.constant(i) for i in range(ctx.num_replicas)])

  (
      compile_op,
      loss,
  ) = tpu.split_compile_and_shard(
      multi_tpu_eval_steps_on_single_shard,
      inputs=replica_id_inputs,
      num_shards=ctx.num_replicas,
      outputs_from_all_shards=False,
      device_assignment=ctx.device_assignment)

  # outputs_from_all_shards=False: only shard 0's output is returned.
  loss = loss[0]
  return (compile_op, loss, host_calls, captured_scaffold_fn,
          captured_eval_hooks.get())


def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
  iterations_per_loop_var = _create_or_get_iterations_per_loop()

  (single_tpu_train_step, host_call, captured_scaffold_fn,
   captured_training_hooks) = (
       model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn))

  @tpu_function.on_device_training_loop
  def multi_tpu_train_steps_on_single_shard(replica_id):
    # `tpu.split_compile_and_shard()` splits and passes input for each
    # replica as an array. As so, correctly reshape the input to be a
    # scalar.
    replica_id = tf.reshape(replica_id, [])
    with tpu_context._TPUEstimatorReplicaContext(replica_id):  # pylint: disable=protected-access
      # Loop state is [iteration_counter, loss]; return only the loss.
      outputs = training_loop.while_loop(
          lambda i, loss: i < iterations_per_loop_var,
          lambda i, loss: [i + 1, single_tpu_train_step(i)],
          inputs=[0, _INITIAL_LOSS])
    return outputs[1:]

  # Add input that represents id for each replica in sync so that
  # _TPUEstimatorReplicaContext can be correctly entered during
  # replicated computation.
  replica_id_inputs = []
  replica_id_inputs.append([tf.constant(i) for i in range(ctx.num_replicas)])

  (compile_op, loss) = tpu.split_compile_and_shard(
      multi_tpu_train_steps_on_single_shard,
      inputs=replica_id_inputs,
      num_shards=ctx.num_replicas,
      outputs_from_all_shards=False,
      device_assignment=ctx.device_assignment)

  loss = loss[0]
  return (compile_op, loss, host_call, captured_scaffold_fn,
          captured_training_hooks.get())


def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
  """Executes `model_fn_wrapper` multiple times on all TPU shards."""
  (single_tpu_predict_step, host_calls, captured_scaffold_fn,
   captured_predict_hooks
  ) = model_fn_wrapper.convert_to_single_tpu_predict_step(dequeue_fn)

  @tpu_function.on_device_training_loop
  def multi_tpu_predict_steps_on_single_shard(replica_id):
    # `tpu.split_compile_and_shard()` splits and passes input for each
    # replica as an array. As so, correctly reshape the input to be a
    # scalar.
    replica_id = tf.reshape(replica_id, [])
    with tpu_context._TPUEstimatorReplicaContext(replica_id):  # pylint: disable=protected-access

      def cond(scalar_stopping_signal):
        # Keep looping until the input pipeline signals the end of data.
        return tf.math.logical_not(
            _StopSignals.should_stop(scalar_stopping_signal))

      inputs = [_StopSignals.NON_STOPPING_SIGNAL]
      outputs = training_loop.while_loop(
          cond, single_tpu_predict_step, inputs=inputs, name=b'loop')
    return outputs

  # Add input that represents id for each replica in sync so that
  # _TPUEstimatorReplicaContext can be correctly entered during
  # replicated computation.
  replica_id_inputs = []
  replica_id_inputs.append([tf.constant(i) for i in range(ctx.num_replicas)])

  (
      compile_op,
      dummy_predict_op,
  ) = tpu.split_compile_and_shard(
      multi_tpu_predict_steps_on_single_shard,
      inputs=replica_id_inputs,
      num_shards=ctx.num_replicas,
      outputs_from_all_shards=False,
      device_assignment=ctx.device_assignment)

  dummy_predict_op = dummy_predict_op[0]
  return (compile_op, dummy_predict_op, host_calls, captured_scaffold_fn,
          captured_predict_hooks.get())


def _wrap_computation_in_while_loop(device, op_fn):
  """Wraps the ops generated by `op_fn` in tf.while_loop."""

  def computation(i):
    with tf.control_dependencies(op_fn()):
      return i + 1

  iterations_per_loop_var = _create_or_get_iterations_per_loop()
  # By setting parallel_iterations=1, the parallel execution in while_loop is
  # basically turned off.
  with tf.compat.v1.device(device):
    iterations = tf.identity(iterations_per_loop_var)
    return tf.compat.v1.while_loop(
        lambda i: i < iterations,
        computation, [tf.constant(0)],
        parallel_iterations=1)


def _wrap_computation_in_while_loop_with_stopping_signals(device, op_fn):
  """Wraps the ops generated by `op_fn` in tf.while_loop."""

  def cond(scalar_stopping_signal):
    return tf.math.logical_not(_StopSignals.should_stop(scalar_stopping_signal))

  def computation(unused_scalar_stopping_signal):
    # `op_fn` returns a dict with the ops to run and the stopping signals;
    # thread the scalar stopping signal through as the loop state.
    return_value = op_fn()
    execute_ops = return_value['ops']
    signals = return_value['signals']
    with tf.control_dependencies(execute_ops):
      return _StopSignals.as_scalar_stopping_signal(signals)

  # By setting parallel_iterations=1, the parallel execution in while_loop is
  # basically turned off.
  with tf.compat.v1.device(device):
    return tf.compat.v1.while_loop(
        cond,
        computation, [_StopSignals.NON_STOPPING_SIGNAL],
        parallel_iterations=1)


def _validate_tpu_training_graph(ctx):
  """Validate graph before running distributed training.

  Args:
    ctx: A `_InternalTPUContext` instance with mode.
  Raises:
    ValueError: If the graph seems invalid for running on device
  """
  if control_flow_util.ENABLE_CONTROL_FLOW_V2:
    return  # b/124241278

  operations = tf.compat.v1.get_default_graph().get_operations()

  # Check if there is atleast one CrossReplicaSum operation in the graph
  # This should be introduced by using the CrossShardOptimizer wrapper
  cross_replica_sum_ops = [
      o for o in operations if o.type == _CROSS_REPLICA_SUM_OP
  ]
  if not cross_replica_sum_ops and ctx.num_replicas > 1:
    raise ValueError(
        'CrossShardOptimizer must be used for model training on TPUs.')


class _CapturedObject(object):
  """A placeholder to capture an object.

  This is useful when we need to capture a Python object in the Tensorflow
  control flow body function and use it outside the control flow.
  """

  def __init__(self):
    # The captured object; only valid after `capture` has been called.
    self._object = None
    self._captured = False

  def capture(self, o):
    """Stores `o`; raises if something was already captured."""
    if self._captured:
      raise RuntimeError(
          'InternalError: Object can capture only once. Please file bug.')

    self._captured = True
    self._object = o

  def get(self):
    """Returns the captured object; raises if `capture` was never called."""
    if not self._captured:
      raise RuntimeError(
          'InternalError: Object is not captured properly before `get`. '
          'Please file bug.')

    return self._object


def _get_scaffold(captured_scaffold_fn):
  """Retrieves the Scaffold from `captured_scaffold_fn`."""
  # The capturing context raises if the scaffold (or its finalize) pulls in
  # tensors produced inside the TPU computation.
  with _CapturingContext(message='Inside scaffold_fn'):
    scaffold_fn = captured_scaffold_fn.get()
    if scaffold_fn:
      scaffold = scaffold_fn()
      if scaffold is None:
        raise ValueError(
            'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed')
    else:
      scaffold = None

  if scaffold:
    wrapped_finalize = scaffold.finalize

    def _finalize():
      with _CapturingContext('Inside Scaffold.finalize'):
        wrapped_finalize()

    scaffold.finalize = _finalize
  return scaffold


class _CapturingContext(control_flow_ops.ControlFlowContext):
  """Tracks references to Tensors defined in TPU replication."""

  def __init__(self, message):
    control_flow_ops.ControlFlowContext.__init__(self)
    # Prefix used in the error raised when a TPU tensor is referenced.
    self._message = message

  def to_control_flow_context_def(self, context_def, export_scope=None):
    # pylint: disable=useless-super-delegation
    # NOTE(slebedev): the method is required by `ControlFlowContext`.
    super(_CapturingContext, self).to_control_flow_context_def(
        context_def, export_scope)

  def AddOp(self, op):  # pylint: disable=invalid-name
    # Reject any op whose input was produced inside a TPU replication.
    for c in op.inputs:
      if tpu_replication._TPU_REPLICATE_ATTR in c.op.node_def.attr:  # pylint: disable=protected-access
        raise ValueError('{}: Op {} depends on TPU computation {}, '
                         'which is not allowed.'.format(self._message, op, c))

  def AddValue(self, value):
    self.AddOp(value.op)
    return value

  def __enter__(self):
    # pylint: disable=protected-access
    self._g = tf.compat.v1.get_default_graph()
    self._old = self._g._get_control_flow_context()
    self._g._set_control_flow_context(self)
    # pylint: enable=protected-access

  def __exit__(self, _, __, ___):  # pylint: disable=invalid-name
    self._g._set_control_flow_context(self._old)  # pylint: disable=protected-access


class _Inputs(object):
  """A data structure representing the input_fn returned values.

  This also supports the returned value from input_fn as `Dataset`.
""" def __init__(self, features=None, labels=None, dataset=None, signals=None): if dataset is not None and (features is not None or labels is not None or signals is not None): raise RuntimeError('Internal Error: Either (features and labels) or ' 'dataset should be provided, not both. Please file ' 'bug') self._features = features self._labels = labels self._signals = signals self._dataset = dataset self._iterator = None @staticmethod def from_input_fn(return_values): """Returns an `_Inputs` instance according to `input_fn` return value.""" if isinstance(return_values, tf.compat.v2.data.Dataset): dataset = return_values return _Inputs(dataset=dataset) features, labels = _Inputs._parse_inputs(return_values) return _Inputs(features, labels) @staticmethod def _parse_inputs(return_values): if isinstance(return_values, tuple): features, labels = return_values else: features, labels = return_values, None return features, labels @property def is_dataset(self): """Returns True if the return value from input_fn is Dataset.""" return self._dataset is not None def dataset_initializer(self): """Returns the dataset's initializer. The initializer must be run before calling `features_and_labels`. """ self._iterator = tf.compat.v1.data.make_initializable_iterator( self._dataset) return self._iterator.initializer def features_and_labels(self): """Gets `features` and `labels`.""" if self.is_dataset: if self._iterator is None: raise RuntimeError('Internal error: Must run dataset_initializer ' 'before calling features_and_labels(). 
Please file ' 'a bug!') return _Inputs._parse_inputs(self._iterator.get_next()) return (self._features, self._labels) def signals(self): return self._signals @property def dataset(self): return self._dataset class _InputsWithStoppingSignals(_Inputs): """Inputs with `_StopSignals` inserted into the dataset.""" def __init__(self, dataset, batch_size, add_padding=False, num_invocations_per_step=1): assert dataset is not None user_provided_dataset = dataset.map( _InputsWithStoppingSignals.insert_stopping_signal( stop=False, batch_size=batch_size, add_padding=add_padding)) if num_invocations_per_step == 1: final_batch_dataset = dataset.take(1).map( _InputsWithStoppingSignals.insert_stopping_signal( stop=True, batch_size=batch_size, add_padding=add_padding)) else: # We append (2 * num_invocations_per_step - 1) batches for exhausting the # user_provided_dataset and stop properly. # For example, if num_invocations_per_step is 2, we append 3 additional # padding batches: b1, b2, b3. # If user_provided_dataset contains two batches: a1, a2 # Step 1: [a1, a2] # Step 2: [b1, b2] -> STOP # If user_provided_dataset contains three batches: a1, a2, a3. # The training loops: # Step 1: [a1, a2] # Step 2: [a3, b1] # Step 3: [b2, b3] -> STOP. final_batch_dataset = dataset.take(1).map( _InputsWithStoppingSignals.insert_stopping_signal( stop=True, batch_size=batch_size, add_padding=add_padding)) final_batch_dataset = final_batch_dataset.repeat( 2 * num_invocations_per_step - 1) def _set_mask(data_dict): signals = data_dict['signals'] signals['padding_mask'] = tf.compat.v1.ones_like( signals['padding_mask']) data_dict['signals'] = signals return data_dict # Mask out the extra batch. 
final_batch_dataset = final_batch_dataset.map(_set_mask) dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2) super(_InputsWithStoppingSignals, self).__init__(dataset=dataset) self._current_inputs = None def features_and_labels(self): if self._current_inputs is not None: raise RuntimeError( 'Internal Error: The previous inputs have not been properly ' 'consumed. First call features_and_labels, then call signals.') inputs_with_signals = self._iterator.get_next() features = inputs_with_signals['features'] labels = inputs_with_signals.get('labels') self._current_inputs = inputs_with_signals return features, labels def signals(self): """Returns the `Signals` from `_Inputs`.""" if self._current_inputs is None: raise RuntimeError( 'Internal Error: The current inputs have not been properly ' 'generated. First call features_and_labels, then call signals.') signals = self._current_inputs['signals'] self._current_inputs = None return signals @staticmethod def insert_stopping_signal(stop, batch_size, add_padding=False): """Inserts stopping_signal into dataset via _map_fn. Here we change the data structure in the dataset, such that the return value is a dictionary now and `features`, `labels`, and `signals` are three distinguished keys in that dict. This provides a better structure, which eases the process to decompose the inputs (see `features_and_labels`). Args: stop: bool, state of current stopping signals. batch_size: int, batch size. add_padding: bool, whether to pad the tensor to full batch size. Returns: A map_fn passed to dataset.map API. """ def _map_fn(*args): """The map fn to insert signals.""" if len(args) == 1: # Unpack the single Tensor/dict argument as features. This is required # for the input_fn returns no labels. 
args = args[0] features, labels = _Inputs._parse_inputs(args) new_input_dict = {} if add_padding: padding_mask, features, labels = ( _PaddingSignals.pad_features_and_labels(features, labels, batch_size)) new_input_dict['features'] = features if labels is not None: new_input_dict['labels'] = labels else: new_input_dict['features'] = features if labels is not None: new_input_dict['labels'] = labels padding_mask = None new_input_dict['signals'] = _StopSignals( stop=stop, batch_size=batch_size, padding_mask=padding_mask).as_dict() return new_input_dict return _map_fn class _StopSignals(object): """Signals class holding all logic to handle TPU stopping condition.""" NON_STOPPING_SIGNAL = False STOPPING_SIGNAL = True def __init__(self, stop, batch_size, padding_mask=None): self._stop = stop self._batch_size = batch_size self._padding_mask = padding_mask def as_dict(self): """Returns the signals as Python dict.""" shape = [self._batch_size, 1] dtype = tf.dtypes.bool if self._stop: stopping = tf.ones(shape=shape, dtype=dtype) else: stopping = tf.zeros(shape=shape, dtype=dtype) signals = {'stopping': stopping} if self._padding_mask is not None: signals['padding_mask'] = self._padding_mask return signals @staticmethod def as_scalar_stopping_signal(signals): return tf.identity(signals['stopping'][0][0]) @staticmethod def should_stop(scalar_stopping_signal): """Detects whether scalar_stopping_signal indicates stopping.""" if isinstance(scalar_stopping_signal, tf.Tensor): # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the TF # way to express the bool check whether scalar_stopping_signal is True. return tf.math.logical_and(scalar_stopping_signal, _StopSignals.STOPPING_SIGNAL) else: # For non Tensor case, it is used in SessionRunHook. So, we cannot modify # the graph anymore. Here, we use pure Python. 
      return bool(scalar_stopping_signal)


class _PaddingSignals(object):
  """Signals class holding all logic to handle padding."""

  @staticmethod
  def pad_features_and_labels(features, labels, batch_size):
    """Pads out the batch dimension of features and labels."""
    real_batch_size = tf.compat.v1.shape(
        _PaddingSignals._find_any_tensor(features))[0]

    batch_size_tensor = tf.constant(batch_size, tf.dtypes.int32)

    check_greater = tf.compat.v1.debugging.assert_greater_equal(
        batch_size_tensor,
        real_batch_size,
        data=(batch_size_tensor, real_batch_size),
        message='The real batch size should not be greater than batch_size.')

    with tf.control_dependencies([check_greater]):
      missing_count = batch_size_tensor - real_batch_size

    def pad_single_tensor(tensor):
      """Pads out the batch dimension of a tensor to the complete batch_size."""
      rank = len(tensor.shape)
      assert rank > 0
      # Pad only along axis 0 (the batch dimension).
      padding = tf.stack([[0, missing_count]] + [[0, 0]] * (rank - 1))
      padded_shape = (batch_size,) + tuple(tensor.shape[1:])
      padded_tensor = tf.compat.v1.pad(tensor, padding)
      padded_tensor.set_shape(padded_shape)
      return padded_tensor

    def nest_pad(tensor_or_dict):
      return tf.nest.map_structure(pad_single_tensor, tensor_or_dict)

    features = nest_pad(features)
    if labels is not None:
      labels = nest_pad(labels)

    # Mask is 0 for real rows, 1 for padded rows (see _padding_mask below).
    padding_mask = _PaddingSignals._padding_mask(real_batch_size, missing_count,
                                                 batch_size)

    return padding_mask, features, labels

  @staticmethod
  def slice_tensor_or_dict(tensor_or_dict, signals):
    """Slice the real Tensors according to padding mask in signals."""
    padding_mask = signals['padding_mask']
    batch_size = tf.compat.v1.shape(padding_mask)[0]

    def verify_batch_size(tensor):
      check_batch_size = tf.math.equal(batch_size, tensor.shape[0])
      with tf.control_dependencies([check_batch_size]):
        return tf.identity(tensor)

    def slice_single_tensor(tensor):
      rank = len(tensor.shape)
      assert rank > 0
      real_batch_size = batch_size - tf.math.reduce_sum(padding_mask)
      return verify_batch_size(tensor)[0:real_batch_size]

    # As we split the Tensors to all TPU cores and concat them back, it is
    # important to ensure the real data is placed before padded ones, i.e.,
    # order is preserved. By that, the sliced padding mask should have all 0's.
    # If this assertion failed, the slice logic here would not hold.
    sliced_padding_mask = slice_single_tensor(padding_mask)
    assert_padding_mask = tf.math.equal(
        tf.math.reduce_sum(sliced_padding_mask), 0)

    with tf.control_dependencies([assert_padding_mask]):
      should_stop = _StopSignals.should_stop(
          _StopSignals.as_scalar_stopping_signal(signals))

    is_full_batch = tf.math.equal(tf.math.reduce_sum(padding_mask), 0)

    def slice_fn(tensor):
      # If the current batch is full batch or part of stopping signals, we do
      # not need to slice to save performance.
      return tf.compat.v1.cond(
          tf.math.logical_or(should_stop, is_full_batch),
          (lambda: verify_batch_size(tensor)),
          (lambda: slice_single_tensor(tensor)))

    return tf.nest.map_structure(slice_fn, tensor_or_dict)

  @staticmethod
  def _find_any_tensor(batch_features):
    """Returns the first `tf.Tensor` found in the (possibly nested) features."""
    tensors = [
        x for x in tf.nest.flatten(batch_features) if isinstance(x, tf.Tensor)
    ]
    if not tensors:
      raise ValueError('Cannot find any Tensor in features dict.')
    return tensors[0]

  @staticmethod
  def _padding_mask(real_batch_size, missing_count, batch_size):
    """Builds an int32 [batch_size] mask: 0 for real rows, 1 for padding."""
    padding_mask = tf.concat([
        tf.zeros((real_batch_size,), dtype=tf.dtypes.int32),
        tf.ones((missing_count,), dtype=tf.dtypes.int32)
    ],
                             axis=0)
    padding_mask.set_shape((batch_size,))
    return padding_mask


def _verify_cross_hosts_transfer_size(tensor_dict, message):
  """Raises ValueError when `tensor_dict` exceeds the transfer size limit.

  The estimated total byte size of all tensors in `tensor_dict` is compared
  against `_ONE_GIGABYTE`; `message` prefixes the raised error.
  """
  total_size = 0
  tensor_structure = {}
  for key, tensor in tensor_dict.items():
    shape = tensor.shape
    size = np.prod(shape) * tensor.dtype.size
    tensor_structure[key] = shape
    total_size += size
  if total_size >= _ONE_GIGABYTE:
    raise ValueError(
        '{} The transfer size is larger than the protobuf limit. Please '
        'consider to use Tensors with smaller shapes or reduce batch '
        'size. Given:\n'
        '{}'.format(
            message, '\n'.join([
                ' -- Key: {}, Shape: {}'.format(k, v)
                for k, v in tensor_structure.items()
            ])))


def _add_item_to_params(params, key, value):
  """Adds a new item into `params`."""
  if hasattr(params, 'set_hparam'):
    # For HParams, we need to use special API.
    if key in params:
      params.set_hparam(key, value)
    else:
      params.add_hparam(key, value)
  else:
    # Now params is Python dict.
    params[key] = value


def export_estimator_savedmodel(estimator,
                                export_dir_base,
                                serving_input_receiver_fn,
                                assets_extra=None,
                                as_text=False,
                                checkpoint_path=None):
  """Export `Estimator` trained model for TPU inference.

  Args:
    estimator: `Estimator` with which model has been trained.
    export_dir_base: A string containing a directory in which to create
      timestamped subdirectories containing exported SavedModels.
    serving_input_receiver_fn: A function that takes no argument and returns a
      `ServingInputReceiver` or `TensorServingInputReceiver`.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel, or `None` if no extra assets are needed.
    as_text: whether to write the SavedModel proto in text format.
    checkpoint_path: The checkpoint path to export. If `None` (the default),
      the most recent checkpoint found within the model directory is chosen.

  Returns:
    The string path to the exported directory.
  """
  # `TPUEstimator` requires `tpu_config.RunConfig`, so we cannot use
  # `estimator.config`.
  config = tpu_config.RunConfig(model_dir=estimator.model_dir)
  est = TPUEstimator(
      estimator._model_fn,  # pylint: disable=protected-access
      config=config,
      params=estimator.params,
      use_tpu=True,
      train_batch_size=2048,  # Does not matter.
      eval_batch_size=2048,  # Does not matter.
  )
  return est.export_saved_model(export_dir_base, serving_input_receiver_fn,
                                assets_extra, as_text, checkpoint_path)


def model_fn_inference_on_tpu(model_fn,
                              features,
                              labels=None,
                              config=None,
                              params=None,
                              batch_config=None):
  """Convenience wrapper for export_saved_model API v2 for a model_fn.

  WARNING:THIS METHOD IS DEPRECATED AND NOT PART OF THE APIS.

  Make sure to set
  `export_saved_model_api_version=tpu_estimator.ExportSavedModelApiVersion.V2`
  when initializing TPUEstimator (default API version is V1). This is because
  1) `tpu.rewrite` (or `tpu.compile`) shouldn't be called in a nested way
  (otherwise validation will throw error like "NotImplementedError:
  tpu_shard_context cannot be nested.")
  2) When using V1 API, Estimator calls `tpu.rewrite` so using
  `model_fn_inference_on_tpu` will trigger a nested call.
  When using V2 API, users of Estimator needs to call `tpu.rewrite` (which
  the wrapper does).

  It attempts to execute the entire model function on the TPU for prediction.
  Note that this does not support features which are SparseTensors. If you
  have SparseTensor features, consider partitioning your model function
  further and use inference_on_tpu.

  Args:
    model_fn: the model_fn for which we want to inference on TPU.
    features: a tensor or dict of tensors, serves as the feature inputs to the
      model.
    labels: a tensor or dict of tensors, serves as the labels inputs to the
      model.
    config: auxiliary config to the Estimator.
    params: hparams that we want to pass to the model_fn.
    batch_config: a named tuple to wrap the inference batching configuration
      inputs.

  Returns:
    An EstimatorSpec containing the outputs in export_outputs and predictions.
""" computation, capture = _build_computation_for_inference( model_fn, labels, config, params) tensors = call_computation(features, computation, batch_config=batch_config) estimator_spec, export_outputs_dict, predictions_dict, none_indices = ( capture.get()) predictions_list = tensors[:len(predictions_dict)] export_outputs_list_without_none = tensors[len(predictions_dict):] # Reinsert `None`s which we've taken out in # `_build_computation_for_inference()`. export_outputs_list = [] while none_indices or export_outputs_list_without_none: if none_indices and none_indices[0] == len(export_outputs_list): export_outputs_list.append(None) none_indices.pop(0) else: export_outputs_list.append(export_outputs_list_without_none.pop(0)) # Reconstruct `export_outputs` with updated tensors. new_export_outputs_dict = tf.nest.pack_sequence_as(export_outputs_dict, export_outputs_list) export_outputs = estimator_spec.export_outputs new_export_outputs = collections.OrderedDict( (k, _clone_export_output_with_tensors(export_outputs[k], v)) for k, v in six.iteritems(new_export_outputs_dict)) # Reconstruct `predictions` with updated tensors. 
new_predictions = tf.nest.pack_sequence_as(predictions_dict, predictions_list) if (len(new_predictions) == 1 and _KEY_WHEN_PREDICTIONS_IS_A_TENSOR in new_predictions): new_predictions = new_predictions[_KEY_WHEN_PREDICTIONS_IS_A_TENSOR] return estimator_spec._replace( export_outputs=new_export_outputs, predictions=new_predictions) def _build_computation_for_inference(model_fn, labels, config, params): """Builds the computation with calls the model_fn for inference.""" capture = _CapturedObject() def computation(computation_input): """Computation to be passed to `TPUPartitionedCall()`.""" tpu_computation, tpu_capture = _build_tpu_computation_for_inference( model_fn, computation_input, labels, config, params) tensors_on_cpu = tf.compat.v1.tpu.rewrite(tpu_computation) tpu.prune_unconnected_ops_from_xla(tf.compat.v1.get_default_graph()) (estimator_spec, export_outputs_dict, export_outputs_list, predictions_dict) = ( tpu_capture.get()) predictions_list = tensors_on_cpu[:len(predictions_dict)] export_outputs_tpu_on_cpu_list = tensors_on_cpu[len(predictions_dict):] # Reconstruct tensors used in export_outputs, with TPU tensors replaced # with their CPU counterpart returned from `rewrite_for_inference()`. # `function.Defun()` does not like `None`s in return values, so we leave # `None`s out but record their positions for later reconstruction. export_outputs_list_without_none = [] none_indices = [] for i, t in enumerate(export_outputs_list): if t is None: none_indices.append(i) else: export_outputs_list_without_none.append( export_outputs_tpu_on_cpu_list.pop(0)) capture.capture( (estimator_spec, export_outputs_dict, predictions_dict, none_indices)) return predictions_list + export_outputs_list_without_none return computation, capture def _build_tpu_computation_for_inference(model_fn, features, labels, config, params): """Builds the TPU computation for inference on TPU.""" capture = _CapturedObject() def computation(): """Compute tpu tensors used in export_outputs. 
Passed to rewrite_for_inference so that model_fn will be called under the rewriting contexts. Only tpu tensors are returned, but export_outputs and scaffold are captured. Returns: A list of Tensors used in export_outputs and not marked for outside_compilation. """ # We should only call model fn once and it should be inside `computation` # so that building the graph will happen under `rewrite_for_inference`. model_fn_args = function_utils.fn_args(model_fn) kwargs = {} # Makes deep copy with `config` and params` in case user mutates them. if 'labels' in model_fn_args: kwargs['labels'] = labels if 'mode' in model_fn_args: kwargs['mode'] = model_fn_lib.ModeKeys.PREDICT if 'config' in model_fn_args: kwargs['config'] = config if 'params' in model_fn_args: kwargs['params'] = params estimator_spec = model_fn(features, **kwargs) # We pick the TPU tensors out from `export_output` and later return them # from `computation` for rewriting. export_outputs_dict = collections.OrderedDict( (k, _export_output_to_tensors(v)) for k, v in six.iteritems(estimator_spec.export_outputs)) export_outputs_list = tf.nest.flatten(export_outputs_dict) export_outputs_tpu_list = [t for t in export_outputs_list if t is not None] if isinstance(estimator_spec.predictions, dict): predictions_dict = collections.OrderedDict( (k, v) for k, v in six.iteritems(estimator_spec.predictions)) else: predictions_dict = { _KEY_WHEN_PREDICTIONS_IS_A_TENSOR: estimator_spec.predictions } predictions_list = tf.nest.flatten(predictions_dict) # We cannot return everything we want through the return values, so # capture the rest here for later use. 
capture.capture((estimator_spec, export_outputs_dict, export_outputs_list, predictions_dict)) return predictions_list + export_outputs_tpu_list return computation, capture def inference_on_tpu(computation, inputs_to_tpu, num_batch_threads, max_batch_size, batch_timeout_micros, allowed_batch_sizes=None, max_enqueued_batches=100): """Convenient wrapper for export_saved_model API v2 to wrap TPU computation. WARNING: THIS METHOD IS DEPRECATED AND NOT PART OF THE APIS. Make sure to set `export_saved_model_api_version=tpu_estimator.ExportSavedModelApiVersion.V2` when initializing TPUEstimator (default API version is V1). This is because 1) `tpu.rewrite` (or `tpu.compile`) shouldn't be called in a nested way (otherwise validation will throw error like "NotImplementedError: tpu_shard_context cannot be nested.") 2) When using V1 API, Estimator calls `tpu.rewrite` so using `model_fn_inference_on_tpu` will trigger a nested call. When using V2 API, users of Estimator needs to call `tpu.rewrite` (which the wrapper does). It puts computation on TPU, add batching around it and round robin computation between TPU cores. See tpu_estimator_test.py for an example. Args: computation: computation to be put on TPU, which takes inputs_to_tpu as arguments. inputs_to_tpu: a list of tensors as input to computation. num_batch_threads: Number of scheduling threads for processing batches of work. Determines the number of batches processed in parallel. max_batch_size: Batch sizes will never be bigger than this. If None or 0, no batching will done. batch_timeout_micros: Maximum number of microseconds to wait before outputting an incomplete batch. allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does nothing. Otherwise, supplies a list of batch sizes, causing the op to pad batches up to one of those sizes. The entries must increase monotonically, and the final entry must equal max_batch_size. max_enqueued_batches: The maximum depth of the batch queue. Defaults to 100. 
Returns: The unbatched computation output Tensors. """ def _tpu_call(args): """Function to either call or feed into BatchFunction.""" @function.Defun(capture_resource_var_by_value=False) def tpu_computation(): """Function to feed into the TPUPartitionedCallOp.""" tensors_on_cpu = tf.compat.v1.tpu.rewrite(computation, args) tpu.prune_unconnected_ops_from_xla(tf.compat.v1.get_default_graph()) return tensors_on_cpu return tpu_functional.TPUPartitionedCall( args=tpu_computation.captured_inputs, device_ordinal=tpu_ops.tpu_ordinal_selector(), Tout=[o.type for o in tpu_computation.definition.signature.output_arg], f=tpu_computation) if not max_batch_size: return _tpu_call(inputs_to_tpu) @tf.nondifferentiable_batch_function(num_batch_threads, max_batch_size, batch_timeout_micros, allowed_batch_sizes, max_enqueued_batches) def batched_tpu_computation(*args): """Function to feed into the BatchOp.""" return _tpu_call(args) return batched_tpu_computation(*inputs_to_tpu) ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_embedding_test.py ================================================ # Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==============================================================================
"""Tests for TPUEstimator."""

import itertools
import os
import tempfile

from absl import flags
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.tpu import feature_column_v2 as tpu_fc_v2
from tensorflow.python.tpu import tpu_embedding
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.util import tf_keras
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.export import export_output as export_output_lib
from tensorflow_estimator.python.estimator.tpu import _tpu_estimator_embedding
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator

flags.DEFINE_integer('test_num_shards', 8, 'number of replicas to test')

FLAGS = flags.FLAGS

# Mode-key shorthands used throughout the tests.
_TRAIN = model_fn_lib.ModeKeys.TRAIN
_EVAL = model_fn_lib.ModeKeys.EVAL
_PREDICT = model_fn_lib.ModeKeys.PREDICT

# Input pipeline configuration variants exercised by parameterized tests.
_PER_HOST_V1 = tpu_config.InputPipelineConfig.PER_HOST_V1
_PER_HOST_V2 = tpu_config.InputPipelineConfig.PER_HOST_V2

# Constant used for tests that uses categorical_column with vocabulary
_VOCAB_EMBEDDING_DIM = 10
_VOCAB_SIZE = 4
_VOCAB_NUM_BUCKETS = 5


def dense_computation(features):
  """Single zero-initialized dense layer over features['x']."""
  return tf_keras_v1.__internal__.legacy.layers.dense(
      features['x'], 1, kernel_initializer=tf.compat.v1.zeros_initializer())


def create_run_config(iterations_per_loop, **kwargs):
  """Builds a tpu_config.RunConfig sharded by the --test_num_shards flag."""
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=FLAGS.test_num_shards,
          **kwargs),
  )


class TPUEstimatorFeatureColumnTestBase(tf.test.TestCase):
  """Shared fixtures for TPUEstimator feature-column tests."""

  def setUp(self):
    # Save the module-level flag so tearDown can restore it even when a test
    # mutates it.
    self._old_value = tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP
    feature_spec = {
        'x': tf.io.SparseFeature(['ix0', 'ix1'], 'val', tf.int64, [1, 100]),
        'y': tf.io.SparseFeature(['ix0', 'ix1'], 'val', tf.int64, [1, 100])
    }
    self._serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    super().setUp()

  def tearDown(self):
    # Restore the flag captured in setUp.
    tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = self._old_value
    super().tearDown()

  def _create_estimator_with_feature_columns(self,
                                             feature_columns,
                                             numeric_check=False,
                                             use_cpu=False,
                                             input_method=_PER_HOST_V2):
    """Returns TPUEstimator which uses `feature_columns` in model_fn."""

    def _model_fn(features, labels, mode, params):
      """Creates simple TF model using feature_columns to create input layer."""
      del params
      sequence_columns, non_sequence_columns = (
          tpu_fc_v2.split_sequence_columns_v2(feature_columns))
      if sequence_columns:
        sequence_layer = tf_keras.experimental.SequenceFeatures(
            sequence_columns)
        sequence_features, sequence_lengths = sequence_layer(features)
        sequence_lengths = tf.dtypes.cast(sequence_lengths, tf.float32)
      if non_sequence_columns:
        dense_layer = tf_keras_v1.layers.DenseFeatures(non_sequence_columns)
        input_layer = dense_layer(features)
      if numeric_check:
        # Make predictions the same as input_layer. This is used in some tests
        # where we set the labels to be the same as input_layer, which forces
        # the loss to be zero.
        if sequence_columns:
          # For sequence columns, we return the sequence lengths, so that we
          # can verify that these have been correctly calculated.
          predictions = tf.concat(sequence_lengths, -1)
        else:
          predictions = tf.identity(input_layer)
      else:
        if sequence_columns:
          # At this point we know that all the sequence features have the same
          # max sequence length. To get the total number of entries, so we can
          # reshape, we need total embedding dimension * max_sequence_length
          sequence_entries_per_batch = (
              sequence_features.shape[-1] *
              sequence_columns[0].get_max_sequence_length())
          flattened = tf.reshape(
              sequence_features, [-1, sequence_entries_per_batch])
          sequence_lengths = tf.compat.v1.expand_dims(sequence_lengths, -1)
          input_layer = tf.concat(
              [input_layer, flattened, sequence_lengths], -1)
        predictions = tf_keras_v1.__internal__.legacy.layers.dense(
            input_layer,
            1,
            kernel_initializer=tf.compat.v1.zeros_initializer())
      loss = None
      train_op = None
      eval_metrics = None
      export_outputs = None
      if mode == model_fn_lib.ModeKeys.TRAIN:
        loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
        optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.5)
        optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
        train_op = optimizer.minimize(
            loss, global_step=tf.compat.v1.train.get_global_step())
      elif mode == model_fn_lib.ModeKeys.EVAL:
        loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)

        def metric_fn_on_cpu(labels, predictions):
          return {
              'mse':
                  tf.compat.v1.metrics.mean_absolute_error(
                      labels, predictions),
          }

        eval_metrics = (metric_fn_on_cpu, [labels, predictions])
      else:
        # PREDICT mode: expose predictions through export_outputs.
        export_outputs = {
            'prediction':
                export_output_lib.PredictOutput({'prediction': predictions})
        }
      return tpu_estimator.TPUEstimatorSpec(
          mode=mode,
          train_op=train_op,
          loss=loss,
          eval_metrics=eval_metrics,
          export_outputs=export_outputs,
          predictions=predictions)

    run_config = create_run_config(
        iterations_per_loop=2, per_host_input_for_training=input_method)
    embedding_config_spec = tpu_estimator.EmbeddingConfigSpec(
        feature_columns=feature_columns,
        optimization_parameters=tpu_estimator.AdagradParameters(
            learning_rate=.01, initial_accumulator=0.1),
    )
    return tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=8,
        eval_batch_size=8,
        use_tpu=not use_cpu,
        embedding_config_spec=embedding_config_spec,
        export_to_tpu=True)


class TPUEstimatorFeatureColumnTest(TPUEstimatorFeatureColumnTestBase,
                                    parameterized.TestCase):
  # NOTE(review): this class continues past the end of this chunk.

  def test_get_tpu_embedding_config_from_feature_columns(self):
    """Checks table/feature configs derived from a mix of column kinds."""
    feature_a = 'a'
    feature_b = 'b'  # shared
    feature_c = 'c'  # shared, weighted
    feature_d = 'd'  # weighted
    feature_e = 'e'  # sequence
    feature_f = 'f'  # shared sequence
    feature_g = 'g'  # shared sequence
    feature_h = 'h'  # shared non-sequence
    categorical_column_a = tf.feature_column.categorical_column_with_identity(
        key=feature_a, num_buckets=3)
    categorical_column_b = tf.feature_column.categorical_column_with_identity(
        key=feature_b, num_buckets=6)
    categorical_column_c = tf.feature_column.categorical_column_with_identity(
        key=feature_c, num_buckets=6)
    weight_feature_key_c = 'c_weight'
    weighted_column_c = tf.feature_column.weighted_categorical_column(
        categorical_column=categorical_column_c,
        weight_feature_key=weight_feature_key_c)
    categorical_column_d = tf.feature_column.categorical_column_with_identity(
        key=feature_d, num_buckets=3)
    weight_feature_key_d = 'd_weight'
    weighted_column_d = tf.feature_column.weighted_categorical_column(
        categorical_column=categorical_column_d,
        weight_feature_key=weight_feature_key_d)
    sequence_categorical_column_e = (
        tf.feature_column.sequence_categorical_column_with_identity(
            key=feature_e, num_buckets=7))
    sequence_categorical_column_f = (
        tf.feature_column.sequence_categorical_column_with_identity(
            key=feature_f, num_buckets=4))
    sequence_categorical_column_g = (
        tf.feature_column.sequence_categorical_column_with_identity(
            key=feature_g, num_buckets=4))
    categorical_column_h = (
        tf.feature_column.categorical_column_with_identity(
            key=feature_h, num_buckets=4))
    table_a = 'tbl_a'
    table_bc = 'tbl_b_c_weighted_by_c_weight_shared_embedding'
    table_e = 'tbl_e'
    table_fgh = 'tbl_f_g_h_shared_embedding'
    embedding_dimension_a = 2
    embedding_dimension_bc = 5
    embedding_dimension_d = 2
    embedding_dimension_e = 3
    embedding_dimension_fgh = 4
    column_a = tf.compat.v1.tpu.experimental.embedding_column(
        categorical_column_a,
        dimension=embedding_dimension_a,
        combiner='mean',
        initializer=lambda: 'my_initializer_a')
    column_b, column_c = tf.compat.v1.tpu.experimental.shared_embedding_columns(
        [categorical_column_b, weighted_column_c],
        dimension=embedding_dimension_bc,
        combiner='sqrtn',
        initializer=lambda: 'my_initializer_b_c')
    column_d = tf.compat.v1.tpu.experimental.embedding_column(
        weighted_column_d,
        dimension=embedding_dimension_d,
        combiner='mean',
        initializer=lambda: 'my_initializer_d')
    sequence_column_e = tf.compat.v1.tpu.experimental.embedding_column(
        sequence_categorical_column_e,
        max_sequence_length=3,
        dimension=embedding_dimension_e,
        initializer=lambda: 'my_initializer_e')
    sequence_column_f, sequence_column_g, column_h = (
        tf.compat.v1.tpu.experimental.shared_embedding_columns(
            [sequence_categorical_column_f, sequence_categorical_column_g,
             categorical_column_h],
            max_sequence_lengths=[2, 1, 0],
            dimension=embedding_dimension_fgh,
            initializer=lambda: 'my_initializer_f_g_h'))

    table_to_config, feature_to_config = (
        _tpu_estimator_embedding.get_configs_from_feature_columns(
            [column_a, column_b, column_c, column_d, sequence_column_e,
             sequence_column_f, sequence_column_g, column_h]))
    self.assertEqual(feature_to_config[feature_a].table_id, table_a)
    self.assertEqual(feature_to_config[feature_b].table_id, table_bc)
    self.assertEqual(feature_to_config[feature_e].table_id, table_e)
    self.assertEqual(feature_to_config[feature_f].table_id, table_fgh)
    self.assertEqual(feature_to_config[feature_e].max_sequence_length, 3)
    self.assertEqual(feature_to_config[feature_f].max_sequence_length, 2)
    self.assertEqual(feature_to_config[feature_g].max_sequence_length, 1)
    self.assertEqual(feature_to_config[feature_h].max_sequence_length, 0)
    self.assertEqual(table_to_config[table_a].vocabulary_size, 3)
    self.assertEqual(table_to_config[table_bc].vocabulary_size, 6)
    self.assertEqual(table_to_config[table_e].vocabulary_size, 7)
self.assertEqual(table_to_config[table_fgh].vocabulary_size, 4) self.assertEqual(table_to_config[table_a].dimension, embedding_dimension_a) self.assertEqual(table_to_config[table_bc].dimension, embedding_dimension_bc) self.assertEqual(table_to_config[table_e].dimension, embedding_dimension_e) self.assertEqual(table_to_config[table_fgh].dimension, embedding_dimension_fgh) self.assertEqual(table_to_config[table_a].combiner, 'mean') self.assertEqual(table_to_config[table_bc].combiner, 'sqrtn') self.assertEqual(table_to_config[table_a].initializer(), 'my_initializer_a') self.assertEqual(table_to_config[table_bc].initializer(), 'my_initializer_b_c') self.assertEqual(table_to_config[table_e].initializer(), 'my_initializer_e') self.assertEqual(table_to_config[table_fgh].initializer(), 'my_initializer_f_g_h') self.assertEqual(feature_to_config[feature_a].weight_key, None) self.assertEqual(feature_to_config[feature_b].weight_key, None) self.assertEqual(feature_to_config[feature_c].weight_key, weight_feature_key_c) self.assertEqual(feature_to_config[feature_d].weight_key, weight_feature_key_d) def _create_estimator_with_config_dicts(self, feature_to_config_dict, table_to_config_dict, use_cpu=False, partition_strategy='div', input_method=_PER_HOST_V2): """Returns TPUEstimator which uses `feature_columns` in model_fn.""" def _model_fn(features, labels, mode, params): """Creates simple TF model using feature_columns to create input layer.""" del params input_features = [] for feature in features: if len(features[feature].shape) == 1: input_features.append(tf.compat.v1.expand_dims(features[feature], -1)) elif len(features[feature].shape) > 2: input_features.append( tf.reshape(features[feature], [features[feature].shape[0], -1])) else: input_features.append(features[feature]) input_features = [ tf.dtypes.cast(feature, tf.float32) for feature in input_features] input_layer = tf.concat(input_features, -1) predictions = tf_keras_v1.__internal__.legacy.layers.dense( input_layer, 1, 
kernel_initializer=tf.compat.v1.zeros_initializer()) loss = None train_op = None eval_metrics = None export_outputs = None if mode == model_fn_lib.ModeKeys.TRAIN: loss = tf.compat.v1.losses.mean_squared_error(labels, predictions) optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5) optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) train_op = optimizer.minimize( loss, global_step=tf.compat.v1.train.get_global_step()) elif mode == model_fn_lib.ModeKeys.EVAL: loss = tf.compat.v1.losses.mean_squared_error(labels, predictions) def metric_fn_on_cpu(labels, predictions): return { 'mse': tf.compat.v1.metrics.mean_absolute_error(labels, predictions), } eval_metrics = (metric_fn_on_cpu, [labels, predictions]) else: export_outputs = {'prediction': export_output_lib.PredictOutput( {'prediction': predictions})} return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=train_op, loss=loss, eval_metrics=eval_metrics, export_outputs=export_outputs, predictions=predictions) run_config = create_run_config( iterations_per_loop=2, per_host_input_for_training=input_method) embedding_config_spec = tpu_estimator.EmbeddingConfigSpec( table_to_config_dict=table_to_config_dict, feature_to_config_dict=feature_to_config_dict, optimization_parameters=tpu_estimator.AdagradParameters( learning_rate=.01, initial_accumulator=0.1), partition_strategy=partition_strategy, ) return tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=8, eval_batch_size=8, use_tpu=not use_cpu, embedding_config_spec=embedding_config_spec, export_to_tpu=True) def _get_vocab_feature_columns(self, embedding_initializer=None, is_vocabulary_file=True): """Return feature columns for categorical_column_with_vocabulary_x tests.""" if is_vocabulary_file: vocab_file = os.path.join(tempfile.mkdtemp(), 'vocab') with open(vocab_file, 'w') as f: f.write('\n'.join([str(i) for i in range(_VOCAB_SIZE)])) vocab_column = 
tf.compat.v1.feature_column.categorical_column_with_vocabulary_file( key='x', vocabulary_file=vocab_file, vocabulary_size=_VOCAB_SIZE, num_oov_buckets=_VOCAB_NUM_BUCKETS - _VOCAB_SIZE) else: vocab_list = [str(i) for i in range(_VOCAB_SIZE)] vocab_column = tf.feature_column.categorical_column_with_vocabulary_list( key='x', vocabulary_list=vocab_list, num_oov_buckets=_VOCAB_NUM_BUCKETS - _VOCAB_SIZE) feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column( categorical_column=vocab_column, dimension=_VOCAB_EMBEDDING_DIM, initializer=embedding_initializer), ] return set(feature_columns) def _get_vocab_input_fn_and_feature_columns(self, numeric_check=False, is_vocabulary_file=True, is_dataset=False): """Return input_fn and feature_columns for vocabulary column tests. Args: numeric_check: A boolean flag. When set to be True, the labels in input_fn are set to be the expected input_layer value from the input features and embedding initialization. This is to allow test to conveniently do numerical check by comparing the labels against input_layer in model_fn. is_vocabulary_file: use categorical_column_with_vocabulary_file when set True, else categorical_column_with_vocabulary_list. is_dataset: A boolean value indicating whether the input_fn returns dataset or not. Returns: A tuple consists of an input_fn and a set of feature columns. """ # Initialize embedding to # 1 0 0 0 0 .. # 0 2 0 0 0 .. # 0 0 3 0 0 .. # 0 0 0 4 0 .. embedding_init = np.zeros((_VOCAB_SIZE, _VOCAB_EMBEDDING_DIM)) for i in range(_VOCAB_SIZE): embedding_init[i, i] = i + 1 embedding_initializer = tf_keras_v1.initializers.constant(embedding_init) def input_fn(params): # Data index is [3, 2, 1, 0] feature_data = tf.sparse.SparseTensor( indices=[[i, 0] for i in range(_VOCAB_SIZE)], values=[str(_VOCAB_SIZE - 1 - i) for i in range(_VOCAB_SIZE)], dense_shape=[_VOCAB_SIZE, 1]) if numeric_check: # Expected input_layer is # 0 0 0 4 0 .. # 0 0 3 0 0 .. # 0 2 0 0 0 .. # 1 0 0 0 0 .. 
labels = np.zeros((_VOCAB_SIZE, _VOCAB_EMBEDDING_DIM), dtype=np.float32) for i in range(_VOCAB_SIZE): labels[i, _VOCAB_SIZE - i - 1] = _VOCAB_SIZE - i else: labels = np.zeros((_VOCAB_SIZE, 1), dtype=np.float32) data = tf.compat.v1.data.Dataset.from_tensor_slices(({ 'x': feature_data, }, labels)) data = data.repeat() data = data.batch(params['batch_size'], drop_remainder=True) if is_dataset: return data iterator = data.make_one_shot_iterator() return iterator.get_next() return input_fn, self._get_vocab_feature_columns( embedding_initializer, is_vocabulary_file=is_vocabulary_file) def test_feature_in_two_embeddings(self): sparse_column = tf.feature_column.categorical_column_with_identity( key='x', num_buckets=10) feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column(categorical_column=sparse_column, dimension=2), tf.compat.v1.tpu.experimental.embedding_column(categorical_column=sparse_column, dimension=4)] with self.assertRaisesRegex( ValueError, 'is used with multiple embeddings and this ' 'is not supported.'): est = self._create_estimator_with_feature_columns( feature_columns) est.train(input_fn=(lambda params: 'Not used'), steps=1) def _test_two_features(self, shared_embedding, sequence_column, input_method, use_cpu=False): sparse_column1 = tf.feature_column.categorical_column_with_identity( key='x', num_buckets=10) if sequence_column: sparse_column2 = tf.feature_column.sequence_categorical_column_with_identity( key='y', num_buckets=10) else: sparse_column2 = tf.feature_column.categorical_column_with_identity( key='y', num_buckets=10) if shared_embedding: if sequence_column: feature_columns = tf.compat.v1.tpu.experimental.shared_embedding_columns( [sparse_column1, sparse_column2], dimension=2, max_sequence_lengths=[0, 2]) else: feature_columns = tf.compat.v1.tpu.experimental.shared_embedding_columns( [sparse_column1, sparse_column2], dimension=2) else: if sequence_column: feature_columns = [ 
tf.compat.v1.tpu.experimental.embedding_column(categorical_column=sparse_column1, dimension=2), tf.compat.v1.tpu.experimental.embedding_column(categorical_column=sparse_column2, dimension=4, max_sequence_length=2)] else: feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column(categorical_column=sparse_column1, dimension=2), tf.compat.v1.tpu.experimental.embedding_column(categorical_column=sparse_column2, dimension=4)] def _input_fn(params): feature1_data = tf.compat.v1.data.Dataset.from_tensor_slices( tf.sparse.SparseTensor( indices=[[i, j] for i in range(params['batch_size']) for j in [0, 1]], values=[1] * (2 * params['batch_size']), dense_shape=[params['batch_size'], 2])) feature2_data = tf.compat.v1.data.Dataset.from_tensor_slices( tf.sparse.SparseTensor( indices=[[i, j] for i in range(params['batch_size']) for j in [0, 1]], values=[2] * (2 * params['batch_size']), dense_shape=[params['batch_size'], 2])) labels_data = tf.compat.v1.data.Dataset.from_tensor_slices( np.array([[0]] * params['batch_size'], dtype=np.int32)) dataset = tf.compat.v1.data.Dataset.zip( (feature1_data, feature2_data, labels_data)) dataset = dataset.repeat() dataset = dataset.batch(params['batch_size'], drop_remainder=True) def _map(x, y, z): return {'x': x, 'y': y}, z dataset = dataset.map(_map) return dataset est = self._create_estimator_with_feature_columns(feature_columns, use_cpu=use_cpu, input_method=input_method) est.train(input_fn=_input_fn, steps=1) checkpoint_reader = tf.compat.v1.train.NewCheckpointReader( tf.train.latest_checkpoint(est.config.model_dir)) return checkpoint_reader.get_variable_to_shape_map().keys() @parameterized.named_parameters( ('non_shared_non_sequence_v1', False, False, _PER_HOST_V1), ('shared_non_sequence_v1', True, False, _PER_HOST_V1), ('non_shared_sequence_v1', False, True, _PER_HOST_V1), ('shared_sequence_v1', True, True, _PER_HOST_V1), ('non_shared_non_sequence_v2', False, False, _PER_HOST_V2), ('shared_non_sequence_v2', True, False, 
_PER_HOST_V2), ('non_shared_sequence_v2', False, True, _PER_HOST_V2),
      ('shared_sequence_v2', True, True, _PER_HOST_V2))
  def test_two_features_with_config_dicts(self, shared_embedding,
                                          sequence_column, input_method):
    """Trains one step with features 'x'/'y' configured via config dicts.

    When shared_embedding is True both features point at table 't1';
    otherwise 'y' gets its own table 't2'. When sequence_column is True,
    'y' is given max_sequence_length=2.
    """
    y_max_seq_length = 2 if sequence_column else 0
    # Sharing a table here simply means both features reference 't1'.
    y_table = 't1' if shared_embedding else 't2'
    feature_to_config_dict = {
        'x': tpu_embedding.FeatureConfig(table_id='t1'),
        'y': tpu_embedding.FeatureConfig(
            table_id=y_table, max_sequence_length=y_max_seq_length)
    }
    table_to_config_dict = {
        't1': tpu_embedding.TableConfig(vocabulary_size=10, dimension=2)
    }
    if not shared_embedding:
      table_to_config_dict['t2'] = tpu_embedding.TableConfig(
          vocabulary_size=10, dimension=4)

    def _input_fn(params):
      # Two sparse ids per example for each feature; labels are all zero.
      feature1_data = tf.compat.v1.data.Dataset.from_tensor_slices(
          tf.sparse.SparseTensor(
              indices=list(
                  itertools.product(range(params['batch_size']), [0, 1])),
              values=[1] * (2 * params['batch_size']),
              dense_shape=[params['batch_size'], 2]))
      feature2_data = tf.compat.v1.data.Dataset.from_tensor_slices(
          tf.sparse.SparseTensor(
              indices=list(
                  itertools.product(range(params['batch_size']), [0, 1])),
              values=[2] * (2 * params['batch_size']),
              dense_shape=[params['batch_size'], 2]))
      labels_data = tf.compat.v1.data.Dataset.from_tensor_slices(
          np.array([[0]] * params['batch_size'], dtype=np.int32))
      dataset = tf.compat.v1.data.Dataset.zip(
          (feature1_data, feature2_data, labels_data))
      dataset = dataset.repeat()
      dataset = dataset.batch(params['batch_size'], drop_remainder=True)

      def _map(x, y, z):
        return {'x': x, 'y': y}, z

      dataset = dataset.map(_map)
      return dataset

    est = self._create_estimator_with_config_dicts(
        feature_to_config_dict, table_to_config_dict,
        input_method=input_method)
    est.train(input_fn=_input_fn, steps=1)

  def test_non_tpu_embedding_column(self):
    """Mixing a plain embedding_column with TPU columns raises TypeError."""
    sparse_column = tf.feature_column.categorical_column_with_identity(
        key='x', num_buckets=10)
    sparse_column2 = tf.feature_column.categorical_column_with_identity(
        key='y', num_buckets=10)
    feature_columns = [
        tf.compat.v1.tpu.experimental.embedding_column(
            categorical_column=sparse_column, dimension=2),
        tf.feature_column.embedding_column(
            categorical_column=sparse_column2, dimension=4)
    ]
    with self.assertRaisesRegex(TypeError, 'Unsupported feature column'):
      est = self._create_estimator_with_feature_columns(feature_columns)
      est.train(input_fn=(lambda params: 'Not used'), steps=1)

  def test_feature_in_embedding_and_shared_embedding(self):
    """Using one categorical column in two embeddings raises ValueError."""
    sparse_column1 = tf.feature_column.categorical_column_with_identity(
        key='x', num_buckets=10)
    sparse_column2 = tf.feature_column.categorical_column_with_identity(
        key='y', num_buckets=10)
    # sparse_column1 appears both in its own embedding and in the shared one.
    feature_columns = [
        tf.compat.v1.tpu.experimental.embedding_column(
            categorical_column=sparse_column1, dimension=2)
    ] + tf.compat.v1.tpu.experimental.shared_embedding_columns(
        [sparse_column1, sparse_column2], dimension=4)
    with self.assertRaisesRegex(
        ValueError, 'is used with multiple embeddings and this '
        'is not supported.'):
      est = self._create_estimator_with_feature_columns(feature_columns)
      est.train(input_fn=(lambda params: 'Not used'), steps=1)

  def test_sequence_column_with_no_max_length(self):
    """A sequence column requires a positive max_sequence_length."""
    sparse_column = tf.feature_column.sequence_categorical_column_with_identity(
        key='x', num_buckets=10)
    with self.assertRaisesRegex(
        ValueError, 'max_sequence_length must be greater than 0 '
        'for sequence columns. Got max_sequence_length'
        '=0 for sequence column x.'):
      tf.compat.v1.tpu.experimental.embedding_column(
          categorical_column=sparse_column, dimension=2)

  def test_non_sequence_column_with_max_length(self):
    """A non-sequence column must not set a non-zero max_sequence_length."""
    sparse_column = tf.feature_column.categorical_column_with_identity(
        key='x', num_buckets=10)
    with self.assertRaisesRegex(
        ValueError, 'Non zero max_seq_length=2 specified for non '
        'sequence column x.'):
      tf.compat.v1.tpu.experimental.embedding_column(
          categorical_column=sparse_column, dimension=2, max_sequence_length=2)

  def test_sequence_column_shared_embedding_wrong_max_sequence_length(self):
    """max_sequence_lengths must be as long as categorical_columns."""
    sparse_column_x = tf.feature_column.sequence_categorical_column_with_identity(
        key='x', num_buckets=10)
    sparse_column_y = tf.feature_column.sequence_categorical_column_with_identity(
        key='y', num_buckets=10)
    with self.assertRaisesRegex(
        ValueError, 'max_sequence_lengths and categorical_columns must be of'):
      tf.compat.v1.tpu.experimental.shared_embedding_columns(
          categorical_columns=[sparse_column_x, sparse_column_y],
          dimension=2,
          max_sequence_lengths=[2])

  def test_sequence_column_shared_embedding_non_sequence_with_max_length(self):
    """A non-sequence member of a shared embedding must use max length 0."""
    sparse_column_x = tf.feature_column.sequence_categorical_column_with_identity(
        key='x', num_buckets=10)
    sparse_column_y = tf.feature_column.categorical_column_with_identity(
        key='y', num_buckets=10)
    with self.assertRaisesRegex(
        ValueError, 'Non zero max_seq_length=1 specified for non'):
      tf.compat.v1.tpu.experimental.shared_embedding_columns(
          categorical_columns=[sparse_column_x, sparse_column_y],
          dimension=2,
          max_sequence_lengths=[2, 1])

  def test_sequence_column_shared_embedding_sequence_without_max_length(self):
    """A sequence member of a shared embedding needs a positive max length."""
    sparse_column_x = tf.feature_column.sequence_categorical_column_with_identity(
        key='x', num_buckets=10)
    sparse_column_y = tf.feature_column.categorical_column_with_identity(
        key='y', num_buckets=10)
    with self.assertRaisesRegex(
        ValueError, 'max_sequence_length must be greater than 0'):
      tf.compat.v1.tpu.experimental.shared_embedding_columns(
          categorical_columns=[sparse_column_x, sparse_column_y], dimension=2)

  @parameterized.named_parameters(('per_host_v1', _PER_HOST_V1),
                                  ('per_host_v2', _PER_HOST_V2))
  def test_sequence_column_length(self, input_method):
    """Evaluates a sequence embedding whose labels equal sequence lengths."""
    sequence_column = tf.feature_column.sequence_categorical_column_with_identity(
        key='x', num_buckets=10)
    feature_columns = [
        tf.compat.v1.tpu.experimental.embedding_column(
            categorical_column=sequence_column,
            dimension=4,
            max_sequence_length=10)
    ]

    def _input_fn(params):
      # Random per-example sequence lengths in [1, 10); the label for each
      # example is its own sequence length.
      sequence_lengths = np.random.randint(1, 10, params['batch_size'])
      total = sum(sequence_lengths)
      indices = []
      for i in range(params['batch_size']):
        for j in range(sequence_lengths[i]):
          indices.append([i, j])
      feature_data = tf.compat.v1.data.Dataset.from_tensor_slices(
          tf.sparse.SparseTensor(
              indices=indices,
              values=[1] * total,
              dense_shape=[params['batch_size'], 10]))
      labels_data = tf.compat.v1.data.Dataset.from_tensor_slices(
          np.array(sequence_lengths, dtype=np.float32))
      dataset = tf.compat.v1.data.Dataset.zip((feature_data, labels_data))
      dataset = dataset.repeat()
      dataset = dataset.batch(params['batch_size'], drop_remainder=True)

      def _map(x, y):
        return {'x': x}, y

      dataset = dataset.map(_map)
      return dataset

    est = self._create_estimator_with_feature_columns(
        feature_columns, numeric_check=True, input_method=input_method)
    res = est.evaluate(input_fn=_input_fn, steps=1)
    self.assertAllClose(res['loss'], 0)

  def test_unknown_partition_strategy(self):
    """A partition_strategy other than 'mod'/'div' is rejected."""
    feature_to_config_dict = {'x': tpu_embedding.FeatureConfig(table_id='t1')}
    table_to_config_dict = {
        't1': tpu_embedding.TableConfig(vocabulary_size=10, dimension=2)
    }
    with self.assertRaisesRegex(
        ValueError, 'Invalid partition_strategy invalid. Must be '
        'one of "mod" or "div".'):
      self._create_estimator_with_config_dicts(
          feature_to_config_dict,
          table_to_config_dict,
          use_cpu=True,
          partition_strategy='invalid')

  def test_mod_partition_strategy_on_cpu(self):
    """'mod' sharding of embedding tables is rejected when running on CPU."""
    feature_to_config_dict = {'x': tpu_embedding.FeatureConfig(table_id='t1')}
    table_to_config_dict = {
        't1': tpu_embedding.TableConfig(vocabulary_size=10, dimension=2)
    }
    with self.assertRaisesRegex(
        ValueError, 'Mod sharding of embedding tables not '
        'supported on CPU.'):
      self._create_estimator_with_config_dicts(
          feature_to_config_dict,
          table_to_config_dict,
          use_cpu=True,
          partition_strategy='mod')

  @parameterized.named_parameters(
      ('non_shared_non_sequence_v1', False, False, _PER_HOST_V1),
      ('shared_non_sequence_v1', True, False, _PER_HOST_V1),
      ('non_shared_sequence_v1', False, True, _PER_HOST_V1),
      ('shared_sequence_v1', True, True, _PER_HOST_V1),
      ('non_shared_non_sequence_v2', False, False, _PER_HOST_V2),
      ('shared_non_sequence_v2', True, False, _PER_HOST_V2),
      ('non_shared_sequence_v2', False, True, _PER_HOST_V2),
      ('shared_sequence_v2', True, True, _PER_HOST_V2))
  def test_two_features(self, shared, sequence, input_method):
    """CPU checkpoint variables must be a subset of the TPU checkpoint's."""
    cpu_names = self._test_two_features(
        shared_embedding=shared,
        sequence_column=sequence,
        input_method=input_method,
        use_cpu=True)
    tpu_names = self._test_two_features(
        shared_embedding=shared,
        sequence_column=sequence,
        input_method=input_method,
        use_cpu=False)
    # TPU will have some extra variables but all CPU variables should be in the
    # TPU checkpoint
    for name in cpu_names:
      self.assertIn(name, tpu_names)

  @parameterized.named_parameters(('per_host_v1', _PER_HOST_V1),
                                  ('per_host_v2', _PER_HOST_V2))
  def test_dynamic_learning_rate(self, input_method):
    """Columns with learning_rate_fn must train at that function's rate."""
    sparse_column_a = tf.feature_column.categorical_column_with_identity(
        key='a', num_buckets=10)
    sparse_column_b = tf.feature_column.categorical_column_with_identity(
        key='b', num_buckets=10)
    sparse_column_c = tf.feature_column.categorical_column_with_identity(
        key='c', num_buckets=10)
    sparse_column_d = tf.feature_column.categorical_column_with_identity(
        key='d', num_buckets=10)
    sparse_column_e = tf.feature_column.categorical_column_with_identity(
        key='e', num_buckets=10)
    sparse_column_f = tf.feature_column.categorical_column_with_identity(
        key='f', num_buckets=10)
    static_lr = 1

    # On the first step (global_step == 0) these return 2x / 3x the static
    # rate; on subsequent steps they return 0 (no update).
    def dynamic_learning_rate(global_step):
      return tf.compat.v1.cond(
          tf.math.equal(global_step, 0), lambda: 2, lambda: 0)

    def shared_dynamic_learning_rate(global_step):
      return tf.compat.v1.cond(
          tf.math.equal(global_step, 0), lambda: 3, lambda: 0)

    embedding_column_static = tf.compat.v1.tpu.experimental.embedding_column(
        categorical_column=sparse_column_a,
        dimension=2,
        initializer=tf.compat.v1.ones_initializer())
    embedding_column_dynamic = tf.compat.v1.tpu.experimental.embedding_column(
        categorical_column=sparse_column_b,
        dimension=2,
        initializer=tf.compat.v1.ones_initializer(),
        learning_rate_fn=dynamic_learning_rate)
    shared_embedding_columns_static = tf.compat.v1.tpu.experimental.shared_embedding_columns(
        [sparse_column_c, sparse_column_d],
        dimension=2,
        initializer=tf.compat.v1.ones_initializer())
    shared_embedding_columns_dynamic = tf.compat.v1.tpu.experimental.shared_embedding_columns(
        [sparse_column_e, sparse_column_f],
        dimension=2,
        initializer=tf.compat.v1.ones_initializer(),
        learning_rate_fn=shared_dynamic_learning_rate)
    feature_columns = ([embedding_column_static] + [embedding_column_dynamic] +
                       shared_embedding_columns_static +
                       shared_embedding_columns_dynamic)

    def _input_fn(params):
      # The same sparse input is replicated for all six features.
      feature_indices = [[0, 0], [1, 0], [1, 1], [1, 2]]
      feature_values = [3, 0, 1, 2]
      feature = tf.sparse.SparseTensor(
          indices=feature_indices, values=feature_values, dense_shape=[2, 3])
      feature_datas = tuple(
          tf.compat.v1.data.Dataset.from_tensor_slices(feature)
          for _ in range(6))
      labels_data = tf.compat.v1.data.Dataset.from_tensor_slices(
          np.array([[0]] * 2, dtype=np.int32))
      dataset = tf.compat.v1.data.Dataset.zip(feature_datas + (labels_data,))
      dataset = dataset.repeat()

      def _map(a, b, c, d, e, f, z):
        return {'a': a, 'b': b, 'c': c, 'd': d, 'e': e, 'f': f}, z

      dataset = dataset.map(_map)
      dataset = dataset.batch(params['batch_size'], drop_remainder=True)
      return dataset

    def _model_fn(features, labels, mode, params):
      """Creates simple TF model using feature_columns to create input layer."""
      del params
      assert mode == model_fn_lib.ModeKeys.TRAIN
      dense_layer = tf_keras_v1.layers.DenseFeatures(feature_columns)
      input_layer = dense_layer(features)
      predictions = tf_keras_v1.__internal__.legacy.layers.dense(
          input_layer, 1, kernel_initializer=tf.compat.v1.ones_initializer())
      loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
      optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.5)
      optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
      train_op = optimizer.minimize(
          loss, global_step=tf.compat.v1.train.get_global_step())
      return tpu_estimator.TPUEstimatorSpec(
          mode=mode, train_op=train_op, loss=loss)

    run_config = create_run_config(
        iterations_per_loop=1, per_host_input_for_training=input_method)
    optimization_parameters = (
        tpu_estimator.StochasticGradientDescentParameters(
            learning_rate=static_lr))
    embedding_config_spec = tpu_estimator.EmbeddingConfigSpec(
        feature_columns=feature_columns,
        optimization_parameters=optimization_parameters)
    est = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=4,
        embedding_config_spec=embedding_config_spec)
    est.train(input_fn=_input_fn, steps=1)
    checkpoint_reader = tf.compat.v1.train.NewCheckpointReader(
        tf.train.latest_checkpoint(est.config.model_dir))
    embedding_static = checkpoint_reader.get_tensor(
        'dense_features/a_embedding/embedding_weights')
    embedding_dynamic = checkpoint_reader.get_tensor(
        'dense_features/b_embedding/embedding_weights')
    shared_embedding_static = checkpoint_reader.get_tensor(
        'c_d_shared_embedding')
    shared_embedding_dynamic = checkpoint_reader.get_tensor(
        'e_f_shared_embedding')
    # Every table is ones-initialized, so subtracting 1 isolates the update
    # applied by the first training step at the static learning rate.
    unit_update = embedding_static - 1.
unit_update_shared = shared_embedding_static - 1. # The asserts below are only valid if unit updates are not all zero. self.assertFalse(np.allclose(unit_update, 0.)) self.assertFalse((np.allclose(unit_update_shared, 0.))) self.assertAllClose(embedding_dynamic - 1., unit_update * 2) self.assertAllClose(shared_embedding_dynamic - 1., unit_update_shared * 3) # train for another step est.train(input_fn=_input_fn, steps=1) checkpoint_reader2 = tf.compat.v1.train.NewCheckpointReader( tf.train.latest_checkpoint(est.config.model_dir)) embedding_static2 = checkpoint_reader2.get_tensor( 'dense_features/a_embedding/embedding_weights') embedding_dynamic2 = checkpoint_reader2.get_tensor( 'dense_features/b_embedding/embedding_weights') shared_embedding_static2 = checkpoint_reader2.get_tensor( 'c_d_shared_embedding') shared_embedding_dynamic2 = checkpoint_reader2.get_tensor( 'e_f_shared_embedding') self.assertFalse(np.allclose(embedding_static, embedding_static2)) self.assertFalse((np.allclose(shared_embedding_static, shared_embedding_static2))) self.assertAllClose(embedding_dynamic, embedding_dynamic2) self.assertAllClose(shared_embedding_dynamic, shared_embedding_dynamic2) class TPUEstimatorWeightedFeatureColumnTest(TPUEstimatorFeatureColumnTestBase, parameterized.TestCase): @parameterized.named_parameters( ('per_host_v1', _PER_HOST_V1), ('per_host_v2', _PER_HOST_V2)) def test_embedding_with_weighted_categorical_column(self, input_method): num_buckets = 3 embedding_dim = 5 sparse_id_column = tf.feature_column.categorical_column_with_identity( key='ids', num_buckets=num_buckets) weighted_sparse_id_column = tf.feature_column.weighted_categorical_column( categorical_column=sparse_id_column, weight_feature_key='values') embedding_init = np.zeros((num_buckets, embedding_dim)) # Embedding initialized to # 1 1 1 1 1 # 2 2 2 2 2 # 3 3 3 3 3 for i in range(num_buckets): embedding_init[i, :] = [i + 1] * embedding_dim feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column( 
categorical_column=weighted_sparse_id_column, dimension=embedding_dim, combiner='mean', initializer=tf_keras_v1.initializers.constant(embedding_init)) ] def _input_fn(params): sample_size = 2 dense_shape = (sample_size, num_buckets) indices = ((0, 0), (0, 2), (1, 0), (1, 1)) id_values = (2, 1, 1, 0) weight_values = (0.5, 1.0, 0.2, 0.0) inputs = tf.sparse.SparseTensor( indices=indices, values=id_values, dense_shape=dense_shape) weights = tf.sparse.SparseTensor( indices=indices, values=weight_values, dense_shape=dense_shape) # Setup labels so that the loss is zero labels = np.zeros((sample_size, embedding_dim), dtype=np.float32) for j in range(embedding_dim): # "mean" is the weighted sum divided by the total weight. labels[0, j] = (3 * 0.5 + 2 * 1.0) / (0.5 + 1.0) labels[1, j] = (2 * 0.2 + 1 * 0.0) / (0.2 + 0.0) data = tf.compat.v1.data.Dataset.from_tensor_slices(({ 'ids': inputs, 'values': weights }, labels)) data = data.repeat() data = data.batch(params['batch_size'], drop_remainder=True) return data est = self._create_estimator_with_feature_columns( feature_columns, numeric_check=True, input_method=input_method) est.train(input_fn=_input_fn, steps=1) res = est.evaluate(input_fn=_input_fn, steps=1) self.assertAllClose(res['loss'], 0) @parameterized.named_parameters( ('per_host_v1', _PER_HOST_V1), ('per_host_v2', _PER_HOST_V2)) def test_shared_embedding_with_weighted_categorical_column_and_dataset( self, input_method): num_buckets = 3 embedding_dim = 5 sparse_id_column1 = tf.feature_column.categorical_column_with_identity( key='ids1', num_buckets=num_buckets) weighted_sparse_id_column = tf.feature_column.weighted_categorical_column( categorical_column=sparse_id_column1, weight_feature_key='values') sparse_id_column2 = tf.feature_column.categorical_column_with_identity( key='ids2', num_buckets=num_buckets) # Embedding initialized to # 1 1 1 1 1 # 2 2 2 2 2 # 3 3 3 3 3 embedding_init = np.zeros((num_buckets, embedding_dim)) for i in range(num_buckets): 
embedding_init[i, :] = [i + 1] * embedding_dim feature_columns = tf.compat.v1.tpu.experimental.shared_embedding_columns( categorical_columns=[weighted_sparse_id_column, sparse_id_column2], dimension=embedding_dim, combiner='sum', initializer=tf_keras_v1.initializers.constant(embedding_init)) def _input_fn(params): sample_size = 2 dense_shape = (sample_size, num_buckets) id1_indices = ((0, 0), (0, 2), (1, 0), (1, 1)) id1_values = (2, 1, 1, 0) id1_weight_values = (1, 2, 3, 0) # test integer weights id2_indices = ((0, 1), (1, 0), (1, 2)) id2_values = (1, 2, 0) inputs1 = tf.sparse.SparseTensor( indices=id1_indices, values=id1_values, dense_shape=dense_shape) inputs1_weights = tf.sparse.SparseTensor( indices=id1_indices, values=id1_weight_values, dense_shape=dense_shape) inputs2 = tf.sparse.SparseTensor( indices=id2_indices, values=id2_values, dense_shape=dense_shape) # Setup labels so that the loss is zero labels = np.zeros((2, embedding_dim * 2), dtype=np.float32) for j in range(embedding_dim): labels[0, j] = 3 * 1 + 2 * 2 labels[1, j] = 2 * 3 for j in range(embedding_dim, embedding_dim * 2): labels[0, j] = 2 labels[1, j] = 3 + 1 data = tf.compat.v1.data.Dataset.from_tensor_slices(({ 'ids1': inputs1, 'ids2': inputs2, 'values': inputs1_weights }, labels)) data = data.repeat() data = data.batch(params['batch_size'], drop_remainder=True) return data est = self._create_estimator_with_feature_columns( feature_columns, numeric_check=True, input_method=input_method) est.train(input_fn=_input_fn, steps=1) res = est.evaluate(input_fn=_input_fn, steps=1) self.assertAllClose(res['loss'], 0) def test_embedding_with_with_weighted_categorical_column_with_vocab_error( self): vocab_list = [str(i) for i in range(_VOCAB_SIZE)] vocab_column = tf.feature_column.categorical_column_with_vocabulary_list( key='x', vocabulary_list=vocab_list, num_oov_buckets=_VOCAB_NUM_BUCKETS - _VOCAB_SIZE) weighted_vocab_column = tf.feature_column.weighted_categorical_column( 
categorical_column=vocab_column, weight_feature_key='values') # Embedding initialized to # 0 0 0 0 0 ... # 1 1 1 1 1 ... # 2 2 2 2 2 ... # 3 3 3 3 3 ... embedding_init = np.zeros((_VOCAB_SIZE, _VOCAB_EMBEDDING_DIM)) for i in range(_VOCAB_SIZE): embedding_init[i, :] = [i] * _VOCAB_EMBEDDING_DIM embedding_initializer = tf_keras_v1.initializers.constant(embedding_init) feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column( categorical_column=weighted_vocab_column, dimension=_VOCAB_EMBEDDING_DIM, initializer=embedding_initializer), ] def _input_fn(params): # Dense data after vocab -> integer conversion # 2 _ 1 _ _ # 1 3 _ _ _ sample_size = 2 dense_shape = (sample_size, _VOCAB_NUM_BUCKETS) indices = ((0, 0), (0, 2), (1, 0), (1, 1)) id_values = [str(id_value) for id_value in (2, 1, 1, 0)] weight_values = (0.5, 1.0, 0.2, 0.0) inputs = tf.sparse.SparseTensor( indices=indices, values=(id_values), dense_shape=dense_shape) inputs = tf.sparse.SparseTensor( indices=indices, values=id_values, dense_shape=dense_shape) weights = tf.sparse.SparseTensor( indices=indices, values=weight_values, dense_shape=dense_shape) # setup labels to be the same as what input_layer so the loss is zero # Expected input_layer is labels = np.zeros((sample_size, _VOCAB_EMBEDDING_DIM), dtype=np.float32) for j in range(_VOCAB_EMBEDDING_DIM): # "mean" is the weighted sum divided by the total weight. 
labels[0, j] = (2 * 0.5 + 1 * 1.0) / (0.5 + 1.0) labels[1, j] = (1 * 0.2 + 3 * 0.0) / (0.2 + 0.0) data = tf.compat.v1.data.Dataset.from_tensor_slices(({ 'x': inputs, 'values': weights, }, labels)) data = data.repeat() data = data.batch(params['batch_size'], drop_remainder=True) return data est = self._create_estimator_with_feature_columns( feature_columns, numeric_check=True) with self.assertRaisesRegex( ValueError, 'SparseTensor with string as values are not supported.'): est.train(input_fn=_input_fn, steps=1) def test_embedding_with_weighted_categorical_column_dense_weights_error(self): num_buckets = 5 embedding_dim = 10 sparse_id_column = tf.feature_column.categorical_column_with_identity( key='ids', num_buckets=num_buckets) weighted_sparse_id_column = tf.feature_column.weighted_categorical_column( categorical_column=sparse_id_column, weight_feature_key='values') feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column( categorical_column=weighted_sparse_id_column, dimension=embedding_dim) ] def _input_fn(params): sample_size = 2 dense_shape = (sample_size, num_buckets) indices = ((0, 0), (0, 2), (1, 0), (1, 1)) id_values = (2, 1, 1, 0) weight_values = (0.5, 1.0, 0.2, 0.0) inputs = tf.sparse.SparseTensor( indices=indices, values=id_values, dense_shape=dense_shape) weights = tf.sparse.to_dense( tf.sparse.SparseTensor( indices=indices, values=weight_values, dense_shape=dense_shape)) labels = np.zeros((sample_size, embedding_dim), dtype=np.float32) data = tf.compat.v1.data.Dataset.from_tensor_slices(({ 'ids': inputs, 'values': weights }, labels)) data = data.repeat() data = data.batch(params['batch_size'], drop_remainder=True) return data est = self._create_estimator_with_feature_columns( feature_columns, numeric_check=True) with self.assertRaisesRegex(ValueError, 'Dense weights are not supported on TPU'): est.train(input_fn=_input_fn, steps=1) def test_embedding_with_weighted_categorical_column_share_weights_error(self): num_buckets = 5 embedding_dim = 
10 sparse_id_column1 = tf.feature_column.categorical_column_with_identity( key='ids1', num_buckets=num_buckets) weighted_sparse_id_column1 = tf.feature_column.weighted_categorical_column( categorical_column=sparse_id_column1, weight_feature_key='values') sparse_id_column2 = tf.feature_column.categorical_column_with_identity( key='ids2', num_buckets=num_buckets) weighted_sparse_id_column2 = tf.feature_column.weighted_categorical_column( categorical_column=sparse_id_column2, weight_feature_key='values') feature_columns = tf.compat.v1.tpu.experimental.shared_embedding_columns( categorical_columns=[ weighted_sparse_id_column1, weighted_sparse_id_column2 ], dimension=embedding_dim) def _input_fn(params): sample_size = 2 dense_shape = (sample_size, num_buckets) indices = ((0, 0), (0, 2), (1, 0), (1, 1)) id_values = (2, 1, 1, 0) weight_values = (0.5, 1.0, 0.2, 0.0) inputs1 = tf.sparse.SparseTensor( indices=indices, values=id_values, dense_shape=dense_shape) inputs2 = tf.sparse.SparseTensor( indices=indices, values=id_values, dense_shape=dense_shape) weights = tf.sparse.SparseTensor( indices=indices, values=weight_values, dense_shape=dense_shape) labels = np.zeros((sample_size, embedding_dim), dtype=np.float32) data = tf.compat.v1.data.Dataset.from_tensor_slices(({ 'ids1': inputs1, 'ids2': inputs2, 'values': weights }, labels)) data = data.repeat() data = data.batch(params['batch_size'], drop_remainder=True) return data est = self._create_estimator_with_feature_columns( feature_columns, numeric_check=True) with self.assertRaisesRegex( ValueError, 'Please check if the weights are present in feature dict. 
Also note' ' weight-sharing among weighted_categorical_column is not supported on ' 'TPU.'): est.train(input_fn=_input_fn, steps=1) def _test_tensor_core_embedding(self, shared_embedding, both_embeddings, input_method, use_cpu=False): sparse_column1 = tf.feature_column.categorical_column_with_identity( key='x', num_buckets=10) sparse_column2 = tf.feature_column.categorical_column_with_identity( key='y', num_buckets=10) if shared_embedding: feature_columns = tf.compat.v1.tpu.experimental.shared_embedding_columns( [sparse_column1, sparse_column2], dimension=2, embedding_lookup_device='tpu_tensor_core', tensor_core_shape=[None, 2]) else: feature_columns = [ tf.compat.v1.tpu.experimental.embedding_column( categorical_column=sparse_column1, dimension=2, embedding_lookup_device='tpu_tensor_core', tensor_core_shape=[None, 2]), ] if both_embeddings: feature_columns.append( tf.compat.v1.tpu.experimental.embedding_column( categorical_column=sparse_column2, dimension=4, embedding_lookup_device='tpu_tensor_core', tensor_core_shape=[None, 2])) else: feature_columns.append( tf.compat.v1.tpu.experimental.embedding_column( categorical_column=sparse_column2, dimension=4)) def _input_fn(params): indices = [] for i in range(params['batch_size']): for j in [0, 1]: indices.append([i, j]) feature1_data = tf.compat.v1.data.Dataset.from_tensor_slices( tf.sparse.SparseTensor( indices=indices, values=[1] * (2 * params['batch_size']), dense_shape=[params['batch_size'], 2])) feature2_data = tf.compat.v1.data.Dataset.from_tensor_slices( tf.sparse.SparseTensor( indices=indices, values=[2] * (2 * params['batch_size']), dense_shape=[params['batch_size'], 2])) labels_data = tf.compat.v1.data.Dataset.from_tensor_slices( np.array([[0]] * params['batch_size'], dtype=np.int32)) dataset = tf.compat.v1.data.Dataset.zip( (feature1_data, feature2_data, labels_data)) dataset = dataset.repeat() dataset = dataset.batch(params['batch_size'], drop_remainder=True) def _map(x, y, z): return {'x': x, 'y': y}, z 
dataset = dataset.map(_map) return dataset est = self._create_estimator_with_feature_columns( feature_columns, use_cpu=use_cpu, input_method=input_method) est.train(input_fn=_input_fn, steps=1) checkpoint_reader = tf.compat.v1.train.NewCheckpointReader( tf.train.latest_checkpoint(est.config.model_dir)) return checkpoint_reader.get_variable_to_shape_map().keys() @parameterized.named_parameters( ('non_shared_single_v1', False, False, _PER_HOST_V1), ('non_shared_both_v1', False, True, _PER_HOST_V1), ('shared_v1', True, True, _PER_HOST_V1), ('non_shared_single_v2', False, False, _PER_HOST_V2), ('non_shared_both_v2', False, True, _PER_HOST_V2), ('shared_v2', True, True, _PER_HOST_V2)) def test_tensor_core_embedding(self, shared, both_embeddings, input_method): cpu_names = self._test_tensor_core_embedding( shared_embedding=shared, both_embeddings=both_embeddings, input_method=input_method, use_cpu=True) tpu_names = self._test_tensor_core_embedding( shared_embedding=shared, both_embeddings=both_embeddings, input_method=input_method, use_cpu=False) # TPU will have some extra variables but all CPU variables should be in the # TPU checkpoint for name in cpu_names: self.assertIn(name, tpu_names) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_evaluation_test.py ================================================ # Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for TPUEstimator evaluation related functionalities."""

from absl import flags
import numpy as np
import tensorflow as tf
from tensorflow.python.training import evaluation
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.export import export_output as export_output_lib
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator
from tensorflow_estimator.python.estimator.util import tf_keras_v1
# pylint: enable=g-direct-tensorflow-import

flags.DEFINE_integer('test_num_shards', 8, 'number of replicas to test')

FLAGS = flags.FLAGS

# Mode shorthands used throughout the tests.
_TRAIN = model_fn_lib.ModeKeys.TRAIN
_EVAL = model_fn_lib.ModeKeys.EVAL
_PREDICT = model_fn_lib.ModeKeys.PREDICT

# Input pipeline sharding names.
_PER_HOST = 'per_host_sharding'
_PER_SHARD = 'per_shard_sharding'
_UNSHARDED = 'unsharded'
_INPUT_PIPELINE_WITH_QUEUE_RUNNER = (
    'Input pipeline contains one or more QueueRunners')


def dense_computation(features):
  """Applies a single zero-initialized dense unit to features['x']."""
  return tf_keras_v1.__internal__.legacy.layers.dense(
      features['x'], 1, kernel_initializer=tf.compat.v1.zeros_initializer())


def get_model_fn(export_tpu_tensor=True,
                 export_cpu_tensor=False,
                 tpu_estimator_spec=True):
  """Builds a model_fn; flags control export outputs and spec type."""

  def model_fn(features, labels, mode, params):
    del params
    loss = None
    train_op = None
    predictions = dense_computation(features)
    export_outputs = None
    if mode != _PREDICT:
      loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
      optimizer = tf.compat.v1.tpu.CrossShardOptimizer(
          tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5))
      train_op = optimizer.minimize(loss, tf.compat.v1.train.get_global_step())
    else:
      if export_tpu_tensor:
        key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        export_outputs = {
            key: export_output_lib.PredictOutput({'prediction': predictions})
        }
      else:
        export_outputs = {}

      if export_cpu_tensor:

        def host_call(predictions):
          # Runs on the host: adds a CPU-side classification export output.
          classes = tf.as_string(predictions, name='classes')
          classification_output = export_output_lib.ClassificationOutput(
              classes=classes)
          export_outputs['classification'] = classification_output

        tf.compat.v1.tpu.outside_compilation(host_call, predictions)

    if tpu_estimator_spec:
      spec_type = tpu_estimator.TPUEstimatorSpec
    else:
      spec_type = model_fn_lib.EstimatorSpec

    return spec_type(
        mode,
        loss=loss,
        train_op=train_op,
        predictions={'predictions': predictions},
        export_outputs=export_outputs)

  return model_fn


def dummy_input_fn_with_dataset(batch_size, repeat=True, x=None):
  """Returns a dataset of ({'x': ...}, label) pairs; labels are all 2.0."""
  if x is None:
    x = np.random.normal(size=[batch_size, 1]).astype(np.float32)
  labels = [[2.0]] * batch_size

  dataset1 = tf.compat.v1.data.Dataset.from_tensor_slices(x)
  dataset2 = tf.compat.v1.data.Dataset.from_tensor_slices(labels)
  dataset = tf.compat.v1.data.Dataset.zip((dataset1, dataset2))
  if repeat:
    dataset = dataset.repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)

  def _map(x, y):
    return {'x': x}, y

  return dataset.map(_map)


def dummy_input_fn(batch_size, repeat=True):
  """Like dummy_input_fn_with_dataset but returns one-shot iterator tensors."""
  dataset = dummy_input_fn_with_dataset(batch_size, repeat)
  iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
  return iterator.get_next()


def create_run_config(iterations_per_loop, **kwargs):
  """Builds a RunConfig with FLAGS.test_num_shards replicas."""
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=FLAGS.test_num_shards,
          **kwargs),
  )


class TPUEstimatorEvaluationTest(tf.test.TestCase):
  """Covers TPUEstimator.evaluate with the various eval-metric styles."""

  def _create_input_fn(self):

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    return _input_fn

  def _create_head(self, mode, loss, eval_metrics):
    """Creates a head returning `TPUEstimatorSpec` based on mode."""
    if mode == _EVAL:
      return tpu_estimator.TPUEstimatorSpec(
          mode=mode, eval_metrics=eval_metrics, loss=loss)
    # Train
    optimizer = tf.compat.v1.tpu.CrossShardOptimizer(
        tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5))
    train_op = optimizer.minimize(
        loss, global_step=tf.compat.v1.train.get_global_step())
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode, train_op=train_op, loss=loss)

  def _create_head_with_eval_metric_ops(self, mode, loss, eval_metric_ops):
    """Creates a head returning `TPUEstimatorSpec` based on mode.

    This version contains eval that will not run on TPUs, where
    eval_metric_ops has not been split into a metrics_fn that runs on CPUs.
    The intent is to test the entire eval (model_fn forward pass) and metrics
    output on CPU.

    Args:
      mode: The mode such as TRAIN, EVAL.
      loss: Training loss `Tensor`. Must be either scalar, or with shape `[1]`.
      eval_metric_ops: Dict of metric results keyed by name.

    Returns:
      An EstimatorSpec for EVAL or TPUEstimatorSpec otherwise.
    """
    if mode == _EVAL:
      return model_fn_lib.EstimatorSpec(
          mode=mode, eval_metric_ops=eval_metric_ops, loss=loss)
    # Train
    optimizer = tf.compat.v1.tpu.CrossShardOptimizer(
        tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5))
    train_op = optimizer.minimize(
        loss, global_step=tf.compat.v1.train.get_global_step())
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode, train_op=train_op, loss=loss)

  def _metric_fn_on_cpu(self, labels, predictions):
    # NOTE(review): the key says 'mse' but the op is mean_absolute_error —
    # presumably intentional for these tests; confirm before reusing.
    return {
        'mse': tf.compat.v1.metrics.mean_absolute_error(labels, predictions),
    }

  def _model_fn_without_eval_metrics(self, features, labels, mode, params):
    del params  # unused.
    predictions = tf_keras_v1.__internal__.legacy.layers.dense(
        features['x'], 1,
        kernel_initializer=tf.compat.v1.zeros_initializer())
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    return self._create_head(mode, loss, None)

  def _model_fn_with_eval_tensor_list(self, features, labels, mode, params):
    del params  # unused.
    predictions = tf_keras_v1.__internal__.legacy.layers.dense(
        features['x'], 1,
        kernel_initializer=tf.compat.v1.zeros_initializer())
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    # eval_metrics passes metric-fn args as a positional tensor list.
    return self._create_head(
        mode, loss,
        eval_metrics=(self._metric_fn_on_cpu, [labels, predictions]))

  def _model_fn_with_eval_dict(self, features, labels, mode, params):
    del params  # unused.
    predictions = tf_keras_v1.__internal__.legacy.layers.dense(
        features['x'], 1,
        kernel_initializer=tf.compat.v1.zeros_initializer())
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    # eval_metrics passes metric-fn args as a keyword dict.
    return self._create_head(
        mode, loss,
        eval_metrics=(self._metric_fn_on_cpu, {
            'labels': labels,
            'predictions': predictions
        }))

  def _model_fn_with_eval_metric_ops(self, features, labels, mode, params):
    del params  # unused.
    predictions = tf_keras_v1.__internal__.legacy.layers.dense(
        features['x'], 1,
        kernel_initializer=tf.compat.v1.zeros_initializer())
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    eval_metric_ops = self._metric_fn_on_cpu(labels, predictions)
    return self._create_head_with_eval_metric_ops(mode, loss, eval_metric_ops)

  def _test_eval_steps(self, model_fn, expected_eval_steps, iterations):
    """Checks the eval step counter is set once, independent of iterations."""
    run_config = create_run_config(iterations_per_loop=iterations)
    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)

    class _EvalStepCheckHook(tf.compat.v1.train.SessionRunHook):
      """Check eval step counter after one session.run.

      As the evaluation sets the eval iterations as the eval steps, the
      after_run should be invoked only once.
      """

      def __init__(self, iterations_per_loop, test_case):
        """Constructs the run hook."""
        self._iterations = iterations_per_loop
        self._invoked = False
        self._test_case = test_case

      def before_run(self, run_context):
        del run_context
        # For eval on TPU, the hook should be run only once.
        self._test_case.assertFalse(self._invoked)

      def after_run(self, run_context, run_values):
        # To avoid race condition between the eval step read and increment in
        # evaluation graph, we read the value explicitly here.
        eval_steps = run_context.session.run(
            evaluation._get_or_create_eval_step())
        self._test_case.assertEqual(expected_eval_steps, eval_steps)
        self._test_case.assertFalse(self._invoked)
        self._invoked = True

    est.evaluate(
        self._create_input_fn(),
        steps=expected_eval_steps,
        hooks=[_EvalStepCheckHook(iterations, self)])

  def test_no_eval_metrics(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_without_eval_metrics,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_steps_not_effected_by_training_iterations(self):
    # Same training iterations_per_loop, different eval step counts.
    self._test_eval_steps(
        model_fn=self._model_fn_with_eval_tensor_list,
        expected_eval_steps=2,
        iterations=4)
    self._test_eval_steps(
        model_fn=self._model_fn_with_eval_tensor_list,
        expected_eval_steps=6,
        iterations=4)

  def test_eval_steps_with_no_eval_metrics(self):
    self._test_eval_steps(
        model_fn=self._model_fn_without_eval_metrics,
        expected_eval_steps=6,
        iterations=1)

  def test_eval_metrics_with_tensor_list(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_tensor_list,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_batch_size_with_non_divisible_num_shards_broadcast_mode(self):
    # Batch size 7 does not divide by the shard count; BROADCAST mode must
    # still train and evaluate.
    run_config = create_run_config(
        iterations_per_loop=2,
        per_host_input_for_training=tpu_config.InputPipelineConfig.BROADCAST)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_tensor_list,
        config=run_config,
        train_batch_size=7,
        eval_batch_size=7)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_with_tensor_list_on_cpu(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_tensor_list,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16,
        use_tpu=False)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_with_dict(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_dict,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_with_dict_on_cpu(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_dict,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16,
        use_tpu=False)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_ops_cpu_training(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_metric_ops,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16,
        use_tpu=False,
        eval_on_tpu=False)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_ops_cpu_training_warning(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_metric_ops,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16,
        use_tpu=False,
        # eval_on_tpu is ignored if use_tpu is False
        eval_on_tpu=True)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_ops_tpu_training(self):
    run_config = create_run_config(iterations_per_loop=2)
    est = tpu_estimator.TPUEstimator(
model_fn=self._model_fn_with_eval_metric_ops, config=run_config, train_batch_size=16, eval_batch_size=16, use_tpu=True, eval_on_tpu=False) est.train(self._create_input_fn(), steps=1) est.evaluate(self._create_input_fn(), steps=1) def test_eval_metrics_ops_tpu_training_failure(self): run_config = create_run_config(iterations_per_loop=2) est = tpu_estimator.TPUEstimator( model_fn=self._model_fn_with_eval_metric_ops, config=run_config, train_batch_size=16, eval_batch_size=16, use_tpu=True, # Generates an error on eval, because model_fn(mode=EVAL) # has not been split into an eval_metrics_fn. eval_on_tpu=True) est.train(self._create_input_fn(), steps=1) with self.assertRaisesRegex( RuntimeError, 'TPU evaluation must have type`TPUEstimatorSpec`'): est.evaluate(self._create_input_fn(), steps=1) def test_error_out_if_steps_is_float(self): with self.assertRaisesRegex(TypeError, 'must be int'): run_config = create_run_config(iterations_per_loop=2) est = tpu_estimator.TPUEstimator( model_fn=self._model_fn_with_eval_dict, config=run_config, train_batch_size=16, eval_batch_size=16, use_tpu=True) est.evaluate(self._create_input_fn(), steps=12.3) def test_error_out_if_steps_is_invalid(self): with self.assertRaisesRegex(ValueError, 'must be positive'): run_config = create_run_config(iterations_per_loop=2) est = tpu_estimator.TPUEstimator( model_fn=self._model_fn_with_eval_dict, config=run_config, train_batch_size=16, eval_batch_size=16, use_tpu=True) est.evaluate(self._create_input_fn(), steps=-321) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_export_test.py ================================================ # Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for TPUEstimator export related functionalities."""

from absl import flags
from absl.testing import parameterized
import numpy as np
import os
import tempfile
import tensorflow.compat.v1 as tf

# pylint: disable=g-direct-tensorflow-import
from tensorflow.core.example import example_pb2
from tensorflow.python import data as dataset_lib
from tensorflow.python.client import session
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops.losses import losses
from tensorflow.python.platform import gfile
from tensorflow.python.platform import test
from tensorflow.python.saved_model import loader
from tensorflow.python.saved_model import loader_impl
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.training import training
from tensorflow.python.util import compat
from tensorflow_estimator.python.estimator import estimator as estimator_lib
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.export import export_lib
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator
from tensorflow_estimator.python.estimator.util import tf_keras_v1
# pylint: enable=g-direct-tensorflow-import

flags.DEFINE_integer('test_num_shards', 8, 'number of replicas to test')

FLAGS = flags.FLAGS

# Mode shorthands used throughout this file.
_TRAIN = model_fn_lib.ModeKeys.TRAIN
_EVAL = model_fn_lib.ModeKeys.EVAL
_PREDICT = model_fn_lib.ModeKeys.PREDICT

_PER_HOST = 'per_host_sharding'
_PER_SHARD = 'per_shard_sharding'
_UNSHARDED = 'unsharded'
_INPUT_PIPELINE_WITH_QUEUE_RUNNER = (
    'Input pipeline contains one or more QueueRunners')


def dense_computation(features):
  """Applies a single zero-initialized dense unit to features['x']."""
  return tf_keras_v1.__internal__.legacy.layers.dense(
      features['x'], 1, kernel_initializer=init_ops.zeros_initializer())


def get_model_fn(export_tpu_tensor=True,
                 export_cpu_tensor=False,
                 tpu_estimator_spec=True):
  """Builds a model_fn for export tests.

  Args:
    export_tpu_tensor: If True, PREDICT mode adds the default serving
      signature with the TPU-computed prediction.
    export_cpu_tensor: If True, PREDICT mode adds a 'classification' output
      computed via outside compilation (i.e. on the CPU host).
    tpu_estimator_spec: If True, return a TPUEstimatorSpec; otherwise a plain
      EstimatorSpec.

  Returns:
    A model_fn closure.
  """

  def model_fn(features, labels, mode, params):
    del params
    loss = None
    train_op = None
    predictions = dense_computation(features)
    export_outputs = None
    if mode != _PREDICT:
      loss = losses.mean_squared_error(labels, predictions)
      optimizer = tf.tpu.CrossShardOptimizer(
          training.GradientDescentOptimizer(learning_rate=0.5))
      train_op = optimizer.minimize(loss, training.get_global_step())
    else:
      if export_tpu_tensor:
        key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        export_outputs = {
            key: export_lib.PredictOutput({'prediction': predictions})
        }
      else:
        export_outputs = {}

      if export_cpu_tensor:

        def host_call(predictions):
          return string_ops.as_string(predictions, name='classes')

        # Runs on the CPU host even when the rest of the graph is on TPU.
        classes = tf.tpu.outside_compilation(host_call, predictions)
        classification_output = export_lib.ClassificationOutput(
            classes=classes)
        export_outputs['classification'] = classification_output

    if tpu_estimator_spec:
      spec_type = tpu_estimator.TPUEstimatorSpec
    else:
      spec_type = model_fn_lib.EstimatorSpec

    return spec_type(
        mode,
        loss=loss,
        train_op=train_op,
        predictions={'predictions': predictions},
        export_outputs=export_outputs)

  return model_fn


def dummy_input_fn_with_dataset(batch_size, repeat=True, x=None):
  """Returns a dataset of ({'x': x}, label) batches; labels are all 2.0."""
  if x is None:
    x = np.random.normal(size=[batch_size, 1]).astype(np.float32)
  labels = [[2.0]] * batch_size

  dataset1 = dataset_lib.Dataset.from_tensor_slices(x)
  dataset2 = dataset_lib.Dataset.from_tensor_slices(labels)
  dataset = dataset_lib.Dataset.zip((dataset1, dataset2))
  if repeat:
    dataset = dataset.repeat()
  # drop_remainder so every batch has a static shape (required on TPU).
  dataset = dataset.batch(batch_size, drop_remainder=True)

  def _map(x, y):
    return {'x': x}, y

  return dataset.map(_map)


def dummy_input_fn(batch_size, repeat=True):
  """One-shot-iterator form of dummy_input_fn_with_dataset."""
  dataset = dummy_input_fn_with_dataset(batch_size, repeat)
  iterator = dataset_ops.make_one_shot_iterator(dataset)
  return iterator.get_next()


def create_run_config(iterations_per_loop, **kwargs):
  """Creates a TPU RunConfig; num_shards defaults to FLAGS.test_num_shards."""
  if 'num_shards' not in kwargs:
    kwargs['num_shards'] = FLAGS.test_num_shards
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop, **kwargs),
  )


class TPUEstimatorExportTest(parameterized.TestCase):

  def setUp(self):
    super().setUp()
    # Serving receiver: parses tf.Example protos with a single float 'x'.
    feature_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
    self._serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
    # Supervised receiver: raw placeholders for features and labels (used by
    # the EVAL-mode export test).
    feature_spec = {
        'x':
            array_ops.placeholder(
                dtype=dtypes.float32, shape=(2, 1), name='x'),
    }
    label_spec = array_ops.placeholder(
        dtype=dtypes.float32, shape=(1, 1), name='truth')
    self._supervised_input_receiver_fn = (
        export_lib.build_raw_supervised_input_receiver_fn(
            feature_spec, label_spec))

  @parameterized.parameters(
      (True, False, False), (True, True, False), (False, True, False),
      (True, False, True), (True, True, True), (False, True, True))
  def test_export_tpu_savedmodel_e2e(self, export_tpu_tensor,
                                     export_cpu_tensor, use_export_mode_v2):
    tmpdir = tempfile.mkdtemp()

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    model_fn = get_model_fn(export_tpu_tensor, export_cpu_tensor)
    run_config = create_run_config(iterations_per_loop=4)

    if use_export_mode_v2:
      export_api_version = tpu_estimator.ExportSavedModelApiVersion.V2

      batch_config = tpu_estimator.BatchConfig(
          num_batch_threads=1,
          max_batch_size=1,
          batch_timeout_micros=100,
          allowed_batch_sizes=[1])

      def tpu_model_fn(features, labels, mode, params):
        # In V2 export mode, PREDICT on TPU goes through the batched
        # inference-on-TPU path; everything else falls back to model_fn.
        if mode == _PREDICT and params['use_tpu']:
          return tpu_estimator.model_fn_inference_on_tpu(
              model_fn, features, labels, mode, params, batch_config)
        else:
          return model_fn(features, labels, mode, params)

      est_model_fn = tpu_model_fn
    else:
      export_api_version = tpu_estimator.ExportSavedModelApiVersion.V1
      est_model_fn = model_fn

    est = tpu_estimator.TPUEstimator(
        model_fn=est_model_fn,
        config=run_config,
        train_batch_size=16,
        export_to_tpu=True,
        export_saved_model_api_version=export_api_version)
    est.train(_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        self._serving_input_receiver_fn)

    self._validate_export(export_dir_base, export_dir, export_tpu_tensor,
                          export_cpu_tensor)

    # Clean up.
    gfile.DeleteRecursively(tmpdir)

  def _validate_export(self, export_dir_base, export_dir, export_tpu_tensor,
                       export_cpu_tensor):
    """Checks SavedModel layout, then loads and runs both metagraphs."""
    # Check that all the files are in the right places.
    self.assertTrue(gfile.Exists(export_dir_base))
    self.assertTrue(gfile.Exists(export_dir))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir),
                compat.as_bytes('saved_model.pb'))))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir), compat.as_bytes('variables'))))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir),
                compat.as_bytes('variables/variables.index'))))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir),
                compat.as_bytes('variables/variables.data-00000-of-00001'))))

    def session_run():
      # Feeds one serialized tf.Example and fetches the exported signatures.
      # Uses `meta_graph` and `sess` from the enclosing `with` blocks below.
      example = example_pb2.Example()
      example.features.feature['x'].float_list.value.append(1)

      tensor_name_prediction = None
      tensor_name_classes = None
      if export_tpu_tensor:
        key_prediction = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        tensor_name_prediction = (
            meta_graph.signature_def[key_prediction].outputs['prediction']
            .name)
        tensor_name_input = (
            meta_graph.signature_def[key_prediction].inputs['examples'].name)

      if export_cpu_tensor:
        key_classification = 'classification'
        tensor_name_classes = (
            meta_graph.signature_def[key_classification].outputs['classes']
            .name)
        tensor_name_input = (
            meta_graph.signature_def[key_classification].inputs['inputs'].name)

      if export_tpu_tensor:
        sess.run(
            tensor_name_prediction,
            feed_dict={tensor_name_input: [example.SerializeToString()]})

      if export_cpu_tensor:
        sess.run(
            tensor_name_classes,
            feed_dict={tensor_name_input: [example.SerializeToString()]})

      if export_cpu_tensor and export_tpu_tensor:
        sess.run(
            [tensor_name_prediction, tensor_name_classes],
            feed_dict={tensor_name_input: [example.SerializeToString()]})

    # Restore, to validate that the export was well-formed.
    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        meta_graph = loader.load(
            sess, [tag_constants.SERVING, tag_constants.TPU], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertIn('input_example_tensor', graph_ops)
        self.assertIn('ParseExample/ParseExampleV2', graph_ops)
        self.assertNotIn('dense/kernel/GuaranteeConst', graph_ops)
        sess.run(tf.tpu.initialize_system())
        session_run()

    # Restore, to validate that the export was well-formed.
    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        meta_graph = loader.load(sess, [tag_constants.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertIn('input_example_tensor', graph_ops)
        self.assertIn('ParseExample/ParseExampleV2', graph_ops)
        self.assertIn('dense/kernel', graph_ops)
        # GuaranteeConst ops won't be present in the CPU-only graph.
        self.assertNotIn('dense/kernel/GuaranteeConst', graph_ops)
        session_run()

  def test_export_tpu_savedmodel_export_to_cpu_false(self):
    # Test that when `export_to_cpu` is `False`, CPU metagraph is not exported.
    tmpdir = tempfile.mkdtemp()
    model_fn = get_model_fn(export_tpu_tensor=True, export_cpu_tensor=True)
    run_config = create_run_config(iterations_per_loop=4)

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        train_batch_size=16,
        export_to_tpu=True,
        export_to_cpu=False)
    est.train(_input_fn, steps=1)

    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export_no_tpu'))
    export_dir = est.export_saved_model(export_dir_base,
                                        self._serving_input_receiver_fn)

    saved_model = loader_impl.parse_saved_model(export_dir)
    self.assertLen(saved_model.meta_graphs, 1)
    tags = set(saved_model.meta_graphs[0].meta_info_def.tags)
    self.assertEqual(tags, set([tag_constants.SERVING, tag_constants.TPU]))

    # Clean up.
    gfile.DeleteRecursively(tmpdir)

  def test_export_tpu_savedmodel_export_to_tpu_false(self):
    # Test that when `export_to_tpu` is `False`, TPU metagraph is not exported.
    tmpdir = tempfile.mkdtemp()
    model_fn = get_model_fn(export_tpu_tensor=True, export_cpu_tensor=True)
    run_config = create_run_config(iterations_per_loop=4)

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        train_batch_size=16,
        export_to_tpu=False)
    est.train(_input_fn, steps=1)

    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export_no_tpu'))
    export_dir = est.export_saved_model(export_dir_base,
                                        self._serving_input_receiver_fn)

    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        with self.assertRaisesRegex(
            RuntimeError,
            'MetaGraphDef associated with tags \'serve\', \'tpu\' could not be '
            'found in SavedModel.'):
          loader.load(
              sess, [tag_constants.SERVING, tag_constants.TPU], export_dir)
        loader.load(
            sess, [tag_constants.SERVING], export_dir)

    # Clean up.
    gfile.DeleteRecursively(tmpdir)

  def test_export_tpu_savedmodel_export_to_tpu_false_eval(self):
    # Test exporting CPU evaluation graph when `export_to_tpu` is `False`.
    tmpdir = tempfile.mkdtemp()
    mode = model_fn_lib.ModeKeys.EVAL
    model_fn = get_model_fn(export_tpu_tensor=True, export_cpu_tensor=True)
    run_config = create_run_config(iterations_per_loop=4)

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        train_batch_size=16,
        export_to_tpu=False)
    est.train(_input_fn, steps=1)

    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export_no_tpu_eval'))
    export_dir = est.export_saved_model(
        export_dir_base,
        self._supervised_input_receiver_fn,
        experimental_mode=mode)

    # Check that all the files are in the right places.
    self.assertTrue(gfile.Exists(export_dir_base))

    # Restore, to validate that the export was well-formed.
    tag_set = export_lib.EXPORT_TAG_MAP[mode]
    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        loader.load(sess, tag_set, export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertIn('dense/kernel', graph_ops)

    # Clean up.
    gfile.DeleteRecursively(tmpdir)

  def test_export_estimator_savedmodel(self):
    export_tpu_tensor = True
    export_cpu_tensor = False
    tmpdir = tempfile.mkdtemp()

    def _input_fn(params):
      del params
      # Estimator does not pass `batch_size` to `input_fn`.
      return dummy_input_fn(batch_size=1)

    model_fn = get_model_fn(
        export_tpu_tensor=export_tpu_tensor,
        export_cpu_tensor=export_cpu_tensor,
        tpu_estimator_spec=False)
    est = estimator_lib.Estimator(model_fn=model_fn)
    est.train(_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export'))
    export_dir = tpu_estimator.export_estimator_savedmodel(
        est, export_dir_base, self._serving_input_receiver_fn)

    self._validate_export(export_dir_base, export_dir, export_tpu_tensor,
                          export_cpu_tensor)

    # Clean up.
    gfile.DeleteRecursively(tmpdir)

  def test_regression_output_tensors_roundtrip(self):
    """RegressionOutput survives tensor extraction + clone-with-new-tensors."""
    value = array_ops.placeholder(dtypes.float32, 1, name='value')
    regression_output = export_lib.RegressionOutput(value)
    self.assertSequenceEqual(
        [value], tpu_estimator._export_output_to_tensors(regression_output))

    value_new = array_ops.placeholder(dtypes.float32, 1, name='value_new')
    regression_output_new = (
        tpu_estimator._clone_export_output_with_tensors(
            regression_output, [value_new]))
    self.assertEqual(value_new, regression_output_new.value)

  def test_predict_output_tensors_roundtrip(self):
    """PredictOutput survives tensor extraction + clone-with-new-tensors."""
    value1 = array_ops.placeholder(dtypes.float32, 1, name='value1')
    value2 = array_ops.placeholder(dtypes.float32, 1, name='value2')
    predict_output = export_lib.PredictOutput({
        'value1': value1,
        'value2': value2
    })
    export_output_tensors = tpu_estimator._export_output_to_tensors(
        predict_output)
    # Order of extracted tensors is not guaranteed, only the element set.
    self.assertSameElements([value1, value2], export_output_tensors)
    self.assertLen(export_output_tensors, 2)

    tensors_new = [
        array_ops.identity(t, name=t.name.split(':')[0] + '_new')
        for t in export_output_tensors
    ]
    predict_output_new = tpu_estimator._clone_export_output_with_tensors(
        predict_output, tensors_new)
    outputs = predict_output_new.outputs
    self.assertLen(outputs, 2)
    self.assertEqual(outputs['value1'].name, 'value1_new:0')
    self.assertEqual(outputs['value2'].name, 'value2_new:0')

  def test_classification_output_tensors_roundtrip_classes_only(self):
    # Extraction yields a [scores, classes] pair; scores slot is None here.
    classes = array_ops.placeholder(dtypes.string, 1, name='classes')
    classification_output = export_lib.ClassificationOutput(classes=classes)
    classification_output_tensors = (
        tpu_estimator._export_output_to_tensors(classification_output))
    self.assertEqual(classification_output_tensors, [None, classes])

    classes_new = array_ops.placeholder(dtypes.string, 1, name='classes_new')
    classification_output_new = (
        tpu_estimator._clone_export_output_with_tensors(
            classification_output, [None, classes_new]))
    self.assertEqual(classification_output_new.classes, classes_new)

  def test_classification_output_tensors_roundtrip_scores_only(self):
    scores = array_ops.placeholder(dtypes.float32, 1, name='scores')
    classification_output = export_lib.ClassificationOutput(scores=scores)
    classification_output_tensors = (
        tpu_estimator._export_output_to_tensors(classification_output))
    self.assertEqual(classification_output_tensors, [scores, None])

    scores_new = array_ops.placeholder(dtypes.float32, 1, name='scores_new')
    classification_output_new = (
        tpu_estimator._clone_export_output_with_tensors(
            classification_output, [scores_new, None]))
    self.assertEqual(classification_output_new.scores, scores_new)

  def test_classification_output_tensors_roundtrip_classify_both(self):
    classes = array_ops.placeholder(dtypes.string, 1, name='classes')
    scores = array_ops.placeholder(dtypes.float32, 1, name='scores')
    classification_output = export_lib.ClassificationOutput(scores, classes)
    classification_output_tensors = (
        tpu_estimator._export_output_to_tensors(classification_output))
    self.assertSequenceEqual(classification_output_tensors, [scores, classes])

    classes_new = array_ops.placeholder(dtypes.string, 1, name='classes_new')
    scores_new = array_ops.placeholder(dtypes.float32, 1, name='scores_new')
    classification_output_new = (
        tpu_estimator._clone_export_output_with_tensors(
            classification_output, [scores_new, classes_new]))
    self.assertEqual(classification_output_new.classes, classes_new)
    self.assertEqual(classification_output_new.scores, scores_new)


def get_model_fn_v2():
  """Builds a model_fn for the V2 export API tests."""

  def model_fn(features, labels, mode, params):
    loss = None
    train_op = None
    export_outputs = None

    # This could be some pre-processing on CPU like calls to input layer with
    # embedding columns.
    x2 = features['x'] * 2

    def computation(input_tensor):
      return tf_keras_v1.__internal__.legacy.layers.dense(
          input_tensor, 1, kernel_initializer=init_ops.zeros_initializer())

    if mode != _PREDICT:
      predictions = computation(x2)
      loss = losses.mean_squared_error(labels, predictions)
      optimizer = tf.tpu.CrossShardOptimizer(
          training.GradientDescentOptimizer(learning_rate=0.5))
      train_op = optimizer.minimize(loss, training.get_global_step())
    else:
      inputs = [x2]
      if params['use_tpu']:
        # V2 export path: wrap the computation with batched TPU inference.
        predictions = array_ops.identity(
            tpu_estimator.inference_on_tpu(
                computation,
                inputs,
                num_batch_threads=1,
                max_batch_size=2,
                batch_timeout_micros=100),
            name='predictions')
      else:
        predictions = array_ops.identity(
            computation(*inputs), name='predictions')
      key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
      export_outputs = {
          key: export_lib.PredictOutput({'prediction': predictions})
      }

      classes = string_ops.as_string(predictions, name='classes')
      classification_output = export_lib.ClassificationOutput(classes=classes)
      export_outputs['classification'] = classification_output

    return tpu_estimator.TPUEstimatorSpec(
        mode,
        loss=loss,
        train_op=train_op,
        predictions={'predictions': predictions},
        export_outputs=export_outputs)

  return model_fn


class TPUEstimatorExportV2Test(parameterized.TestCase):

  def setUp(self):
    super().setUp()
    feature_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
    self._serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))

  def test_export_tpu_savedmodel_e2e(self):
    tmpdir = tempfile.mkdtemp()

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    model_fn = get_model_fn_v2()
    run_config = create_run_config(iterations_per_loop=4)

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        train_batch_size=16,
        export_to_tpu=True,
        export_saved_model_api_version=tpu_estimator.ExportSavedModelApiVersion
        .V2)
    est.train(_input_fn, steps=1)

    # Perform the export.
    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export'))
    export_dir = est.export_saved_model(export_dir_base,
                                        self._serving_input_receiver_fn)

    self._validate_export(export_dir_base, export_dir)

    # Clean up.
    gfile.DeleteRecursively(tmpdir)

  def _validate_export(self, export_dir_base, export_dir):
    """Checks SavedModel layout, then loads and runs both metagraphs."""
    # Check that all the files are in the right places.
    self.assertTrue(gfile.Exists(export_dir_base))
    self.assertTrue(gfile.Exists(export_dir))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir),
                compat.as_bytes('saved_model.pb'))))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir), compat.as_bytes('variables'))))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir),
                compat.as_bytes('variables/variables.index'))))
    self.assertTrue(
        gfile.Exists(
            os.path.join(
                compat.as_bytes(export_dir),
                compat.as_bytes('variables/variables.data-00000-of-00001'))))

    def session_run():
      # Fetches both exported signatures, feeding one serialized tf.Example.
      # Uses `meta_graph` and `sess` from the enclosing `with` blocks below.
      example = example_pb2.Example()
      example.features.feature['x'].float_list.value.append(1)

      tensor_name_prediction = None
      tensor_name_classes = None
      key_prediction = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
      tensor_name_prediction = (
          meta_graph.signature_def[key_prediction].outputs['prediction'].name)

      key_classification = 'classification'
      tensor_name_classes = (
          meta_graph.signature_def[key_classification].outputs['classes'].name)

      sess.run(
          tensor_name_prediction,
          feed_dict={'input_example_tensor:0': [example.SerializeToString()]})
      sess.run(
          tensor_name_classes,
          feed_dict={'input_example_tensor:0': [example.SerializeToString()]})
      sess.run(
          [tensor_name_prediction, tensor_name_classes],
          feed_dict={'input_example_tensor:0': [example.SerializeToString()]})

    # Restore, to validate that the export was well-formed.
    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        meta_graph = loader.load(
            sess, [tag_constants.SERVING, tag_constants.TPU], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertIn('input_example_tensor', graph_ops)
        self.assertIn('ParseExample/ParseExampleV2', graph_ops)
        self.assertNotIn('dense/kernel/GuaranteeConst', graph_ops)
        # V2 export wraps inference in a BatchFunction op.
        self.assertIn('batch/BatchFunction', graph_ops)
        sess.run(tf.tpu.initialize_system())
        session_run()

    # Restore, to validate that the export was well-formed.
    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        meta_graph = loader.load(sess, [tag_constants.SERVING], export_dir)
        graph_ops = [x.name for x in graph.get_operations()]
        self.assertIn('input_example_tensor', graph_ops)
        self.assertIn('ParseExample/ParseExampleV2', graph_ops)
        self.assertIn('dense/kernel', graph_ops)
        # GuaranteeConst ops won't be present in the CPU-only graph.
        self.assertNotIn('dense/kernel/GuaranteeConst', graph_ops)
        session_run()

  def test_export_tpu_savedmodel_export_to_tpu_false(self):
    # Test that when `export_to_tpu` is `False`, TPU metagraph is not exported.
    tmpdir = tempfile.mkdtemp()
    model_fn = get_model_fn_v2()
    run_config = create_run_config(iterations_per_loop=4)

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        train_batch_size=16,
        export_to_tpu=False,
        export_saved_model_api_version=tpu_estimator.ExportSavedModelApiVersion
        .V2)
    est.train(_input_fn, steps=1)

    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export_no_tpu'))
    export_dir = est.export_saved_model(export_dir_base,
                                        self._serving_input_receiver_fn)

    with ops.Graph().as_default() as graph:
      with session.Session(graph=graph) as sess:
        with self.assertRaisesRegex(
            RuntimeError,
            'MetaGraphDef associated with tags \'serve\', \'tpu\' could not be '
            'found in SavedModel.'):
          loader.load(sess, [tag_constants.SERVING, tag_constants.TPU],
                      export_dir)
        loader.load(sess, [tag_constants.SERVING], export_dir)

    # Clean up.
    gfile.DeleteRecursively(tmpdir)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  test.main()


================================================
FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_gradients_test.py
================================================
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests to check gradients of TPUEstimator + TPU Embeddings."""

import math
import tempfile

from absl import flags
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_estimator.python.estimator.util import tf_keras_v1
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator

flags.DEFINE_integer('test_num_shards', 2, 'number of replicas to test')

FLAGS = flags.FLAGS

# Model hyper-parameters shared by the test model and the analytic
# gradient formulas below.
LEARNING_RATE = 0.12
HIDDEN_LAYER_SIZE = 20
KERNEL_INIT_VALUE = 0.1
BIAS_INIT_VALUE = 0.2
# NOTE(review): "ADADGRAD" looks like a typo for "ADAGRAD"; the name is kept
# as-is because it may be referenced in parts of this file outside this view.
ADADGRAD_INIT_VALUE = 0.1
BUCKET_SIZE = 8
EMBEDDING_DIM = 3
KEY_NAME = 'x'
GRAD_MULTIPLIER = 1000.

# Variable names used to locate the model/embedding variables in the graph;
# the TPU variants are sharded ("part_0") versions of the CPU ones.
BIAS_VAR = 'dense/bias:0'
CPU_EMBEDDING_VAR = 'dense_features/x_embedding/embedding_weights:0'
CPU_EMBEDDING_ACCUM_VAR = 'dense_features/x_embedding/embedding_weights/Adagrad:0'
TPU_EMBEDDING_VAR = 'dense_features/x_embedding/embedding_weights/part_0:0'
TPU_EMBEDDING_ACCUM_VAR = 'dense_features/x_embedding/embedding_weights/Adagrad/part_0:0'

# This test must be running with "--xla_jf_conv_full_precision=true",
DEFAULT_TOL = 1e-6


def create_model_fn(feature_columns, optimizer_type='adagrad'):
  """Returns a model_fn: embedding -> dense -> sum, squared loss.

  Args:
    feature_columns: Feature columns fed to a DenseFeatures input layer.
    optimizer_type: 'adagrad' or 'sgd'; anything else raises ValueError.
  """

  def model_fn(features, labels, mode, params):
    del params
    dense_features = tf_keras_v1.layers.DenseFeatures(feature_columns)
    input_layer = dense_features(features)
    hidden_layer = tf_keras_v1.__internal__.legacy.layers.dense(
        input_layer,
        HIDDEN_LAYER_SIZE,
        kernel_initializer=tf.constant_initializer(KERNEL_INIT_VALUE),
        bias_initializer=tf.constant_initializer(BIAS_INIT_VALUE))
    last_layer = tf.reduce_sum(hidden_layer, axis=1)
    logits = tf.reshape(last_layer, [-1])

    labels = tf.reshape(labels, [-1])
    losses = tf.square(labels - logits)
    # Use reduce_mean to match the CrossShardOptimizer reduction.
    loss = tf.reduce_mean(losses)

    if optimizer_type == 'adagrad':
      optimizer = tf.train.AdagradOptimizer(
          LEARNING_RATE, initial_accumulator_value=ADADGRAD_INIT_VALUE)
    elif optimizer_type == 'sgd':
      optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    else:
      raise ValueError('{} is not supported.'.format(optimizer_type))

    # Default reduction=tf.losses.Reduction.MEAN
    optimizer = tf.tpu.CrossShardOptimizer(optimizer)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode, loss=loss, train_op=train_op)

  return model_fn


def get_estimator(use_tpu,
                  output_dir,
                  feature_columns,
                  batch_size,
                  optimizer_type='adagrad',
                  grad_multiplier_fn=None):
  """Builds a TPUEstimator with a TPU-embedding config for the test model."""
  run_config = tpu_config.RunConfig(
      master='',
      model_dir=output_dir,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=1,
          num_shards=FLAGS.test_num_shards,
          per_host_input_for_training=(
              tpu_config.InputPipelineConfig.PER_HOST_V2)),
      save_checkpoints_steps=1)

  # NOTE(review): no `else` branch here -- an optimizer_type other than
  # 'adagrad'/'sgd' leaves optimization_parameters undefined and raises
  # NameError below rather than a descriptive ValueError.
  if optimizer_type == 'adagrad':
    optimization_parameters = tpu_estimator.AdagradParameters(
        LEARNING_RATE, ADADGRAD_INIT_VALUE, use_gradient_accumulation=False)
  elif optimizer_type == 'sgd':
    optimization_parameters = tpu_estimator.StochasticGradientDescentParameters(
        LEARNING_RATE)

  estimator = tpu_estimator.TPUEstimator(
      model_fn=create_model_fn(feature_columns, optimizer_type),
      use_tpu=use_tpu,
      config=run_config,
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      embedding_config_spec=tpu_estimator.EmbeddingConfigSpec(
          feature_columns=feature_columns,
          optimization_parameters=optimization_parameters,
          experimental_gradient_multiplier_fn=grad_multiplier_fn))
  return estimator


def get_feature_columns():
  """Returns one zero-initialized TPU embedding column over BUCKET_SIZE ids."""
  initializer = tf.zeros_initializer()
  column = tf.feature_column.categorical_column_with_identity(
      key=KEY_NAME, num_buckets=BUCKET_SIZE)
  embedding_fc = tf.tpu.experimental.embedding_column(
      column,
      dimension=EMBEDDING_DIM,
      combiner='mean',
      initializer=initializer)
  all_fc = [embedding_fc]
  return all_fc


class _EmbeddingVariableHook(tf.train.SessionRunHook):
  """A hook to record the embedding variable."""

  def __init__(self, use_tpu, include_slot_vars=True):
    self._use_tpu = use_tpu
    self._include_slot_vars = include_slot_vars

  def _set_bias_var(self):
    # Stored as a (possibly singleton) list of matching variables.
    self._bias_var = [
        v for v in tf.trainable_variables() if v.name == BIAS_VAR
    ]

  def begin(self):
    # The embedding variable has different names on TPU (sharded) vs CPU.
    search_var = TPU_EMBEDDING_VAR if self._use_tpu else CPU_EMBEDDING_VAR
    self._var = [v for v in tf.global_variables() if v.name == search_var][0]
    if self._include_slot_vars:
      search_accum_var = TPU_EMBEDDING_ACCUM_VAR if self._use_tpu else CPU_EMBEDDING_ACCUM_VAR
      self._slot_var = [
          v for v in tf.global_variables() if v.name == search_accum_var
      ][0]
    self._set_bias_var()
    self.bias_values = []
    self.var_values = []
    self.slot_var_values = []

  def after_create_session(self, session, coord):
    del coord
    # Record initial values before any training step runs.
    self.bias_values.append(session.run(self._bias_var))
    self.var_values.append(session.run(self._var))
    if self._include_slot_vars:
      self.slot_var_values.append(session.run(self._slot_var))

  def after_run(self, run_context, run_values):
    self.bias_values.append(run_context.session.run(self._bias_var))
    self.var_values.append(run_context.session.run(self._var))
    if self._include_slot_vars:
      self.slot_var_values.append(run_context.session.run(self._slot_var))


def get_activation_gradients(label):
  """Gets the sample gradient w.r.t activation according to the model_fn."""
  # The sample loss is (label - logits)**2, where
  #   logits = \sum_j^HIDDEN_LAYER_SIZE (
  #       \sum_i^EMBEDDING_DIM w_i * kernel + bias)
  #
  # Note kernel and bias are both constant initializer in this test.
  #
  # So, gradients of loss w.r.t w_i is
  #   grads = 2 * (label - logits) gradients( logits w.r.t. w_i)
  #         = 2 * (label - logits) (-1 * HIDDEN_LAYER_SIZE * kernel)
  #
  # Given the weights are zero initializer,
  #   grads = - 2 HIDDEN_LAYER_SIZE * kernel (label - HIDDEN_LAYER_SIZE * bias)
  return -2 * HIDDEN_LAYER_SIZE * KERNEL_INIT_VALUE * (
      label - HIDDEN_LAYER_SIZE * BIAS_INIT_VALUE)


def get_embedding_update(gradient, previous_accum_inc=0.0):
  """Gets the embedding update according to Adagrad.

  Args:
    gradient: the embedding gradient.
    previous_accum_inc: The previous total accumulator increment (in addition
      to the initialize value)

  Returns:
    the value to apply gradient.
  """
  return -LEARNING_RATE * (
      gradient /
      math.sqrt(ADADGRAD_INIT_VALUE + previous_accum_inc + gradient**2))


def dense_to_sparse(dense_tensor, out_type, ignore_value=-1):
  """Converts a dense tensor to SparseTensor, dropping `ignore_value` entries."""
  indices = tf.where(
      tf.not_equal(dense_tensor, tf.constant(ignore_value,
                                             dense_tensor.dtype)))
  values = tf.gather_nd(dense_tensor, indices)
  shape = tf.shape(dense_tensor, out_type=out_type)
  return tf.SparseTensor(indices, values, shape)


class TPUEstimatorGradientsSimpleTest(tf.test.TestCase):
  """Test gradients for different Ids in global batch.

  In all examples examined by this test, in one global batch, each embedding
  ID appears only once. So, we can expect the embedding variable and
  accumulate variable will be same after one CPU training and TPU training.

  For more complicated example, each ID can appear multiple times in one core
  mini-batch and across multiple cores, see
  TPUEstimatorGradientsWithIdCollisionTest.
  """

  def setUp(self):
    # NOTE(review): does not call super().setUp() -- confirm this is
    # intentional for tf.test.TestCase.
    self._model_dir = tempfile.mkdtemp()

  def _input_fn(self, params):
    # This input_fn returns a tuple of sparse tensor and a dense tensor in
    # sequence.
    # sample 0: sparse tensor value [0] dense (target) [1]
    # sample 1: sparse tensor value [1] dense (target) [2]
    # sample 2: sparse tensor value [2] dense (target) [3]
    # ...
    batch_size = params['batch_size']
    ds = tf.data.Dataset.range(8)

    def _map_fn(index):
      index = tf.reshape(index, [1])
      dense_tensor = tf.cast(index + 1, tf.float32)
      return ({KEY_NAME: dense_to_sparse(index, tf.int64)}, dense_tensor)

    ds = ds.map(_map_fn)
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds

  def test_input_fn(self):
    """Sanity-checks _input_fn output for batch sizes 1 and 2."""
    ds = self._input_fn({'batch_size': 1})
    gn = ds.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
      for i in range(8):
        features, dense_tensor = sess.run(gn)
        sparse_tensor = features[KEY_NAME]
        self.assertAllEqual([[0, 0]], sparse_tensor.indices)
        self.assertAllEqual([i], sparse_tensor.values)
        self.assertAllEqual([[i + 1]], dense_tensor)

    tf.reset_default_graph()
    ds = self._input_fn({'batch_size': 2})
    gn = ds.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
      for i in range(4):
        features, dense_tensor = sess.run(gn)
        sparse_tensor = features[KEY_NAME]
        self.assertAllEqual([[0, 0], [1, 0]], sparse_tensor.indices)
        self.assertAllEqual([i * 2, i * 2 + 1], sparse_tensor.values)
        self.assertAllEqual([[i * 2 + 1], [i * 2 + 2]], dense_tensor)

  def assert_embedding_variables(self,
                                 gradients_for_embedding,
                                 hand_calculated_embedding_values,
                                 values_in_hook,
                                 tol=DEFAULT_TOL):
    """Assert the embedding variables after training one step."""
    expected_embedding_var_values = []
    # Before training, all zeros (zeros_initializer)
    expected_embedding_var_values.append(np.zeros((BUCKET_SIZE, EMBEDDING_DIM)))

    after_training_var_values = np.zeros((BUCKET_SIZE, EMBEDDING_DIM))
    embedding_row_value_after_one_step = [
        get_embedding_update(g) for g in gradients_for_embedding
    ]
    # Each row of the embedding table gets the same update in every dimension
    # because the initial weights are all zeros.
    for i in range(len(embedding_row_value_after_one_step)):
      after_training_var_values[i][:] = embedding_row_value_after_one_step[i]
    expected_embedding_var_values.append(after_training_var_values)

    # Check against hand calculated value.
    self.assertAllClose(hand_calculated_embedding_values,
                        embedding_row_value_after_one_step)

    # Check against the value recorded during training.
    self.assertAllClose(
        expected_embedding_var_values, values_in_hook, atol=tol, rtol=tol)

  def assert_embedding_slot_variables(self, gradients_for_embedding,
                                      hand_calculated_embedding_slot_values,
                                      values_in_hook, tol):
    """Assert the embedding slot variables after training one step."""
    expected_embedding_slot_var_values = []
    # Before training, all same (ADADGRAD_INIT_VALUE)
    expected_embedding_slot_var_values.append(
        np.ones((BUCKET_SIZE, EMBEDDING_DIM)) * ADADGRAD_INIT_VALUE)

    after_training_slot_var_values = np.zeros((BUCKET_SIZE, EMBEDDING_DIM))
    # Adagrad accumulator after one step: init + g^2, per embedding row.
    accumulator_sum = [
        ADADGRAD_INIT_VALUE + g * g for g in gradients_for_embedding
    ]
    for i in range(len(accumulator_sum)):
      after_training_slot_var_values[i][:] = accumulator_sum[i]
    expected_embedding_slot_var_values.append(after_training_slot_var_values)

    # Check against hand calculated value.
    self.assertAllClose(hand_calculated_embedding_slot_values, accumulator_sum)

    # Check against the value recorded during training.
    self.assertAllClose(
        expected_embedding_slot_var_values, values_in_hook, atol=tol, rtol=tol)

  def test_one_sample_per_core(self):
    use_tpu = True
    per_core_batch_size = 1
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    hook = _EmbeddingVariableHook(use_tpu=use_tpu)

    estimator = get_estimator(use_tpu, self._model_dir, get_feature_columns(),
                              batch_size)
    estimator.train(self._input_fn, steps=1, hooks=[hook])

    # After training one step, the core 0 gets one sample with ID 0, and core 1
    # gets one sample with ID 1. So, all other IDs' embedding vars remain as
    # zeros.
    gradients_for_embedding = [
        get_activation_gradients(label=1),
        get_activation_gradients(label=2)
    ]
    gradients_for_embedding += [0] * (BUCKET_SIZE - batch_size)
    # Scale the gradients by 1/num_shards as CrossShardOptimizer scales the
    # loss for MEAN reduction.
    gradients_for_embedding = np.array(gradients_for_embedding) / num_shards

    hand_calculated_embedding_values = [0] * BUCKET_SIZE
    # Gradients are 6.0 and 4.0.
    # the embedding value should
    #   - LEARNING_RATE* x / math.sqrt(ADADGRAD_INIT_VALUE + x*x)
    hand_calculated_embedding_values[:2] = [
        -0.1198336797537491, -0.11962674870701442
    ]

    self.assert_embedding_variables(
        gradients_for_embedding=gradients_for_embedding,
        hand_calculated_embedding_values=hand_calculated_embedding_values,
        values_in_hook=hook.var_values,
        tol=DEFAULT_TOL)

    hand_calculated_embedding_slot_values = [ADADGRAD_INIT_VALUE] * BUCKET_SIZE
    hand_calculated_embedding_slot_values[0] += 6.0**2
    hand_calculated_embedding_slot_values[1] += 4.0**2
    self.assert_embedding_slot_variables(
        gradients_for_embedding=gradients_for_embedding,
        hand_calculated_embedding_slot_values=(
            hand_calculated_embedding_slot_values),
        values_in_hook=hook.slot_var_values,
        tol=DEFAULT_TOL)

  def test_one_sample_per_core_tpu_vs_cpu(self):
    """TPU and CPU training must produce identical variables for this input."""
    use_tpu = True
    per_core_batch_size = 1
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    # TPU
    tpu_hook = _EmbeddingVariableHook(use_tpu=use_tpu)
    estimator = get_estimator(use_tpu, self._model_dir + '_tpu',
                              get_feature_columns(), batch_size)
    estimator.train(self._input_fn, steps=1, hooks=[tpu_hook])

    # CPU
    use_tpu = False
    cpu_hook = _EmbeddingVariableHook(use_tpu=use_tpu)
    cpu_estimator = get_estimator(use_tpu, self._model_dir + '_cpu',
                                  get_feature_columns(), batch_size)
    cpu_estimator.train(self._input_fn, steps=1, hooks=[cpu_hook])

    tol = DEFAULT_TOL
    self.assertAllClose(
        tpu_hook.var_values, cpu_hook.var_values, atol=tol, rtol=tol)
    self.assertAllClose(
        tpu_hook.slot_var_values, cpu_hook.slot_var_values, atol=tol, rtol=tol)

    # Also check dense.
    self.assertAllClose(
        tpu_hook.bias_values, cpu_hook.bias_values, atol=tol, rtol=tol)

  def test_multi_samples_per_core(self):
    use_tpu = True
    per_core_batch_size = 2
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    hook = _EmbeddingVariableHook(use_tpu=use_tpu)

    estimator = get_estimator(use_tpu, self._model_dir, get_feature_columns(),
                              batch_size)
    estimator.train(self._input_fn, steps=1, hooks=[hook])

    # After training one step, the core 0 gets two samples with ID 0 and 1. For
    # core 1 gets two samples with ID 2 and 3. So, all other IDs' embedding vars
    # remain as zeros.
    gradients_for_embedding = [
        get_activation_gradients(label=1),
        get_activation_gradients(label=2),
        get_activation_gradients(label=3),
        get_activation_gradients(label=4)
    ]
    gradients_for_embedding += [0] * (BUCKET_SIZE - batch_size)
    gradients_for_embedding = np.array(gradients_for_embedding)
    # Scale the gradients by 1/ per_core_batch_size, as for each core the loss
    # is mean loss.
    gradients_for_embedding /= per_core_batch_size
    # Further scale the gradients by 1/num_shards as CrossShardOptimizer scales
    # the loss for MEAN reduction.
    gradients_for_embedding /= num_shards

    # Gradients are 3.0, 2.0, 1.0, and 0.0.
    # the embedding value should
    #   - LEARNING_RATE* x / math.sqrt(ADADGRAD_INIT_VALUE + x*x)
    hand_calculated_embedding_values = [0] * BUCKET_SIZE
    hand_calculated_embedding_values[:2] = [-0.119338837, -0.118527551]
    hand_calculated_embedding_values[2:4] = [-0.1144155107094, 0]

    self.assert_embedding_variables(
        gradients_for_embedding=gradients_for_embedding,
        hand_calculated_embedding_values=hand_calculated_embedding_values,
        values_in_hook=hook.var_values,
        tol=DEFAULT_TOL)

    hand_calculated_embedding_slot_values = [ADADGRAD_INIT_VALUE] * BUCKET_SIZE
    hand_calculated_embedding_slot_values[0] += 3.0**2
    hand_calculated_embedding_slot_values[1] += 2.0**2
    hand_calculated_embedding_slot_values[2] += 1.0**2
    self.assert_embedding_slot_variables(
        gradients_for_embedding=gradients_for_embedding,
        hand_calculated_embedding_slot_values=(
            hand_calculated_embedding_slot_values),
        values_in_hook=hook.slot_var_values,
        tol=DEFAULT_TOL)

  def test_multi_samples_per_core_tpu_vs_cpu(self):
    """Same TPU/CPU agreement check with two samples per core."""
    use_tpu = True
    per_core_batch_size = 2
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    # TPU
    tpu_hook = _EmbeddingVariableHook(use_tpu=use_tpu)
    estimator = get_estimator(use_tpu, self._model_dir + '_tpu',
                              get_feature_columns(), batch_size)
    estimator.train(self._input_fn, steps=1, hooks=[tpu_hook])

    # CPU
    use_tpu = False
    cpu_hook = _EmbeddingVariableHook(use_tpu=use_tpu)
    cpu_estimator = get_estimator(use_tpu, self._model_dir + '_cpu',
                                  get_feature_columns(), batch_size)
    cpu_estimator.train(self._input_fn, steps=1, hooks=[cpu_hook])

    tol = DEFAULT_TOL
    self.assertAllClose(
        tpu_hook.var_values, cpu_hook.var_values, atol=tol, rtol=tol)
    self.assertAllClose(
        tpu_hook.slot_var_values, cpu_hook.slot_var_values, atol=tol, rtol=tol)

    # Also check dense.
    self.assertAllClose(
        tpu_hook.bias_values, cpu_hook.bias_values, atol=tol, rtol=tol)


class TPUEstimatorGradientsWithIdCollisionTest(tf.test.TestCase):
  # Covers the case where embedding IDs repeat within a core's mini-batch and
  # across cores; see TPUEstimatorGradientsSimpleTest for the collision-free
  # case.

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def _input_fn(self, params):
    # This input_fn is expected to be called twice each having a batch_size 2.
    # The first output will be
    #   label = [1, 2]
    #   sparse inputs: SparseTensorValue(
    #       indices=array([[0, 0], [0, 1],
    #                      [1, 0], [1, 1]]),
    #       values=array([0, 1,
    #                     1, 2]),
    #       dense_shape=array([2, 2]))
    #
    # The second output will be
    #   label = [3, 4]
    #   sparse inputs: SparseTensorValue(
    #       indices=array([[0, 0], [0, 1],
    #                      [1, 0], [1, 1]]),
    #       values=array([1, 2,
    #                     2, 3]),
    #       dense_shape=array([2, 2]))
    #
    # So, each sample has two ids. Each core gets two samples, which share some
    # ids. And different cores share ids also.
    batch_size = params['batch_size']
    self.assertTrue(batch_size == 2 or batch_size == 4)
    ds = tf.data.Dataset.range(8)

    def _map_fn(index):
      x = tf.floordiv(index, 2)
      y = tf.floormod(index, 2)
      label = tf.cast(index + 1, tf.float32)
      label = tf.reshape(label, [1])
      target_dense = tf.stack([x + y, x + y + 1])
      return ({KEY_NAME: dense_to_sparse(target_dense, tf.int64)}, label)

    ds = ds.map(_map_fn)
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds

  def test_input_fn(self):
    """Sanity-checks the two consecutive batches described above."""
    ds = self._input_fn({'batch_size': 2})
    gn = ds.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
      # First call
      features, label = sess.run(gn)
      sparse_tensor = features[KEY_NAME]
      self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1]],
                          sparse_tensor.indices)
      self.assertAllEqual([
          0, 1,
          1, 2,
      ], sparse_tensor.values)
      self.assertAllEqual([[1], [2]], label)

      # second call
      features, label = sess.run(gn)
      sparse_tensor = features[KEY_NAME]
      self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1]],
                          sparse_tensor.indices)
      self.assertAllEqual([
          1, 2,
          2, 3,
      ], sparse_tensor.values)
      self.assertAllEqual([[3], [4]], label)

  def test_adagrad_opt_embedding_variables_on_tpu(self):
    use_tpu = True
    per_core_batch_size = 2
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    hook = _EmbeddingVariableHook(use_tpu=use_tpu)
    estimator = get_estimator(use_tpu, self._model_dir, get_feature_columns(),
                              batch_size)
    estimator.train(self._input_fn, steps=1, hooks=[hook])

    final_step = 1
    tol = DEFAULT_TOL

    # In this particular example, the gradient w.r.t. each activation is not
    # gradient w.r.t. embedding due to the combiner.
    unscaled_gradient_for_activation = [
        get_activation_gradients(label=1),
        get_activation_gradients(label=2),
        get_activation_gradients(label=3),
        get_activation_gradients(label=4),
    ]
    self.assertAllEqual([12., 8., 4.0, 0.0], unscaled_gradient_for_activation)

    # Due to reduce_mean and 1/num_shards scaling, the embeddings gradients
    # are 3.0, 2.0, 1.0, 0.0 as num of samples per core is 2 and
    # num_shards (number of cores) is 2.

    # Now calculates the gradients for embedding vars and accumulator for each
    # var.
    #
    # Note the IDs for each core are
    #   Core 0 sample 0 IDs: [0 1]
    #   Core 0 sample 1 IDs: [1 2]
    #   Core 1 sample 0 IDs: [1 2]
    #   Core 1 sample 1 IDs: [2 3]

    # For embedding ID 0, it appears only in the first sample of the first core.
    # So, its gradient is 3.0 / 2, where 1/2 is due to the mean combiner.
    gradient_for_id_0 = 1.5
    accumuator_for_id_0 = gradient_for_id_0**2 + ADADGRAD_INIT_VALUE
    self.assertAllClose(
        accumuator_for_id_0,
        hook.slot_var_values[final_step][0][0],
        atol=tol,
        rtol=tol)

    # embedding_update = - LR * g / (init_accm + g**2)
    gradient_update_for_id_0 = get_embedding_update(gradient_for_id_0)
    self.assertAllClose(
        gradient_update_for_id_0,
        hook.var_values[final_step][0][0],
        rtol=tol,
        atol=tol)

    # Similarly, for embedding ID 3, it appears only in the second sample of the
    # second core. So, its gradient is 0.0 / 2, where 1/2 is due to the mean
    # combiner.
    gradient_for_id_3 = 0
    accumuator_for_id_3 = gradient_for_id_3**2 + ADADGRAD_INIT_VALUE
    self.assertAllClose(
        accumuator_for_id_3,
        hook.slot_var_values[final_step][3][0],
        atol=tol,
        rtol=tol)

    # embedding_update = - LR * g / (init_accm + g**2)
    gradient_update_for_id_3 = get_embedding_update(gradient_for_id_3)
    self.assertAllClose(
        gradient_update_for_id_3,
        hook.var_values[final_step][3][0],
        rtol=tol,
        atol=tol)

    # For embedding ID 2, it appears in
    #  - second sample of first core
    #  - first sample of second core
    #  - second sample of second core
    #
    # Note that the gradients of the second activation of the second core is 0.
    # So, equivalent, it is same as
    #
    #  - second sample of first core -> gradient = 0.5 * 2.0 = 1.0
    #  - first sample of second core -> gradient = 0.5 * 1.0 = 0.5
    gradient_for_id_2_in_core_0 = 1.0
    gradient_for_id_2_in_core_1 = 0.5

    accumuator_for_id_2 = (
        ADADGRAD_INIT_VALUE + gradient_for_id_2_in_core_0**2 +
        gradient_for_id_2_in_core_1**2)
    self.assertAllClose(
        accumuator_for_id_2,
        hook.slot_var_values[final_step][2][0],
        atol=tol,
        rtol=tol)

    # embedding_update = (
    #     - LR * g1 / (init_accum + g1**2)
    #     - LR * g2 / (init_accum + g1**2 + g2**2)
    gradient_update_for_id_2_after_apply_core_0 = get_embedding_update(
        gradient_for_id_2_in_core_0)
    accum_inc = gradient_for_id_2_in_core_0**2
    gradient_update_for_id_2_after_apply_core_1 = get_embedding_update(
        gradient_for_id_2_in_core_1, previous_accum_inc=accum_inc)
    embedding_update_for_id_2 = (
        gradient_update_for_id_2_after_apply_core_0 +
        gradient_update_for_id_2_after_apply_core_1)
    self.assertAllClose(
        embedding_update_for_id_2,
        hook.var_values[final_step][2][0],
        rtol=tol,
        atol=tol)

    # For embedding ID 1, it appears in
    #  - first sample of first core
    #  - second sample of first core
    #  - first sample of second core
    #
    # So, the gradient for each sample
    #
    #  - first sample of first core -> gradient = 0.5 * 3.0 = 1.5
    #  - second sample of first core -> gradient = 0.5 * 2.0 = 1.0
    #  - first sample of second core -> gradient = 0.5 * 1.0
    #    = 0.5
    #
    # Baracore combines the gradients in single core and then applies them core
    # by core.
    gradient_for_id_1_in_core_0 = 1.5 + 1.0
    gradient_for_id_1_in_core_1 = 0.5

    accumuator_for_id_1 = (
        ADADGRAD_INIT_VALUE + gradient_for_id_1_in_core_0**2 +
        gradient_for_id_1_in_core_1**2)
    self.assertAllClose(
        accumuator_for_id_1,
        hook.slot_var_values[final_step][1][0],
        atol=tol,
        rtol=tol)

    # ID 1 resides on Core 1, so the updates from Core 1 are applied first.
    # embedding_update = (
    #     - LR * g1 / (init_accum + g1**2)
    #     - LR * g2 / (init_accum + g1**2 + g2**2)
    gradient_update_for_id_1_after_apply_core_1 = get_embedding_update(
        gradient_for_id_1_in_core_1)
    accum_inc = gradient_for_id_1_in_core_1**2
    gradient_update_for_id_1_after_apply_core_0 = get_embedding_update(
        gradient_for_id_1_in_core_0, previous_accum_inc=accum_inc)
    embedding_update_for_id_1 = (
        gradient_update_for_id_1_after_apply_core_0 +
        gradient_update_for_id_1_after_apply_core_1)
    self.assertAllClose(
        embedding_update_for_id_1,
        hook.var_values[final_step][1][0],
        rtol=tol,
        atol=tol)

  def test_adagrad_opt_embedding_variables_on_cpu(self):
    use_tpu = False
    per_core_batch_size = 2
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    hook = _EmbeddingVariableHook(use_tpu=use_tpu)
    estimator = get_estimator(use_tpu, self._model_dir, get_feature_columns(),
                              batch_size)
    estimator.train(self._input_fn, steps=1, hooks=[hook])

    final_step = 1
    tol = DEFAULT_TOL

    # In this CPU example, the gradients for embedding rows are same as the
    # above: not only the sample loss and gradient, but also the scaling. The
    # only difference is CPU combines all gradients in one update; while TPU
    # updates the gradients core by core.
    #
    # For ID 0: gradient = 0.5 * 3.0 = 1.5
    # For ID 1: gradient = 0.5 * 3.0 + 0.5 * 2.0 + 0.5 * 1.0 = 3.0
    # For ID 2: gradient = 0.5 * 2.0 + 0.5 * 1.0 = 1.5
    # For ID 3: gradient = 0.5 * 0.0 = 0.0
    gradients_for_embedding = np.array([1.5, 3.0, 1.5, 0])

    # Check accumulator after one step.
    for index, gradient in enumerate(gradients_for_embedding):
      accumulator = ADADGRAD_INIT_VALUE + gradient**2
      self.assertAllClose(
          accumulator,
          hook.slot_var_values[final_step][index][0],
          atol=tol,
          rtol=tol)

    # Check embedding value after one step.
    for index, gradient in enumerate(gradients_for_embedding):
      embedding_update = get_embedding_update(gradient)
      self.assertAllClose(
          embedding_update,
          hook.var_values[final_step][index][0],
          atol=tol,
          rtol=tol)

  def test_sgd_opt_embedding_variables_on_cpu(self):
    use_tpu = False
    per_core_batch_size = 2
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    hook = _EmbeddingVariableHook(use_tpu=use_tpu, include_slot_vars=False)
    estimator = get_estimator(
        use_tpu,
        self._model_dir,
        get_feature_columns(),
        batch_size,
        optimizer_type='sgd')
    estimator.train(self._input_fn, steps=1, hooks=[hook])

    final_step = 1
    tol = DEFAULT_TOL

    # In this CPU example, the gradients for embedding rows are same as the
    # above: not only the sample loss and gradient, but also the scaling. The
    # only difference is CPU combines all gradients in one update; while TPU
    # updates the gradients core by core.
    #
    # For ID 0: gradient = 0.5 * 3.0 = 1.5
    # For ID 1: gradient = 0.5 * 3.0 + 0.5 * 2.0 + 0.5 * 1.0 = 3.0
    # For ID 2: gradient = 0.5 * 2.0 + 0.5 * 1.0 = 1.5
    # For ID 3: gradient = 0.5 * 0.0 = 0.0
    gradients_for_embedding = np.array([1.5, 3.0, 1.5, 0])

    # SGD has simple update rule, w += - lr * g
    embedding_update = [LEARNING_RATE * (-g) for g in gradients_for_embedding]

    # Check embedding value after one step.
    for index in range(len(gradients_for_embedding)):
      self.assertAllClose(
          embedding_update[index],
          hook.var_values[final_step][index][0],
          atol=tol,
          rtol=tol)

  def test_sgd_opt_embedding_variables_cpu_vs_tpu(self):
    # For sgd, cpu and tpu should agree.
    per_core_batch_size = 2
    num_shards = FLAGS.test_num_shards
    batch_size = num_shards * per_core_batch_size

    use_tpu = False
    cpu_hook = _EmbeddingVariableHook(use_tpu=use_tpu, include_slot_vars=False)
    cpu_estimator = get_estimator(
        use_tpu,
        self._model_dir + '_cpu',
        get_feature_columns(),
        batch_size,
        optimizer_type='sgd')
    cpu_estimator.train(self._input_fn, steps=1, hooks=[cpu_hook])

    use_tpu = True
    tpu_hook = _EmbeddingVariableHook(use_tpu=use_tpu, include_slot_vars=False)
    estimator = get_estimator(
        use_tpu,
        self._model_dir + '_tpu',
        get_feature_columns(),
        batch_size,
        optimizer_type='sgd')
    estimator.train(self._input_fn, steps=1, hooks=[tpu_hook])

    tol = DEFAULT_TOL
    self.assertAllClose(
        cpu_hook.var_values, tpu_hook.var_values, atol=tol, rtol=tol)
    self.assertAllClose(
        cpu_hook.bias_values, tpu_hook.bias_values, atol=tol, rtol=tol)

    # Test gradient multiplier.
    def grad_multiplier_fn(global_step):
      # First global step is 0.
      return tf.cast(global_step + 1, tf.float32) * GRAD_MULTIPLIER

    tpu_hook2 = _EmbeddingVariableHook(use_tpu=use_tpu, include_slot_vars=False)
    estimator2 = get_estimator(
        use_tpu,
        self._model_dir + '_tpu_grad_multiplier',
        get_feature_columns(),
        batch_size,
        optimizer_type='sgd',
        grad_multiplier_fn=grad_multiplier_fn)
    estimator2.train(self._input_fn, steps=1, hooks=[tpu_hook2])

    tol = DEFAULT_TOL
    # With SGD the update is linear in the gradient, so multiplying the
    # gradient by GRAD_MULTIPLIER scales the variable values by the same factor.
    self.assertAllClose([v * GRAD_MULTIPLIER for v in cpu_hook.var_values],
                        tpu_hook2.var_values,
                        atol=tol * GRAD_MULTIPLIER,
                        rtol=tol * GRAD_MULTIPLIER)
    self.assertAllClose(
        cpu_hook.bias_values, tpu_hook2.bias_values, atol=tol, rtol=tol)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_input_v2_test.py
================================================
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for TPUEstimator."""

from absl import flags
from absl.testing import parameterized
import numpy as np
import six
import tensorflow.compat.v1 as tf
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.export import export_output
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator
from tensorflow_estimator.python.estimator.util import tf_keras_v1

FLAGS = flags.FLAGS

_TRAIN = model_fn_lib.ModeKeys.TRAIN
_EVAL = model_fn_lib.ModeKeys.EVAL
_PREDICT = model_fn_lib.ModeKeys.PREDICT


def create_run_config(iterations_per_loop, **kwargs):
  # Extra TPUConfig keyword arguments are forwarded untouched.
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop, **kwargs),
  )


def dense_computation(features):
  # Single zero-initialized dense unit; features here is the raw tensor.
  return tf_keras_v1.__internal__.legacy.layers.dense(
      features, 1, kernel_initializer=tf.zeros_initializer())


def model_fn_global_step_incrementer(features, labels, mode, params):
  del params
  loss = None
  train_op = None
  predictions = dense_computation(features)
  if mode != _PREDICT:
    loss = tf.losses.mean_squared_error(labels, predictions)
    optimizer = tf.tpu.CrossShardOptimizer(
        tf.train.GradientDescentOptimizer(learning_rate=0.5))
    train_op = optimizer.minimize(loss, tf.train.get_global_step())
  return tpu_estimator.TPUEstimatorSpec(
      mode,
      loss=loss,
      train_op=train_op,
      predictions={'predictions': predictions},
      export_outputs={
          'test':
              export_output.PredictOutput({
                  'prediction': predictions
              })
      })


def dummy_input_fn_with_dataset(batch_size, fea_len=1, repeat=True, x=None):
  # Returns a (features, labels) dataset; labels are the constant 2.0.
  if x is None:
    x = np.random.normal(size=[batch_size, fea_len]).astype(np.float32)
  labels = [[2.0]] * batch_size

  dataset1 = tf.data.Dataset.from_tensor_slices(x)
  dataset2 = tf.data.Dataset.from_tensor_slices(labels)
  dataset = tf.data.Dataset.zip((dataset1, dataset2))
  if repeat:
    dataset = dataset.repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)

  def _map(x, y):
    return x, y

  return dataset.map(_map)


class TpuEstimatorInputV2Test(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters((2, 1), (None, 2))
  def test_batch_size(self, num_cores_per_replica, num_shards):
    """Input_fn gets the per-replica batch size (global 128 / num_shards)."""
    input_fn_call_count = [0]
    run_config = create_run_config(
        iterations_per_loop=4,
        num_cores_per_replica=num_cores_per_replica,
        num_shards=num_shards,
        per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2)

    def _input_fn(params):
      input_fn_call_count[0] += 1
      expected_batch_size = 128 // num_shards
      self.assertEqual(expected_batch_size, params['batch_size'])
      return dummy_input_fn_with_dataset(batch_size=params['batch_size'])

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn_global_step_incrementer,
        config=run_config,
        train_batch_size=128)

    self.assertEqual(0, input_fn_call_count[0])
    est.train(_input_fn, steps=1)
    self.assertEqual(1, input_fn_call_count[0])

  def test_run_spatial_partition(self):
    """Training with spatial partitioning calls input_fn exactly once."""
    input_fn_call_count = [0]

    run_config = create_run_config(
        iterations_per_loop=4,
        num_cores_per_replica=2,
        num_shards=1,
        input_partition_dims=[[1, 2], None],
        per_host_input_for_training=(
            tpu_config.InputPipelineConfig.PER_HOST_V2))

    def _input_fn(params):
      input_fn_call_count[0] += 1
      return dummy_input_fn_with_dataset(
          batch_size=params['batch_size'], fea_len=2)

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn_global_step_incrementer,
        config=run_config,
        train_batch_size=128)

    self.assertEqual(0, input_fn_call_count[0])
    est.train(_input_fn, steps=1)
    self.assertEqual(1, input_fn_call_count[0])

  def test_predict_mode(self):
    """Predictions have batched or single-example shape per yield mode."""
    input_fn_call_count = [0]
    predict_batch_size = 128

    run_config = create_run_config(
        iterations_per_loop=4,
        num_cores_per_replica=2,
        num_shards=1,
        input_partition_dims=[[1, 2], None],
        per_host_input_for_training=(
            tpu_config.InputPipelineConfig.PER_HOST_V2))

    def _input_fn(params):
      input_fn_call_count[0] += 1
      return dummy_input_fn_with_dataset(
          batch_size=params['batch_size'], fea_len=2)

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn_global_step_incrementer,
        config=run_config,
        train_batch_size=128,
        predict_batch_size=predict_batch_size)

    self.assertEqual(0, input_fn_call_count[0])

    predictor = est.predict(_input_fn, yield_single_examples=False)
    prediction = six.next(predictor)
    self.assertEqual(1, input_fn_call_count[0])
    self.assertIn('predictions', prediction)
    self.assertEqual((predict_batch_size, 1), prediction['predictions'].shape)

    predictor = est.predict(_input_fn, yield_single_examples=True)
    prediction = six.next(predictor)
    self.assertEqual(2, input_fn_call_count[0])
    self.assertIn('predictions', prediction)
    self.assertEqual((1,), prediction['predictions'].shape)

  def test_evaluate_mode(self):
    """Evaluation with spatial partitioning calls input_fn exactly once."""
    input_fn_call_count = [0]
    eval_batch_size = 128

    run_config = create_run_config(
        iterations_per_loop=4,
        num_cores_per_replica=2,
        num_shards=1,
        input_partition_dims=[[1, 2], None],
        per_host_input_for_training=(
            tpu_config.InputPipelineConfig.PER_HOST_V2))

    def _input_fn(params):
      input_fn_call_count[0] += 1
      return dummy_input_fn_with_dataset(
          batch_size=params['batch_size'], fea_len=2)

    est = tpu_estimator.TPUEstimator(
        model_fn=model_fn_global_step_incrementer,
        config=run_config,
        train_batch_size=128,
        eval_batch_size=eval_batch_size)

    self.assertEqual(0, input_fn_call_count[0])
    est.evaluate(_input_fn, steps=1)
    self.assertEqual(1, input_fn_call_count[0])


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()


================================================
FILE:
tensorflow_estimator/python/estimator/tpu/tpu_estimator_integration_test.py
================================================
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for TPUEstimator."""

import contextlib
import tempfile

from absl import flags
import numpy as np
import tensorflow.compat.v1 as tf

# pylint: disable=g-direct-tensorflow-import
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.export import export
from tensorflow_estimator.python.estimator.export import export_output
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator
from tensorflow_estimator.python.estimator.util import tf_keras_v1
# pylint: enable=g-direct-tensorflow-import

flags.DEFINE_integer('test_num_shards', 8, 'number of replicas to test')

FLAGS = flags.FLAGS

_TRAIN = model_fn_lib.ModeKeys.TRAIN
_EVAL = model_fn_lib.ModeKeys.EVAL
_PREDICT = model_fn_lib.ModeKeys.PREDICT

# Sharding policy labels used by the expected-batch-size helper below.
_PER_HOST = 'per_host_sharding'
_PER_SHARD = 'per_shard_sharding'
_UNSHARDED = 'unsharded'
_INPUT_PIPELINE_WITH_QUEUE_RUNNER = (
    'Input pipeline contains one or more QueueRunners')


def dense_computation(features):
  # Single zero-initialized dense unit; features is a dict keyed by 'x' here.
  return tf_keras_v1.__internal__.legacy.layers.dense(
      features['x'], 1, kernel_initializer=tf.zeros_initializer())


def model_fn_global_step_incrementer(features, labels, mode, params):
  del params
  loss = None
  train_op = None
  predictions = dense_computation(features)
  if mode != _PREDICT:
    loss = tf.losses.mean_squared_error(labels, predictions)
    optimizer = tf.tpu.CrossShardOptimizer(
        tf.train.GradientDescentOptimizer(learning_rate=0.5))
    train_op = optimizer.minimize(loss, tf.train.get_global_step())
  return tpu_estimator.TPUEstimatorSpec(
      mode,
      loss=loss,
      train_op=train_op,
      predictions={'predictions': predictions},
      export_outputs={
          'test': export_output.PredictOutput({
              'prediction': predictions
          })
      })


def dummy_input_fn_with_dataset(batch_size, repeat=True, x=None):
  if x is None:
    x = np.random.normal(size=[batch_size, 1]).astype(np.float32)
  labels = [[2.0]] * batch_size

  dataset1 = tf.data.Dataset.from_tensor_slices(x)
  dataset2 = tf.data.Dataset.from_tensor_slices(labels)
  dataset = tf.data.Dataset.zip((dataset1, dataset2))
  if repeat:
    dataset = dataset.repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)

  def _map(x, y):
    return {'x': x}, y

  return dataset.map(_map)


def dummy_input_fn(batch_size, repeat=True):
  # Tensor-returning variant of dummy_input_fn_with_dataset.
  dataset = dummy_input_fn_with_dataset(batch_size, repeat)
  iterator = dataset.make_one_shot_iterator()
  return iterator.get_next()


def create_run_config(iterations_per_loop, **kwargs):
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=FLAGS.test_num_shards,
          **kwargs),
  )


class TPUEstimatorIntegrationTest(tf.test.TestCase):

  def setUp(self):
    # Per-mode record of how often input_fn ran and which batch size it saw.
    self._recorded_input_fn_invoke_metadata = {
        _TRAIN: {'called_count': 0, 'batch_size': None},
        _EVAL: {'called_count': 0, 'batch_size': None},
        _PREDICT: {'called_count': 0, 'batch_size': None}
    }
    self._data = np.linspace(0., 1., 100, dtype=np.float32).reshape(-1, 1)
    self._export_mode = False

  @contextlib.contextmanager
  def export_mode(self):
    """Enable the export mode for model_fn."""
    # Inside the model_fn, the test will check the batch size passed via params.
    # However, export mode should not have that. It is infeasible for model_fn
    # to distinguish the predict vs export mode today. So, this contextmanager
    # helps the model_fn to do that.
    self._export_mode = True
    yield
    self._export_mode = False

  def assertInputFnCalledCountAndBatch(self, expected_called_count,
                                       expected_batch_size):
    real_called_count = {
        k: v['called_count']
        for k, v in self._recorded_input_fn_invoke_metadata.items()
    }
    real_batch_size = {
        k: v['batch_size']
        for k, v in self._recorded_input_fn_invoke_metadata.items()
    }
    self.assertEqual(expected_called_count, real_called_count)
    self.assertEqual(expected_batch_size, real_batch_size)

  def _generate_expected_batch_size_and_called_count(
      self,
      num_shards,
      train_batch_size,
      eval_batch_size,
      predict_batch_size,
      train_sharding_policy=_UNSHARDED,
      eval_sharding_policy=_UNSHARDED,
      predict_sharding_policy=None):
    # Derives, per mode, the batch size the model_fn/input_fn should see and
    # the number of input_fn invocations, given the sharding policy.
    expected_batch_size_for_model_fn = {}
    expected_batch_size_for_input_fn = {}
    expected_called_count_for_input_fn = {}

    if train_sharding_policy == _PER_SHARD:
      self.assertEqual(0, train_batch_size % num_shards)
      expected_batch_size_for_model_fn[_TRAIN] = train_batch_size // num_shards
      expected_batch_size_for_input_fn[_TRAIN] = train_batch_size // num_shards
      expected_called_count_for_input_fn[_TRAIN] = num_shards
    elif train_sharding_policy == _PER_HOST:
      self.assertEqual(0, train_batch_size % num_shards)
      expected_batch_size_for_model_fn[_TRAIN] = train_batch_size // num_shards
      expected_batch_size_for_input_fn[_TRAIN] = train_batch_size
      expected_called_count_for_input_fn[_TRAIN] = 1
    else:
      expected_batch_size_for_model_fn[_TRAIN] = train_batch_size
      expected_batch_size_for_input_fn[_TRAIN] = train_batch_size
      expected_called_count_for_input_fn[_TRAIN] = 1

    if eval_sharding_policy == _PER_HOST:
      # NOTE(review): checks train_batch_size divisibility here, not
      # eval_batch_size — presumably they coincide in all callers; confirm.
      self.assertEqual(0, train_batch_size % num_shards)
      expected_batch_size_for_model_fn[_EVAL] = eval_batch_size // num_shards
      expected_batch_size_for_input_fn[_EVAL] = eval_batch_size
      expected_called_count_for_input_fn[_EVAL] = 1
    else:
      expected_batch_size_for_model_fn[_EVAL] = eval_batch_size
      expected_batch_size_for_input_fn[_EVAL] = eval_batch_size
      expected_called_count_for_input_fn[_EVAL] = 1

    if predict_sharding_policy is None:
      # On CPU.
      expected_batch_size_for_model_fn[_PREDICT] = predict_batch_size
      expected_batch_size_for_input_fn[_PREDICT] = predict_batch_size
      expected_called_count_for_input_fn[_PREDICT] = 1
    else:
      expected_batch_size_for_model_fn[_PREDICT] = (
          predict_batch_size // num_shards)
      expected_batch_size_for_input_fn[_PREDICT] = predict_batch_size
      expected_called_count_for_input_fn[_PREDICT] = 1

    return (expected_batch_size_for_model_fn, expected_batch_size_for_input_fn,
            expected_called_count_for_input_fn)

  def _wrap_input_fn_with_batch_size(self, batch_size, input_fn):
    # Injects 'batch_size' into params for modes that do not provide it.

    def _input_fn(params):
      self.assertNotIn('batch_size', params)
      params['batch_size'] = batch_size
      return input_fn(params)

    return _input_fn

  def _make_input_fn(self, mode, repeat=False, take=None):
    metadata = self._recorded_input_fn_invoke_metadata[mode]

    def _input_fn(params):
      metadata['called_count'] += 1

      batch_size = params['batch_size']
      if metadata['batch_size'] is None:
        metadata['batch_size'] = batch_size
      else:
        # All invocations within one mode must see the same batch size.
        self.assertEqual(batch_size, metadata['batch_size'])

      dataset1 = tf.data.Dataset.from_tensor_slices(self._data)
      dataset2 = tf.data.Dataset.from_tensor_slices(self._data)
      dataset = tf.data.Dataset.zip((dataset1, dataset2))
      if repeat:
        dataset = dataset.repeat()
      dataset = dataset.batch(batch_size)
      if take:
        dataset = dataset.take(take)

      def _map_fn(x, y):
        x.set_shape([batch_size, 1])
        y.set_shape([batch_size, 1])
        return {'x': x}, y

      dataset = dataset.map(_map_fn)
      return dataset

    return _input_fn

  def _make_model_fn(self, batch_size_dict, use_tpu_estimator_spec=False):

    def _create_estimator_spec(mode,
                               loss=None,
                               predictions=None,
                               export_outputs=None,
                               eval_metrics=None,
                               train_op=None):
      if use_tpu_estimator_spec:
        return tpu_estimator.TPUEstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            predictions=predictions,
            export_outputs=export_outputs,
            eval_metrics=eval_metrics)
      else:
        # Plain EstimatorSpec: materialize the (metric_fn, args) pair into
        # concrete eval_metric_ops.
        return model_fn_lib.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            predictions=predictions,
            export_outputs=export_outputs,
            eval_metric_ops=(eval_metrics[0](*eval_metrics[1])
                             if eval_metrics else None))

    def _model_fn(features, labels, mode, params):
      if not self._export_mode:
        # Always check batch size in params
        self.assertEqual(batch_size_dict[mode], params['batch_size'])
      else:
        self.assertNotIn('batch_size', params)
      # Check the input feeds correct shape for train and eval. When eval on
      # CPU or predict, it is allowed to have dynamic shape. So, here only
      # validates the fully known shape (which covers the TPU train).
      if features['x'].shape.is_fully_defined():
        self.assertEqual(batch_size_dict[mode], features['x'].shape[0])

      predictions = tf_keras_v1.__internal__.legacy.layers.dense(
          features['x'], 1, kernel_initializer=tf.ones_initializer())
      export_outputs = {
          'predictions': export_output.RegressionOutput(predictions)
      }

      if mode == _PREDICT:
        return _create_estimator_spec(
            mode=mode,
            predictions={'predictions': predictions},
            export_outputs=export_outputs)

      loss = tf.losses.mean_squared_error(labels, predictions)
      optimizer = tf.tpu.CrossShardOptimizer(
          tf.train.GradientDescentOptimizer(learning_rate=0.5))
      train_op = optimizer.minimize(
          loss, global_step=tf.train.get_global_step())

      eval_metrics = (
          lambda labels, predictions: {  # pylint: disable=g-long-lambda
              'absolute_error':
                  tf.metrics.mean_absolute_error(labels, predictions)
          },
          [labels, predictions])
      return _create_estimator_spec(
          mode=mode,
          loss=loss,
          predictions={'predictions': predictions},
          export_outputs=export_outputs,
          train_op=train_op,
          eval_metrics=eval_metrics)

    return _model_fn

  def _test_identity_savedmodel(self, export_dir):
    # Loads the exported SavedModel and checks it predicts ~identity
    # (the trained model should have learned y = x).
    with tf.Graph().as_default() as graph:
      with tf.Session(graph=graph) as sess:
        metagraph_def = \
tf.saved_model.loader.load(sess, [tf.saved_model.SERVING], export_dir)
        fetch = metagraph_def.signature_def['predictions'].outputs['outputs']
        feed = metagraph_def.signature_def['predictions'].inputs['inputs']
        for x in self._data:
          # Feed each sample as a serialized tf.Example, as the parsing
          # serving_input_receiver_fn expects.
          example = example_pb2.Example(
              features=feature_pb2.Features(
                  feature={
                      'x':
                          feature_pb2.Feature(
                              float_list=feature_pb2.FloatList(
                                  value=np.ravel(x)))
                  })).SerializeToString()
          y = sess.run(fetch.name, feed_dict={feed.name: [example]})
          self.assertAlmostEqual(y, x[0], delta=0.01)

  def test_complete_flow_with_per_core_input(self):
    """Train/eval/predict/export flow with per-core (per-shard) train input."""
    # Choose the train_batch_size divisible by 2 and 8 (common shards in test
    # env) and batch_size for eval and predict prime number.
    train_batch_size = 16
    eval_batch_size = 16
    predict_batch_size = 8

    run_config = create_run_config(
        iterations_per_loop=4, per_host_input_for_training=False)
    num_shards = run_config.tpu_config.num_shards

    (expected_batch_size_for_model_fn, expected_batch_size_for_input_fn,
     expected_called_count_for_input_fn) = (
         self._generate_expected_batch_size_and_called_count(
             num_shards,
             train_batch_size,
             eval_batch_size,
             predict_batch_size,
             train_sharding_policy=_PER_SHARD,
             eval_sharding_policy=_PER_HOST,
             predict_sharding_policy=_PER_HOST))

    est = tpu_estimator.TPUEstimator(
        model_fn=self._make_model_fn(
            expected_batch_size_for_model_fn, use_tpu_estimator_spec=True),
        config=run_config,
        train_batch_size=train_batch_size,
        eval_batch_size=eval_batch_size,
        predict_batch_size=predict_batch_size)

    # TRAIN
    # learn y = x
    # Note: Gradients are all zero. Just testing execution.
    def _input_fn(params):
      dataset = self._make_input_fn(mode=_TRAIN, repeat=True)(params)
      return tf.data.make_one_shot_iterator(dataset).get_next()

    train_input_fn = _input_fn
    est.train(train_input_fn, steps=7)

    # EVALUATE
    scores = est.evaluate(self._make_input_fn(mode=_EVAL), steps=6)
    self.assertEqual(7, scores['global_step'])
    self.assertGreater(0.1, scores['absolute_error'])

    # PREDICT
    predict_input_fn = self._make_input_fn(mode=_PREDICT, take=2)
    predictions = [x['predictions'] for x in est.predict(predict_input_fn)]
    self.assertAllClose(
        self._data[:predict_batch_size * 2], predictions, atol=0.01)

    # Verify all input_fn invoke recorded metadata.
    self.assertInputFnCalledCountAndBatch(expected_called_count_for_input_fn,
                                          expected_batch_size_for_input_fn)

    # EXPORT
    feature_spec = {'x': tf.io.FixedLenFeature([1], tf.float32)}
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    with self.export_mode():
      export_dir = est.export_saved_model(
          tempfile.mkdtemp(dir=self.get_temp_dir()), serving_input_receiver_fn)
    self.assertTrue(tf.gfile.Exists(export_dir))
    self._test_identity_savedmodel(export_dir)

  def test_complete_flow_with_per_host_input(self):
    """Train/eval/predict/export flow with per-host train input."""
    # Choose the train_batch_size divisible by 2 and 8 (common shards in test
    # env) and batch_size for eval and predict prime number.
    train_batch_size = 16
    eval_batch_size = 16
    predict_batch_size = 16

    run_config = create_run_config(
        iterations_per_loop=4, per_host_input_for_training=True)
    num_shards = run_config.tpu_config.num_shards

    (expected_batch_size_for_model_fn, expected_batch_size_for_input_fn,
     expected_called_count_for_input_fn) = (
         self._generate_expected_batch_size_and_called_count(
             num_shards,
             train_batch_size,
             eval_batch_size,
             predict_batch_size,
             train_sharding_policy=_PER_HOST,
             eval_sharding_policy=_PER_HOST,
             predict_sharding_policy=_PER_HOST))

    est = tpu_estimator.TPUEstimator(
        model_fn=self._make_model_fn(
            expected_batch_size_for_model_fn, use_tpu_estimator_spec=True),
        config=run_config,
        train_batch_size=train_batch_size,
        eval_batch_size=eval_batch_size,
        predict_batch_size=predict_batch_size)

    # TRAIN
    # learn y = x
    # Note: Gradients are all zero. Just testing execution.
    train_input_fn = self._make_input_fn(mode=_TRAIN, repeat=True)
    est.train(train_input_fn, steps=7)

    # EVALUATE
    scores = est.evaluate(self._make_input_fn(mode=_EVAL), steps=6)
    self.assertEqual(7, scores['global_step'])
    self.assertGreater(0.1, scores['absolute_error'])

    # PREDICT
    predict_input_fn = self._make_input_fn(mode=_PREDICT, take=2)
    predictions = [x['predictions'] for x in est.predict(predict_input_fn)]
    self.assertAllClose(
        self._data[:predict_batch_size * 2], predictions, atol=0.01)

    # Verify all input_fn invoke recorded metadata.
    self.assertInputFnCalledCountAndBatch(expected_called_count_for_input_fn,
                                          expected_batch_size_for_input_fn)

    # EXPORT
    feature_spec = {'x': tf.io.FixedLenFeature([1], tf.float32)}
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    with self.export_mode():
      export_dir = est.export_saved_model(
          tempfile.mkdtemp(dir=self.get_temp_dir()), serving_input_receiver_fn)
    self.assertTrue(tf.gfile.Exists(export_dir))
    self._test_identity_savedmodel(export_dir)

  def test_complete_flow_with_eval_on_tpu(self):
    """Train/eval/predict/export flow with evaluation running on TPU."""
    # Choose the train_batch_size divisible by 2 and 8 (common shards in test
    # env) and batch_size for eval and predict prime number.
    train_batch_size = 16
    eval_batch_size = 8
    predict_batch_size = 8

    run_config = create_run_config(iterations_per_loop=4)
    num_shards = run_config.tpu_config.num_shards

    (expected_batch_size_for_model_fn, expected_batch_size_for_input_fn,
     expected_called_count_for_input_fn) = (
         self._generate_expected_batch_size_and_called_count(
             num_shards,
             train_batch_size,
             eval_batch_size,
             predict_batch_size,
             train_sharding_policy=_PER_HOST,
             eval_sharding_policy=_PER_HOST,
             predict_sharding_policy=_PER_HOST))

    est = tpu_estimator.TPUEstimator(
        model_fn=self._make_model_fn(
            expected_batch_size_for_model_fn, use_tpu_estimator_spec=True),
        config=run_config,
        train_batch_size=train_batch_size,
        eval_batch_size=eval_batch_size,
        predict_batch_size=predict_batch_size)

    # TRAIN
    # learn y = x
    # Note: Gradients are all zero. Just testing execution.
    train_input_fn = self._make_input_fn(mode=_TRAIN, repeat=True)
    est.train(train_input_fn, steps=7)

    # EVALUATE
    eval_input_fn = self._make_input_fn(mode=_EVAL, repeat=False)
    scores = est.evaluate(eval_input_fn, steps=2)
    self.assertEqual(7, scores['global_step'])
    self.assertGreater(0.1, scores['absolute_error'])

    # PREDICT
    predict_input_fn = self._make_input_fn(mode=_PREDICT, take=2)
    predictions = [x['predictions'] for x in est.predict(predict_input_fn)]
    self.assertAllClose(
        self._data[:predict_batch_size * 2], predictions, atol=0.01)

    # Verify all input_fn invoke recorded metadata.
    self.assertInputFnCalledCountAndBatch(expected_called_count_for_input_fn,
                                          expected_batch_size_for_input_fn)

    # EXPORT
    feature_spec = {'x': tf.io.FixedLenFeature([1], tf.float32)}
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    with self.export_mode():
      export_dir = est.export_saved_model(
          tempfile.mkdtemp(dir=self.get_temp_dir()), serving_input_receiver_fn)
    self.assertTrue(tf.gfile.Exists(export_dir))
    self._test_identity_savedmodel(export_dir)

  def test_complete_flow_with_no_tpu(self):
    """Train/eval/predict/export flow entirely on CPU (use_tpu=False)."""
    # Choose the train_batch_size divisible by 2 and 8 (common shards in test
    # env) and batch_size for eval and predict prime number.
    train_batch_size = 16
    eval_batch_size = 8
    predict_batch_size = 1

    run_config = create_run_config(iterations_per_loop=4)
    num_shards = run_config.tpu_config.num_shards

    (expected_batch_size_for_model_fn, expected_batch_size_for_input_fn,
     expected_called_count_for_input_fn) = (
         self._generate_expected_batch_size_and_called_count(
             num_shards,
             train_batch_size,
             eval_batch_size,
             predict_batch_size,
             train_sharding_policy=_UNSHARDED,
             eval_sharding_policy=_UNSHARDED))

    est = tpu_estimator.TPUEstimator(
        model_fn=self._make_model_fn(
            expected_batch_size_for_model_fn, use_tpu_estimator_spec=True),
        config=run_config,
        train_batch_size=train_batch_size,
        eval_batch_size=eval_batch_size,
        predict_batch_size=predict_batch_size,
        use_tpu=False)

    # TRAIN
    # learn y = x
    # Note: Gradients are all zero. Just testing execution.
    train_input_fn = self._make_input_fn(mode=_TRAIN, repeat=True)
    est.train(train_input_fn, steps=7)

    # EVALUATE
    eval_input_fn = self._make_input_fn(mode=_EVAL)
    scores = est.evaluate(eval_input_fn, steps=2)
    self.assertEqual(7, scores['global_step'])
    self.assertGreater(0.1, scores['absolute_error'])

    # PREDICT
    predict_input_fn = self._make_input_fn(mode=_PREDICT)
    predictions = [x['predictions'] for x in est.predict(predict_input_fn)]
    self.assertAllClose(self._data, predictions, atol=0.01)

    # Verify all input_fn invoke recorded metadata.
    self.assertInputFnCalledCountAndBatch(expected_called_count_for_input_fn,
                                          expected_batch_size_for_input_fn)

    # EXPORT
    feature_spec = {'x': tf.io.FixedLenFeature([1], tf.float32)}
    serving_input_receiver_fn = (
        export.build_parsing_serving_input_receiver_fn(feature_spec))
    with self.export_mode():
      export_dir = est.export_saved_model(
          tempfile.mkdtemp(dir=self.get_temp_dir()), serving_input_receiver_fn)
    self.assertTrue(tf.gfile.Exists(export_dir))
    self._test_identity_savedmodel(export_dir)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_model_parallelism_test.py
================================================
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for TPUEstimator with model parallelism."""

from absl import flags
import numpy as np
import tensorflow as tf
from tensorflow.python.tpu import tpu_feed
from tensorflow.python.tpu.device_assignment import device_assignment
from tensorflow.python.tpu.topology import Topology
from tensorflow.python.training import evaluation
from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
from tensorflow_estimator.python.estimator.export import export_output
from tensorflow_estimator.python.estimator.tpu import tpu_config
from tensorflow_estimator.python.estimator.tpu import tpu_estimator
from tensorflow_estimator.python.estimator.util import tf_keras_v1
# pylint: enable=g-direct-tensorflow-import

FLAGS = flags.FLAGS

# Estimator mode keys, aliased for brevity.
_TRAIN = model_fn_lib.ModeKeys.TRAIN
_EVAL = model_fn_lib.ModeKeys.EVAL
_PREDICT = model_fn_lib.ModeKeys.PREDICT

# Input sharding policy names used by the tests.
_PER_HOST = 'per_host_sharding'
_PER_SHARD = 'per_shard_sharding'
_UNSHARDED = 'unsharded'

_INPUT_PIPELINE_WITH_QUEUE_RUNNER = (
    'Input pipeline contains one or more QueueRunners')


def dense_computation(features):
  """Single dense layer mapping features['x'] to one output unit."""
  return tf_keras_v1.__internal__.legacy.layers.dense(
      features['x'], 1, kernel_initializer=tf.compat.v1.zeros_initializer())


def model_fn_global_step_incrementer(features, labels, mode, params):
  """Minimal linear-regression model_fn used across these tests."""
  del params
  loss = None
  train_op = None
  predictions = dense_computation(features)
  if mode != _PREDICT:
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    optimizer = tf.compat.v1.tpu.CrossShardOptimizer(
        tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5))
    train_op = optimizer.minimize(loss, tf.compat.v1.train.get_global_step())
  return tpu_estimator.TPUEstimatorSpec(
      mode,
      loss=loss,
      train_op=train_op,
      predictions={'predictions': predictions},
      export_outputs={
          'test': export_output.PredictOutput({'prediction': predictions})
      })


def dummy_input_fn_with_dataset(batch_size, repeat=True, x=None):
  """Returns a dataset of ({'x': x}, label) pairs; labels are fixed at 2.0."""
  if x is None:
    x = np.random.normal(size=[batch_size, 1]).astype(np.float32)
  labels = [[2.0]] * batch_size

  dataset1 = tf.compat.v1.data.Dataset.from_tensor_slices(x)
  dataset2 = tf.compat.v1.data.Dataset.from_tensor_slices(labels)
  dataset = tf.compat.v1.data.Dataset.zip((dataset1, dataset2))
  if repeat:
    dataset = dataset.repeat()
  # drop_remainder keeps the batch dimension static, as TPU infeed requires.
  dataset = dataset.batch(batch_size, drop_remainder=True)

  def _map(x, y):
    return {'x': x}, y

  return dataset.map(_map)


def dummy_input_fn(batch_size, repeat=True):
  """Returns one batch of (features, labels) tensors from the dummy dataset."""
  dataset = dummy_input_fn_with_dataset(batch_size, repeat)
  iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
  return iterator.get_next()


def create_run_config(iterations_per_loop, num_shards, num_cores_per_replica,
                      **kwargs):
  """Builds a TPU RunConfig with explicit sharding/model-parallelism knobs."""
  return tpu_config.RunConfig(
      master='',
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=num_shards,
          num_cores_per_replica=num_cores_per_replica,
          **kwargs))


class TPUEstimatorModelParallelismConstructorTest(tf.test.TestCase):

  def test_fail_model_parallelism_for_per_core_input(self):
    # Model parallelism (num_cores_per_replica set) rejects per-core input.
    run_config = create_run_config(
        iterations_per_loop=4,
        num_shards=1,
        num_cores_per_replica=2,
        per_host_input_for_training=False)
    with self.assertRaisesRegex(ValueError, 'Model parallelism only supports'):
      tpu_estimator.TPUEstimator(
          model_fn=model_fn_global_step_incrementer,
          config=run_config,
          train_batch_size=128)


class TPUEstimatorModelParallelismTrainingTest(tf.test.TestCase):

  def _train_and_return_global_steps(self,
                                     iterations_per_loop,
                                     steps=None,
                                     max_steps=None,
                                     pre_train_steps=None,
                                     **kwargs):
    """Trains the model and returns the list of global steps after each loop."""

    def input_fn(params):
      return dummy_input_fn(params['batch_size'])

    def _model_fn(features, labels, mode, params):
      return model_fn_global_step_incrementer(features, labels, mode, params)

    run_config = create_run_config(
        iterations_per_loop=iterations_per_loop,
        num_shards=1,
        num_cores_per_replica=2,
        **kwargs)
    est = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)

    class _TrainStepCheckHook(tf.compat.v1.train.SessionRunHook):
      """Check eval step counter after one session.run."""

      def __init__(self):
        """Constructs the run hook."""
        # Global-step value observed after each train loop.
        self._global_steps = []

      @property
      def global_steps(self):
        return self._global_steps

      def after_run(self, run_context, run_values):
        global_step = run_context.session.run(
            tf.compat.v1.train.get_global_step())
        self._global_steps.append(global_step)

    if pre_train_steps:
      # Build a checkpoint first so the next train call resumes from it.
      est.train(input_fn, steps=pre_train_steps)

    hook = _TrainStepCheckHook()
    est.train(input_fn, steps=steps, max_steps=max_steps, hooks=[hook])
    return hook.global_steps

  def test_train_steps_with_model_parallelism(self):
    # From scratch.
    global_steps_per_loop = self._train_and_return_global_steps(
        iterations_per_loop=40, steps=12)
    self.assertEqual([12], global_steps_per_loop)

    # From existing checkpoint.
    global_steps_per_loop = self._train_and_return_global_steps(
        iterations_per_loop=40, steps=12, pre_train_steps=3)
    self.assertEqual([15], global_steps_per_loop)


class TPUEstimatorModelParallelismEvaluationTest(tf.test.TestCase):

  def _create_input_fn(self):
    """Returns an input_fn over the dummy dataset."""

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    return _input_fn

  def _create_head(self, mode, loss, eval_metrics):
    """Creates a head returning `TPUEstimatorSpec` based on mode."""
    if mode == _EVAL:
      return tpu_estimator.TPUEstimatorSpec(
          mode=mode, eval_metrics=eval_metrics, loss=loss)
    # Train
    optimizer = tf.compat.v1.tpu.CrossShardOptimizer(
        tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.5))
    train_op = optimizer.minimize(
        loss, global_step=tf.compat.v1.train.get_global_step())
    return tpu_estimator.TPUEstimatorSpec(
        mode=mode, train_op=train_op, loss=loss)

  def _metric_fn_on_cpu(self, labels, predictions):
    # NOTE(review): the key says 'mse' but the op is mean_absolute_error —
    # looks like a misnamed key; behavior-preserving, so left as-is.
    return {
        'mse': tf.compat.v1.metrics.mean_absolute_error(labels, predictions),
    }

  def _model_fn_with_eval_tensor_list(self, features, labels, mode, params):
    del params  # unused.
    predictions = tf_keras_v1.__internal__.legacy.layers.dense(
        features['x'], 1, kernel_initializer=tf.compat.v1.zeros_initializer())
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    # eval_metrics args passed as a positional tensor list.
    return self._create_head(
        mode, loss,
        eval_metrics=(self._metric_fn_on_cpu, [labels, predictions]))

  def _model_fn_with_eval_dict(self, features, labels, mode, params):
    del params  # unused.
    predictions = tf_keras_v1.__internal__.legacy.layers.dense(
        features['x'], 1, kernel_initializer=tf.compat.v1.zeros_initializer())
    loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
    # eval_metrics args passed as a keyword dict.
    return self._create_head(
        mode, loss,
        eval_metrics=(self._metric_fn_on_cpu, {
            'labels': labels,
            'predictions': predictions
        }))

  def _test_eval_steps(self, expected_eval_steps, iterations):
    """Trains one step, then evaluates and checks the eval step counter."""
    run_config = create_run_config(
        iterations_per_loop=iterations, num_shards=1, num_cores_per_replica=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_tensor_list,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)

    class _EvalStepCheckHook(tf.compat.v1.train.SessionRunHook):
      """Check eval step counter after one session.run.

      As the evaluation sets the eval iterations as the eval steps, the
      after_run should be invoked only once.
      """

      def __init__(self, iterations_per_loop, test_case):
        """Constructs the run hook."""
        self._iterations = iterations_per_loop
        self._invoked = False
        self._test_case = test_case

      def before_run(self, run_context):
        return tf.compat.v1.train.SessionRunArgs(
            {'eval_steps': evaluation._get_or_create_eval_step()})

      def after_run(self, run_context, run_values):
        eval_steps = run_values.results['eval_steps']
        # One session.run covers all eval steps, so after_run fires once.
        self._test_case.assertEqual(expected_eval_steps, eval_steps)
        self._test_case.assertFalse(self._invoked)
        self._invoked = True

    est.evaluate(
        self._create_input_fn(),
        steps=expected_eval_steps,
        hooks=[_EvalStepCheckHook(iterations, self)])

  def test_eval_metrics_with_tensor_list(self):
    run_config = create_run_config(
        iterations_per_loop=2, num_shards=1, num_cores_per_replica=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_tensor_list,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_eval_metrics_with_dict(self):
    run_config = create_run_config(
        iterations_per_loop=2, num_shards=1, num_cores_per_replica=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_dict,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    est.train(self._create_input_fn(), steps=1)
    est.evaluate(self._create_input_fn(), steps=1)

  def test_fail_with_wrong_num_shards(self):
    # num_shards must account for model parallelism; 2x2 is inconsistent here.
    run_config = create_run_config(
        iterations_per_loop=2, num_shards=2, num_cores_per_replica=2)
    est = tpu_estimator.TPUEstimator(
        model_fn=self._model_fn_with_eval_tensor_list,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    with self.assertRaisesRegex(ValueError, 'num_shards is not set correctly'):
      est.train(self._create_input_fn(), steps=1)


class TPUEstimatorModelParallelismInFeedTest(tf.test.TestCase):

  def setUp(self):
    # 2x2x2 topology fixture used by all infeed-partition tests below.
    self._topology_2x2x2 = Topology(
        device_coordinates=np.array(
            [[[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 1, 0, 1],
              [1, 0, 0, 0], [1, 0, 0, 1], [1,
              1, 0, 0], [1, 1, 0, 1]]],
            dtype=np.int32),
        mesh_shape=np.array([2, 2, 1, 2], dtype=np.int32))

  def test_infeed_even_partition(self):
    """Tests even infeed tensors partition."""
    ds = device_assignment(
        self._topology_2x2x2, num_replicas=1, computation_shape=[1, 1, 1, 2])
    input_partition_dims = [[2, 1]]
    # pylint: disable=protected-access
    partitioned_infeed = tpu_feed._PartitionedInfeedQueue(
        number_of_tuple_elements=1,
        host_id=0,
        input_partition_dims=input_partition_dims,
        device_assignment=ds)
    x = tf.zeros((14, 5))
    # 14 rows split evenly in 2 along dim 0 -> two (7, 5) tensors.
    tensors = partitioned_infeed._check_dims_and_partition_or_replicate_on_host(
        x, dims=input_partition_dims[0])
    self.assertEqual(2, len(tensors))
    self.assertEqual([(7, 5), (7, 5)], [t.shape for t in tensors])
    # pylint: enable=protected-access

  def test_infeed_uneven_partition(self):
    """Tests uneven infeed tensors partition."""
    ds = device_assignment(
        self._topology_2x2x2, num_replicas=1, computation_shape=[2, 2, 1, 2])
    input_partition_dims = [[4, 2]]
    # pylint: disable=protected-access
    partitioned_infeed = tpu_feed._PartitionedInfeedQueue(
        number_of_tuple_elements=1,
        host_id=0,
        input_partition_dims=input_partition_dims,
        device_assignment=ds)
    x = tf.zeros((14, 5))
    # 4x2 partition of (14, 5): last piece is padded down to (2, 2).
    tensors = partitioned_infeed._check_dims_and_partition_or_replicate_on_host(
        x, dims=input_partition_dims[0])
    self.assertEqual(8, len(tensors))
    self.assertEqual((2, 2), tensors[-1].shape)
    # pylint: enable=protected-access

  def test_infeed_tailing_zero_partition(self):
    """Tests infeed tensors partition which causes zero-size tensors."""
    ds = device_assignment(
        self._topology_2x2x2, num_replicas=1, computation_shape=[1, 2, 1, 2])
    input_partition_dims = [[4, 1]]
    # pylint: disable=protected-access
    partitioned_infeed = tpu_feed._PartitionedInfeedQueue(
        number_of_tuple_elements=1,
        host_id=0,
        input_partition_dims=input_partition_dims,
        device_assignment=ds)
    x = tf.zeros((5, 5))
    # 5 rows split in 4: sizes 2, 2, 1, 0 — the tail partition is empty.
    tensors = partitioned_infeed._check_dims_and_partition_or_replicate_on_host(
        x, dims=input_partition_dims[0])
    self.assertEqual(4, len(tensors))
    self.assertEqual((1, 5), tensors[2].shape)
    self.assertEqual((0, 5), tensors[3].shape)
    # pylint: enable=protected-access


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_signals_test.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TPU Estimator Signalling Tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow_estimator.python.estimator.tpu import tpu_estimator


def make_input_fn(num_samples):
  """Returns (input_fn, (a, b)) where the dataset yields {'a': a, 'b': b}."""
  a = np.linspace(0, 100.0, num=num_samples)
  b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1))

  def input_fn(params):
    batch_size = params['batch_size']
    da1 = tf.compat.v1.data.Dataset.from_tensor_slices(a)
    da2 = tf.compat.v1.data.Dataset.from_tensor_slices(b)
    dataset = tf.compat.v1.data.Dataset.zip((da1, da2))
    dataset = dataset.map(lambda fa, fb: {'a': fa, 'b': fb})
    dataset = dataset.batch(batch_size)
    return dataset

  return input_fn, (a, b)


def make_input_fn_with_labels(num_samples):
  """Returns (input_fn, (a, b)); the dataset yields ({'a': a}, b) pairs."""
  a = np.linspace(0, 100.0, num=num_samples)
  b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1))

  def input_fn(params):
    batch_size = params['batch_size']
    da1 = tf.compat.v1.data.Dataset.from_tensor_slices(a)
    da2 = tf.compat.v1.data.Dataset.from_tensor_slices(b)
    dataset = tf.compat.v1.data.Dataset.zip((da1, da2))
    dataset = dataset.map(lambda fa, fb: ({'a': fa}, fb))
    dataset = dataset.batch(batch_size)
    return dataset

  return input_fn, (a, b)


class TPUEstimatorStoppingSignalsTest(tf.test.TestCase):

  def test_normal_output_without_signals(self):
    num_samples = 4
    batch_size = 2

    params = {'batch_size': batch_size}
    input_fn, (a, b) = make_input_fn(num_samples=num_samples)

    with tf.Graph().as_default():
      dataset = input_fn(params)
      features = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()

      # With tf.data.Dataset.batch, the batch is None, i.e., dynamic shape.
      self.assertIsNone(features['a'].shape.as_list()[0])

      with tf.compat.v1.Session() as sess:
        result = sess.run(features)
        self.assertAllEqual(a[:batch_size], result['a'])
        self.assertAllEqual(b[:batch_size], result['b'])

        # This run should work as num_samples / batch_size = 2.
        result = sess.run(features)
        self.assertAllEqual(a[batch_size:num_samples], result['a'])
        self.assertAllEqual(b[batch_size:num_samples], result['b'])

        with self.assertRaises(tf.errors.OutOfRangeError):
          # Given num_samples and batch_size, this run should fail.
          sess.run(features)

  def test_output_with_stopping_signals(self):
    num_samples = 4
    batch_size = 2

    params = {'batch_size': batch_size}
    input_fn, (a, b) = make_input_fn(num_samples=num_samples)

    with tf.Graph().as_default():
      dataset = input_fn(params)
      inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size)
      dataset_initializer = inputs.dataset_initializer()
      features, _ = inputs.features_and_labels()
      signals = inputs.signals()

      # With tf.data.Dataset.batch, the batch is None, i.e., dynamic shape.
      self.assertIsNone(features['a'].shape.as_list()[0])

      with tf.compat.v1.Session() as sess:
        sess.run(dataset_initializer)

        result, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual(a[:batch_size], result['a'])
        self.assertAllEqual(b[:batch_size], result['b'])
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])

        # This run should work as num_samples / batch_size = 2.
        result, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual(a[batch_size:num_samples], result['a'])
        self.assertAllEqual(b[batch_size:num_samples], result['b'])
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])

        # This run should work, *but* see STOP ('1') as signals
        _, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])

        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(features)


class TPUEstimatorStoppingSignalsWithPaddingTest(tf.test.TestCase):

  def test_num_samples_divisible_by_batch_size(self):
    num_samples = 4
    batch_size = 2

    params = {'batch_size': batch_size}
    input_fn, (a, b) = make_input_fn(num_samples=num_samples)

    with tf.Graph().as_default():
      dataset = input_fn(params)
      inputs = tpu_estimator._InputsWithStoppingSignals(
          dataset, batch_size, add_padding=True)
      dataset_initializer = inputs.dataset_initializer()
      features, _ = inputs.features_and_labels()
      signals = inputs.signals()

      # With padding, all shapes are static now.
      self.assertEqual(batch_size, features['a'].shape.as_list()[0])

      with tf.compat.v1.Session() as sess:
        sess.run(dataset_initializer)

        result, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual(a[:batch_size], result['a'])
        self.assertAllEqual(b[:batch_size], result['b'])
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
        # No padding needed: mask is all zeros.
        self.assertAllEqual([0.] * batch_size,
                            evaluated_signals['padding_mask'])

        # This run should work as num_samples / batch_size = 2.
        result, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual(a[batch_size:num_samples], result['a'])
        self.assertAllEqual(b[batch_size:num_samples], result['b'])
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
        self.assertAllEqual([0.] * batch_size,
                            evaluated_signals['padding_mask'])

        # This run should work, *but* see STOP ('1') as signals
        _, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])

        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(features)

  def test_num_samples_not_divisible_by_batch_size(self):
    num_samples = 5
    batch_size = 2

    params = {'batch_size': batch_size}
    input_fn, (a, b) = make_input_fn_with_labels(num_samples=num_samples)

    with tf.Graph().as_default():
      dataset = input_fn(params)
      inputs = tpu_estimator._InputsWithStoppingSignals(
          dataset, batch_size, add_padding=True)
      dataset_initializer = inputs.dataset_initializer()
      features, labels = inputs.features_and_labels()
      signals = inputs.signals()

      # With padding, all shapes are static.
      self.assertEqual(batch_size, features['a'].shape.as_list()[0])

      with tf.compat.v1.Session() as sess:
        sess.run(dataset_initializer)

        evaluated_features, evaluated_labels, evaluated_signals = (
            sess.run([features, labels, signals]))
        self.assertAllEqual(a[:batch_size], evaluated_features['a'])
        self.assertAllEqual(b[:batch_size], evaluated_labels)
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
        self.assertAllEqual([0.] * batch_size,
                            evaluated_signals['padding_mask'])

        # This run should work as num_samples / batch_size >= 2.
        evaluated_features, evaluated_labels, evaluated_signals = (
            sess.run([features, labels, signals]))
        self.assertAllEqual(a[batch_size:2 * batch_size],
                            evaluated_features['a'])
        self.assertAllEqual(b[batch_size:2 * batch_size], evaluated_labels)
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])
        self.assertAllEqual([0.] * batch_size,
                            evaluated_signals['padding_mask'])

        # This is the final partial batch.
        evaluated_features, evaluated_labels, evaluated_signals = (
            sess.run([features, labels, signals]))
        real_batch_size = num_samples % batch_size

        # Assert the real part.
        self.assertAllEqual(a[2 * batch_size:num_samples],
                            evaluated_features['a'][:real_batch_size])
        self.assertAllEqual(b[2 * batch_size:num_samples],
                            evaluated_labels[:real_batch_size])

        # Assert the padded part.
        self.assertAllEqual([0.0] * (batch_size - real_batch_size),
                            evaluated_features['a'][real_batch_size:])
        self.assertAllEqual([[0.0]] * (batch_size - real_batch_size),
                            evaluated_labels[real_batch_size:])
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])

        # Mask marks the padded tail rows with 1.
        padding = ([.0] * real_batch_size +
                   [1.] * (batch_size - real_batch_size))
        self.assertAllEqual(padding, evaluated_signals['padding_mask'])

        # This run should work, *but* see STOP ('1') as signals
        _, evaluated_signals = sess.run([features, signals])
        self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping'])

        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(features)

  def test_slice(self):
    # NOTE(review): this test continues past the end of this chunk; only the
    # visible prefix is documented here.
    num_samples = 3
    batch_size = 2

    params = {'batch_size': batch_size}
    input_fn, (a, b) = make_input_fn(num_samples=num_samples)

    with tf.Graph().as_default():
      dataset = input_fn(params)
      inputs = tpu_estimator._InputsWithStoppingSignals(
          dataset, batch_size, add_padding=True)
      dataset_initializer = inputs.dataset_initializer()
      features, _ = inputs.features_and_labels()
      signals = inputs.signals()

      sliced_features = (
          tpu_estimator._PaddingSignals.slice_tensor_or_dict(
              features, signals))

      with tf.compat.v1.Session() as sess:
        sess.run(dataset_initializer)

        result, evaluated_signals = sess.run([sliced_features, signals])
        self.assertAllEqual(a[:batch_size], result['a'])
        self.assertAllEqual(b[:batch_size], result['b'])
        self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping'])

        # This is the final partial batch.
result, evaluated_signals = sess.run([sliced_features, signals]) self.assertEqual(1, len(result['a'])) self.assertAllEqual(a[batch_size:num_samples], result['a']) self.assertAllEqual(b[batch_size:num_samples], result['b']) self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) # This run should work, *but* see STOP ('1') as signals _, evaluated_signals = sess.run([sliced_features, signals]) self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) with self.assertRaises(tf.errors.OutOfRangeError): sess.run(sliced_features) def test_slice_with_multi_invocations_per_step(self): num_samples = 3 batch_size = 2 params = {'batch_size': batch_size} input_fn, (a, b) = make_input_fn(num_samples=num_samples) with tf.Graph().as_default(): dataset = input_fn(params) inputs = tpu_estimator._InputsWithStoppingSignals( dataset, batch_size, add_padding=True, num_invocations_per_step=2) dataset_initializer = inputs.dataset_initializer() features, _ = inputs.features_and_labels() signals = inputs.signals() sliced_features = ( tpu_estimator._PaddingSignals.slice_tensor_or_dict(features, signals)) with tf.compat.v1.Session() as sess: sess.run(dataset_initializer) result, evaluated_signals = sess.run([sliced_features, signals]) self.assertAllEqual(a[:batch_size], result['a']) self.assertAllEqual(b[:batch_size], result['b']) self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) # This is the final partial batch. result, evaluated_signals = sess.run([sliced_features, signals]) self.assertEqual(1, len(result['a'])) self.assertAllEqual(a[batch_size:num_samples], result['a']) self.assertAllEqual(b[batch_size:num_samples], result['b']) self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) # We should see 3 continuous batches with STOP ('1') as signals and all # of them have mask 1. 
_, evaluated_signals = sess.run([sliced_features, signals]) self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) self.assertAllEqual([1.] * batch_size, evaluated_signals['padding_mask']) _, evaluated_signals = sess.run([sliced_features, signals]) self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) self.assertAllEqual([1.] * batch_size, evaluated_signals['padding_mask']) _, evaluated_signals = sess.run([sliced_features, signals]) self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) self.assertAllEqual([1.] * batch_size, evaluated_signals['padding_mask']) with self.assertRaises(tf.errors.OutOfRangeError): sess.run(sliced_features) if __name__ == '__main__': tf.test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/tpu_estimator_test.py ================================================ # Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for TPUEstimator. To improve the performance, the test has been splitted into multiple parts 1. Integration tpu_estimator_integration_test 2. Model Parallellsim tpu_estimator_model_parallelism_test 3. Evaluation tpu_estimator_evaluation_test 4. Export tpu_estimator_export_test 5. 
Input Host v2 tpu_estimator_input_v2_test """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import functools import os import re import tempfile from absl import flags from absl.testing import parameterized import numpy as np import tensorflow as tf # pylint: disable=g-direct-tensorflow-import from tensorflow.core.protobuf import cluster_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.util import event_pb2 from tensorflow.python import data as dataset_lib from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.layers import layers from tensorflow.python.lib.io import tf_record from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.ops import variable_scope from tensorflow.python.ops.gen_array_ops import reshape from tensorflow.python.ops.losses import losses from tensorflow.python.ops.random_ops import random_uniform from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.summary import summary as summary_lib from tensorflow.python.tpu import topology as tf_topology from tensorflow.python.tpu import tpu_system_metadata as tpu_system_metadata_lib from tensorflow.python.training import moving_averages from tensorflow.python.training import 
session_run_hook from tensorflow.python.training import training from tensorflow_estimator.python.estimator import estimator as estimator_lib from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.util import tf_keras from tensorflow_estimator.python.estimator.export import export from tensorflow_estimator.python.estimator.export import export_output as export_output_lib from tensorflow_estimator.python.estimator.inputs import numpy_io from tensorflow_estimator.python.estimator.tpu import tpu_config from tensorflow_estimator.python.estimator.tpu import tpu_estimator # pylint: enable=g-direct-tensorflow-import flags.DEFINE_integer('test_num_shards', 8, 'number of replicas to test') FLAGS = flags.FLAGS _TRAIN = model_fn_lib.ModeKeys.TRAIN _EVAL = model_fn_lib.ModeKeys.EVAL _PREDICT = model_fn_lib.ModeKeys.PREDICT _PER_HOST = 'per_host_sharding' _PER_SHARD = 'per_shard_sharding' _UNSHARDED = 'unsharded' _INPUT_PIPELINE_WITH_QUEUE_RUNNER = ( 'Input pipeline contains one or more QueueRunners') def events_from_file(filepath): """Returns all events in a single event file. Args: filepath: Path to the event file. Returns: A list of all tf.compat.v1.Event protos in the event file. 
""" records = list(tf_record.tf_record_iterator(filepath)) result = [] for r in records: event = event_pb2.Event() event.ParseFromString(r) result.append(event) return result def dense_computation(features): x = features['x'] if len(x.get_shape().as_list()) == 4: x = math_ops.reduce_sum(x, axis=[1, 2]) return layers.dense(x, 1, kernel_initializer=init_ops.zeros_initializer()) def get_model_fn(export_tpu_tensor=True, export_cpu_tensor=False, tpu_estimator_spec=True): def model_fn(features, labels, mode, params): del params loss = None train_op = None predictions = dense_computation(features) export_outputs = None if mode != _PREDICT: loss = losses.mean_squared_error(labels, predictions) optimizer = tf.compat.v1.tpu.CrossShardOptimizer( training.GradientDescentOptimizer(learning_rate=0.5)) train_op = optimizer.minimize(loss, training.get_global_step()) else: if export_tpu_tensor: key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY export_outputs = { key: export_output_lib.PredictOutput({ 'prediction': predictions }) } else: export_outputs = {} if export_cpu_tensor: def host_call(predictions): classes = string_ops.as_string(predictions, name='classes') classification_output = export_output_lib.ClassificationOutput( classes=classes) export_outputs['classification'] = classification_output tf.compat.v1.tpu.outside_compilation(host_call, predictions) if tpu_estimator_spec: spec_type = tpu_estimator.TPUEstimatorSpec else: spec_type = model_fn_lib.EstimatorSpec return spec_type( mode, loss=loss, train_op=train_op, predictions={'predictions': predictions}, export_outputs=export_outputs) return model_fn def dummy_input_fn_with_dataset(dataset_size, repeat=True, x=None, batch_size=None): if batch_size is None: batch_size = dataset_size if x is None: x = np.random.normal(size=[dataset_size, 1]).astype(np.float32) labels = [[2.0]] * dataset_size dataset1 = dataset_lib.Dataset.from_tensor_slices(x) dataset2 = dataset_lib.Dataset.from_tensor_slices(labels) dataset = 
dataset_lib.Dataset.zip((dataset1, dataset2)) if repeat: dataset = dataset.repeat() dataset = dataset.batch(batch_size, drop_remainder=True) def _map(x, y): return {'x': x}, y return dataset.map(_map) def dummy_input_fn(batch_size, repeat=True): dataset = dummy_input_fn_with_dataset(batch_size, repeat) iterator = dataset_ops.make_one_shot_iterator(dataset) return iterator.get_next() def create_run_config(iterations_per_loop, num_shards=None, **kwargs): return tpu_config.RunConfig( master='', tpu_config=tpu_config.TPUConfig( iterations_per_loop=iterations_per_loop, num_shards=num_shards if num_shards else FLAGS.test_num_shards, **kwargs), ) class TPUEstimatorConstructorTest(test.TestCase): def test_reserved_key(self): run_config = create_run_config(iterations_per_loop=4) params = {'batch_size': 128} with self.assertRaisesRegex(ValueError, 'are reserved keys'): tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, params=params) def test_missing_train_batch_size(self): run_config = create_run_config(iterations_per_loop=4) with self.assertRaisesRegex(ValueError, '`train_batch_size` cannot be `None`'): tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, params={}) def test_invalid_batch_size(self): run_config = create_run_config(iterations_per_loop=4) with self.assertRaisesRegex(TypeError, 'must be int'): tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=1.0) def test_batch_size_with_num_shards_for_per_core_input(self): input_fn_call_count = [0] run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=False) num_shards = run_config.tpu_config.num_shards def _input_fn(params): input_fn_call_count[0] += 1 self.assertEqual(128 // num_shards, params['batch_size']) return dummy_input_fn(params['batch_size']) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=128) self.assertEqual(0, input_fn_call_count[0]) est.train(_input_fn, 
steps=1) self.assertEqual(num_shards, input_fn_call_count[0]) def test_batch_size_with_num_shards_for_per_host_input(self): input_fn_call_count = [0] run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=True) def _input_fn(params): input_fn_call_count[0] += 1 self.assertEqual(128, params['batch_size']) return dummy_input_fn(params['batch_size']) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=128) self.assertEqual(0, input_fn_call_count[0]) est.train(_input_fn, steps=1) self.assertEqual(1, input_fn_call_count[0]) def test_train_batch_size_with_non_divisible_num_shards(self): run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=127) with self.assertRaisesRegex(ValueError, 'train.*must be divisible'): est.train(dummy_input_fn_with_dataset, steps=1) def test_train_batch_size_with_non_divisible_num_shards_broadcast_mode(self): input_fn_call_count = [0] run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=tpu_config.InputPipelineConfig.BROADCAST) def _input_fn(params): input_fn_call_count[0] += 1 self.assertEqual(127, params['batch_size']) return dummy_input_fn(params['batch_size']) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=127) self.assertEqual(0, input_fn_call_count[0]) est.train(_input_fn, steps=1) self.assertEqual(1, input_fn_call_count[0]) def test_eval_batch_size_with_non_divisible_num_shards(self): run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=64, eval_batch_size=127) with self.assertRaisesRegex(ValueError, 'eval.*must be divisible'): est.evaluate(dummy_input_fn_with_dataset, steps=1) def test_predict_batch_size_with_non_divisible_num_shards_broadcast_mode( self): run_config = create_run_config( 
iterations_per_loop=4, per_host_input_for_training=tpu_config.InputPipelineConfig.BROADCAST) def _input_fn(params): return dummy_input_fn_with_dataset(params['batch_size']) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=64, predict_batch_size=127) est.train(_input_fn, steps=1) est.predict(_input_fn) def test_predict_batch_size_with_non_divisible_num_shards(self): run_config = create_run_config(iterations_per_loop=4) def _input_fn(params): return dummy_input_fn_with_dataset(params['batch_size']) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=64, predict_batch_size=127) est.train(_input_fn, steps=1) with self.assertRaisesRegex(ValueError, 'predict.*must be divisible'): list(est.predict(_input_fn)) def test_invalid_num_shards(self): run_config = tpu_config.RunConfig( master='', tpu_config=tpu_config.TPUConfig(iterations_per_loop=2, num_shards=16)) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=128) with self.assertRaisesRegex(ValueError, 'num_shards is not set correctly'): est.train(dummy_input_fn_with_dataset, steps=1) class TPUEstimatorTPUContextTest(test.TestCase): def test_context_replicas(self): def _input_fn(params): batch_size = params['batch_size'] context = params['context'] self.assertEqual(FLAGS.test_num_shards, context.num_replicas) self.assertEqual(1, context.num_hosts) self.assertEqual(0, context.current_host) self.assertEqual(FLAGS.test_num_shards, context.num_of_replicas_per_host) return dummy_input_fn(batch_size) run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=False) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=16) est.train(_input_fn, steps=4) def _query_system(self, master_address, cluster_def, query_topology): del master_address, cluster_def, query_topology # construct an ideal, not real, topology for 4x4. 
topology = tf_topology.Topology( mesh_shape=[4, 4, 1, 2], device_coordinates=[ [ [0, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 1], [2, 0, 0, 0], [2, 0, 0, 1], [3, 0, 0, 0], [3, 0, 0, 1], ], [ [0, 1, 0, 0], [0, 1, 0, 1], [1, 1, 0, 0], [1, 1, 0, 1], [2, 1, 0, 0], [2, 1, 0, 1], [3, 1, 0, 0], [3, 1, 0, 1], ], [ [0, 2, 0, 0], [0, 2, 0, 1], [1, 2, 0, 0], [1, 2, 0, 1], [2, 2, 0, 0], [2, 2, 0, 1], [3, 2, 0, 0], [3, 2, 0, 1], ], [ [0, 3, 0, 0], [0, 3, 0, 1], [1, 3, 0, 0], [1, 3, 0, 1], [2, 3, 0, 0], [2, 3, 0, 1], [3, 3, 0, 0], [3, 3, 0, 1], ], ], ) return tpu_system_metadata_lib.TPUSystemMetadata( num_cores=32, num_hosts=4, num_of_cores_per_host=8, topology=topology, devices=[]) def test_num_cores_per_replica_is_not_greater_than_num_cores_per_host(self): def _input_fn(params): return dummy_input_fn(params['batch_size']) with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): FLAGS.test_num_shards = 2 run_config = create_run_config( iterations_per_loop=1, num_cores_per_replica=16) with self.assertRaisesRegex( ValueError, 'Except the PER_HOST_V2 mode, the num of cores required by ' 'model parallelism specified by TPUConfig.num_cores_per_replica ' 'should be less than or equal to the num_cores_per_host. 
' 'num_cores_per_replica: 16, num_cores_per_host: 8'): est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=64) est.train(_input_fn, steps=1) def test_device_for_replica_fn(self): def _input_fn(params): batch_size = params['batch_size'] context = params['context'] with self.assertRaisesRegex( RuntimeError, 'This TPUContext instance must not be ' 'called from input_fn.'): context.device_assignment() for replica_id in range(context.num_replicas): (host_device, ordinal_id) = context.device_for_replica(replica_id) self.assertEqual('/task:0/device:CPU:0', host_device) self.assertEqual(ordinal_id, replica_id) return dummy_input_fn(batch_size) run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=True) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=16) est.train(_input_fn, steps=4) def test_input_deployment_for_per_host(self): fake_num_cores = 32 fake_num_hosts = 4 fake_num_cores_per_host = fake_num_cores // fake_num_hosts invocation_count = [0] global_batch_size = 16 * fake_num_cores def _input_fn(params): batch_size = params['batch_size'] self.assertEqual(global_batch_size // fake_num_hosts, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) self.assertEqual(current_invocation_count, invocation_index_in_context) self.assertEqual(current_invocation_count, context.current_host) self.assertEqual(fake_num_hosts, total_invocations) self.assertEqual(fake_num_cores_per_host, replicas_consumed_by_current_invocation) # Use the invocation_count to track the number of invocations. 
invocation_count[0] = current_invocation_count + 1 return dummy_input_fn(batch_size) with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): run_config = create_run_config( iterations_per_loop=4, num_shards=fake_num_cores, per_host_input_for_training=True) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=global_batch_size) # This exception is ok as we do not have sufficient TPU cores to run the # model. as far as the assert after it is correct, input pipeline checking # is done and successful. with self.assertRaisesRegex(errors.InvalidArgumentError, 'there are only 2 cores in the TPU topology'): est.train(_input_fn, steps=4) self.assertEqual(fake_num_hosts, invocation_count[0]) def test_input_deployment_for_per_host_v2(self): fake_num_cores = 32 fake_num_hosts = 4 fake_num_cores_per_host = fake_num_cores // fake_num_hosts invocation_count = [0] global_batch_size = 16 * fake_num_cores def _input_fn(params): batch_size = params['batch_size'] self.assertEqual(global_batch_size // fake_num_cores, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) self.assertEqual(current_invocation_count, invocation_index_in_context) self.assertEqual(fake_num_hosts, total_invocations) self.assertEqual(current_invocation_count, context.current_host) self.assertEqual(fake_num_cores_per_host, replicas_consumed_by_current_invocation) # Use the invocation_count to track the number of invocations. 
invocation_count[0] = current_invocation_count + 1 return dummy_input_fn_with_dataset(batch_size) with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): run_config = create_run_config( iterations_per_loop=4, num_shards=fake_num_cores, per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2 ) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=global_batch_size) # This exception is ok as we do not have sufficient TPU cores to run the # model. as far as the assert after it is correct, input pipeline checking # is done and successful. with self.assertRaisesRegex(errors.InvalidArgumentError, 'there are only 2 cores in the TPU topology'): est.train(_input_fn, steps=4) self.assertEqual(fake_num_hosts, invocation_count[0]) def test_input_deployment_for_per_host_v2_with_model_parallelism(self): fake_num_cores = 32 fake_num_hosts = 4 fake_num_cores_per_host = fake_num_cores // fake_num_hosts num_cores_per_replica = 2 fake_num_replicas = fake_num_cores // num_cores_per_replica fake_num_replicas_per_host = ( fake_num_cores_per_host // num_cores_per_replica) invocation_count = [0] global_batch_size = 16 * fake_num_cores def _input_fn(params): batch_size = params['batch_size'] self.assertEqual(global_batch_size // fake_num_replicas, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) self.assertEqual(current_invocation_count, invocation_index_in_context) self.assertEqual(current_invocation_count, context.current_host) self.assertEqual(fake_num_hosts, total_invocations) self.assertEqual(fake_num_replicas_per_host, replicas_consumed_by_current_invocation) # Use the invocation_count to track the number of 
invocations. invocation_count[0] = current_invocation_count + 1 return dummy_input_fn_with_dataset(batch_size) with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): run_config = create_run_config( iterations_per_loop=4, num_shards=fake_num_replicas, per_host_input_for_training=tpu_config.InputPipelineConfig .PER_HOST_V2, num_cores_per_replica=num_cores_per_replica) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=global_batch_size) # This exception is ok as we do not have sufficient TPU cores to run the # model. as far as the assert after it is correct, input pipeline checking # is done and successful. with self.assertRaisesRegex(errors.InvalidArgumentError, 'there are only 2 cores in the TPU topology'): est.train(_input_fn, steps=4) self.assertEqual(fake_num_hosts, invocation_count[0]) def test_input_deployment_model_parallelism_cross_host_replica(self): fake_num_cores = 32 fake_num_hosts = 4 fake_num_cores_per_host = fake_num_cores // fake_num_hosts num_cores_per_replica = 16 self.assertGreater(num_cores_per_replica, fake_num_cores_per_host) fake_num_replicas = fake_num_cores // num_cores_per_replica host_ids = [] invocation_count = [0] global_batch_size = 16 * fake_num_cores def _input_fn(params): batch_size = params['batch_size'] self.assertEqual(global_batch_size // fake_num_replicas, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) self.assertEqual(current_invocation_count, invocation_index_in_context) host_ids.append(context.current_host) self.assertEqual(fake_num_replicas, total_invocations) self.assertEqual(1, replicas_consumed_by_current_invocation) # Use the invocation_count to track 
the number of invocations. invocation_count[0] = current_invocation_count + 1 return dummy_input_fn_with_dataset(batch_size) with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): run_config = create_run_config( iterations_per_loop=4, num_shards=fake_num_replicas, per_host_input_for_training=tpu_config.InputPipelineConfig .PER_HOST_V2, num_cores_per_replica=num_cores_per_replica) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=global_batch_size) # This exception is ok as we do not have sufficient TPU cores to run the # model. as far as the assert after it is correct, input pipeline checking # is done and successful. with self.assertRaisesRegex(errors.InvalidArgumentError, 'there are only 2 cores in the TPU topology'): est.train(_input_fn, steps=4) self.assertEqual(fake_num_replicas, invocation_count[0]) self.assertEqual([0, 2], host_ids) def test_input_deployment_for_broadcast_mode(self): invocation_count = [0] global_batch_size = 16 def _input_fn(params): batch_size = params['batch_size'] self.assertEqual(global_batch_size, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) self.assertEqual(current_invocation_count, invocation_index_in_context) self.assertEqual(1, total_invocations) self.assertEqual(FLAGS.test_num_shards, replicas_consumed_by_current_invocation) # Use the invocation_count to track the number of invocations. 
invocation_count[0] = current_invocation_count + 1 return dummy_input_fn_with_dataset(batch_size) run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=tpu_config.InputPipelineConfig.BROADCAST) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=global_batch_size) est.train(_input_fn, steps=4) self.assertEqual(1, invocation_count[0]) def test_input_deployment_for_eval_broadcast_mode(self): invocation_count = [0] global_batch_size = 16 num_cores = FLAGS.test_num_shards def _input_fn(params, is_training=True): batch_size = params['batch_size'] self.assertEqual(global_batch_size, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) if is_training: self.assertEqual(current_invocation_count, invocation_index_in_context) else: self.assertEqual(current_invocation_count - 1, invocation_index_in_context) self.assertEqual(1, total_invocations) self.assertEqual(num_cores, replicas_consumed_by_current_invocation) # Use the invocation_count to track the number of invocations. 
invocation_count[0] = current_invocation_count + 1 return dummy_input_fn_with_dataset(batch_size) run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=True, eval_training_input_configuration=tpu_config.InputPipelineConfig.SLICED) def _assert_model_fn(features, labels, mode, params): actual_model_fn = get_model_fn() per_replica_batch_size = params['batch_size'] self.assertEqual(per_replica_batch_size, global_batch_size // num_cores) return actual_model_fn(features, labels, mode, params) est = tpu_estimator.TPUEstimator( model_fn=_assert_model_fn, config=run_config, train_batch_size=global_batch_size, eval_batch_size=global_batch_size) est.train(functools.partial(_input_fn, is_training=True), steps=1) self.assertEqual(1, invocation_count[0]) est.evaluate(functools.partial(_input_fn, is_training=False), steps=1) self.assertEqual(2, invocation_count[0]) def test_input_deployment_for_per_core(self): fake_num_cores = 32 fake_num_hosts = 4 fake_num_cores_per_host = fake_num_cores // fake_num_hosts invocation_count = [0] global_batch_size = 16 * fake_num_cores def _input_fn(params): batch_size = params['batch_size'] self.assertEqual(global_batch_size // fake_num_cores, batch_size) context = params['context'] current_invocation_count = invocation_count[0] (current_input_device, invocation_index_in_context, total_invocations, replicas_consumed_by_current_invocation) = ( context.current_input_fn_deployment()) self.assertEqual('/replica:0/task:0/device:CPU:0', current_input_device) self.assertEqual(current_invocation_count, invocation_index_in_context) self.assertEqual(current_invocation_count // fake_num_cores_per_host, context.current_host) self.assertEqual(fake_num_cores, total_invocations) self.assertEqual(1, replicas_consumed_by_current_invocation) # Use the invocation_count to track the number of invocations. 
invocation_count[0] = current_invocation_count + 1 return dummy_input_fn(batch_size) with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): run_config = create_run_config( iterations_per_loop=4, num_shards=fake_num_cores, per_host_input_for_training=False) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=global_batch_size) # This exception is ok as we do not have sufficient TPU cores to run the # model. as far as the assert after it is correct, input pipeline checking # is done and successful. with self.assertRaisesRegex(errors.InvalidArgumentError, 'there are only 2 cores in the TPU topology'): est.train(_input_fn, steps=4) self.assertEqual(fake_num_cores, invocation_count[0]) def test_hparams_as_params(self): def _input_fn(params): batch_size = params['batch_size'] context = params['context'] self.assertEqual(FLAGS.test_num_shards, context.num_replicas) return dummy_input_fn(batch_size) run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=False) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), params={}, config=run_config, train_batch_size=16) est.train(_input_fn, steps=4) class TPUEstimatorInputFnTest(parameterized.TestCase): def setUp(self): # TODO(b/65703635): Remove setting/restoring the constant here. # As we are transitioning from deprecated mode to new mode. We have to # test both cases to ensure we do not break clients. super(TPUEstimatorInputFnTest, self).setUp() self._old_value = tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP def tearDown(self): super(TPUEstimatorInputFnTest, self).tearDown() tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = self._old_value # Use 10 to test TPUEstimator is correctly concatenating small tensors. 
@parameterized.parameters(1, 10) def test_succeed_with_dataset(self, num_features): tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = True def _input_fn(params): batch_size = params['batch_size'] x = np.random.normal(size=[batch_size, 1]).astype(np.float32) x1 = np.random.normal(size=[batch_size, 1]).astype(np.int32) labels = [[2.0]] * batch_size dataset1 = dataset_lib.Dataset.from_tensor_slices(x) dataset2 = dataset_lib.Dataset.from_tensor_slices(x1) dataset3 = dataset_lib.Dataset.from_tensor_slices(labels) dataset = dataset_lib.Dataset.zip((dataset1, dataset2, dataset3)) def _map_fn(x, x1, y): xs = {} for i in range(num_features): xs['x' * (i + 1)] = array_ops.identity(x) xs['x1' * (i + 1)] = array_ops.identity(x1) return xs, y dataset = dataset.map(_map_fn) dataset = dataset.repeat() dataset = dataset.batch(batch_size, drop_remainder=True) return dataset run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=True) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=16) est.train(_input_fn, steps=4) def test_succeed_with_input_return_features_and_labels_with_dataset(self): tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = True def _input_fn(params): batch_size = params['batch_size'] return dummy_input_fn(batch_size) run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=False) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=16) est.train(_input_fn, steps=4) def test_fail_with_queue_based_input_fn_in_while_loop(self): tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = True data = np.arange(40, dtype=np.float32).reshape(40, 1) x = {'x': data} y = data * 2.0 def input_fn(params): batch_size = params['batch_size'] return numpy_io.numpy_input_fn( x, y, batch_size=batch_size, shuffle=False, num_epochs=None)() run_config = create_run_config( iterations_per_loop=4, per_host_input_for_training=False) est = tpu_estimator.TPUEstimator( 
model_fn=get_model_fn(), config=run_config, train_batch_size=16) with self.assertRaisesRegex(RuntimeError, _INPUT_PIPELINE_WITH_QUEUE_RUNNER): est.train(input_fn, steps=4) def test_warning_with_queue_based_input_fn(self): tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = False data = np.arange(40, dtype=np.float32).reshape(40, 1) x = {'x': data} y = data * 2.0 def input_fn(params): batch_size = params['batch_size'] return numpy_io.numpy_input_fn( x, y, batch_size=batch_size, shuffle=False, num_epochs=None)() run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=16) with test.mock.patch.object(logging, 'warn') as mock_log: est.train(input_fn, steps=4) self.assertRegex( str(mock_log.call_args), _INPUT_PIPELINE_WITH_QUEUE_RUNNER) def test_nested_inputs_dict(self): self.help_test_nested_inputs(nest_type='dict') def test_nested_inputs_tuple(self): self.help_test_nested_inputs(nest_type='tuple') def test_nested_inputs_namedtuple(self): self.help_test_nested_inputs(nest_type='namedtuple') def help_test_nested_inputs(self, nest_type): self.assertIn(nest_type, ['dict', 'tuple', 'namedtuple']) tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = True class MyTuple(collections.namedtuple('MyTuple', ['a', 'b'])): pass def model_fn(features, labels, mode, params): del params if nest_type == 'dict': inputs = features['x'] elif nest_type == 'tuple': inputs = features elif nest_type == 'namedtuple': inputs = tuple(features) else: inputs = features predictions = layers.dense( inputs[0], 1, kernel_initializer=init_ops.zeros_initializer()) loss = losses.mean_squared_error(labels, predictions) export_outputs = None optimizer = tf.compat.v1.tpu.CrossShardOptimizer( training.GradientDescentOptimizer(learning_rate=0.5)) train_op = optimizer.minimize(loss, training.get_global_step()) return tpu_estimator.TPUEstimatorSpec( mode, loss=loss, train_op=train_op, export_outputs=export_outputs) def 
class _DummyHook(session_run_hook.SessionRunHook):
  """A no-op SessionRunHook recording whether it was ever invoked.

  Tests attach an instance via (TPU)EstimatorSpec hook lists and assert on
  `called` to verify the hook was actually wired into the session.
  """

  def __init__(self):
    """Constructs the run hook."""
    # Flipped to True the first time the hook fires.
    self._called = False

  def after_create_session(self, session, coord):
    """Records the invocation.

    Fix: the first parameter was misspelled `sees`; renamed to `session` to
    match the `SessionRunHook.after_create_session(session, coord)` API. The
    framework invokes this positionally, so the rename is caller-compatible.
    """
    del session, coord  # Unused: only the fact of being called matters.
    self._called = True

  @property
  def called(self):
    """True iff `after_create_session` has run at least once."""
    return self._called
labels} run_config = create_run_config(iterations_per_loop=4) run_config = run_config.replace(log_step_count_steps=None) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=16) est.train(_input_fn_without_labels, steps=1) def test_missing_labels_in_model_fn_not_input_fn(self): def _model_fn(features, mode, params): del features, mode, params # unused. return tpu_estimator.TPUEstimatorSpec() def _input_fn(params): return dummy_input_fn(params['batch_size']) run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=16) with self.assertRaisesRegex( ValueError, 'model_fn does not take labels, but input_fn returns labels'): est.train(_input_fn, steps=1) def test_missing_params(self): def _model_fn(features, labels, mode): del features, labels, mode # unused. return tpu_estimator.TPUEstimatorSpec() def _input_fn(params): return dummy_input_fn(params['batch_size']) run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=16) with self.assertRaisesRegex(ValueError, 'model_fn .* does not include params'): est.train(_input_fn, steps=1) def test_invalid_arg(self): def _model_fn(features, labels, invalid_arg): del features, labels, invalid_arg # unused. 
return tpu_estimator.TPUEstimatorSpec() with self.assertRaisesRegex(ValueError, 'model_fn .* has following not expected args'): run_config = create_run_config(iterations_per_loop=4) tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=16) def test_valid_training_hook(self): run_config = create_run_config(iterations_per_loop=4) dummy_hook = _DummyHook() def _input_fn(params): return dummy_input_fn(params['batch_size']) def _model_fn(features, labels, mode, params): spec = get_model_fn()(features, labels, mode, params) return model_fn_lib.EstimatorSpec( mode=mode, train_op=spec.train_op, loss=spec.loss, training_hooks=[dummy_hook]) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=2 * FLAGS.test_num_shards) est.train(input_fn=_input_fn, steps=1) self.assertTrue(dummy_hook.called) def test_valid_eval_hook(self): run_config = create_run_config(iterations_per_loop=4) dummy_hook = _DummyHook() def _input_fn(params): return dummy_input_fn(params['batch_size']) def _model_fn(features, labels, mode, params): spec = get_model_fn()(features, labels, mode, params) return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=spec.train_op, loss=spec.loss, evaluation_hooks=[dummy_hook]) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=2 * FLAGS.test_num_shards, eval_batch_size=2 * FLAGS.test_num_shards) est.evaluate(input_fn=_input_fn, steps=1) self.assertTrue(dummy_hook.called) def test_valid_prediction_hook(self): run_config = create_run_config(iterations_per_loop=4) dummy_hook = _DummyHook() def _input_fn(params): return dummy_input_fn_with_dataset(params['batch_size'], repeat=False) def _model_fn(features, labels, mode, params): del labels, params predictions = dense_computation(features) return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=None, loss=None, predictions={'predictions': predictions}, prediction_hooks=[dummy_hook]) est = 
tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=2 * FLAGS.test_num_shards, predict_batch_size=2 * FLAGS.test_num_shards) list(est.predict(input_fn=_input_fn)) self.assertTrue(dummy_hook.called) def test_invalid_training_chief_hook(self): run_config = create_run_config(iterations_per_loop=4) dummy_hook = session_run_hook.SessionRunHook() def _input_fn(params): return dummy_input_fn(params['batch_size']) def _model_fn(features, labels, mode, params): spec = get_model_fn()(features, labels, mode, params) return model_fn_lib.EstimatorSpec( mode=mode, train_op=spec.train_op, loss=spec.loss, training_chief_hooks=[dummy_hook]) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=2 * FLAGS.test_num_shards) with self.assertRaisesRegex( ValueError, 'training_chief_hooks returned by ' 'EstimatorSpec is not supported in ' 'TPUEstimator'): est.train(input_fn=_input_fn, steps=1) def test_access_device_assignment_in_model_fn(self): def _model_fn(features, labels, mode, params): ctx = params['context'] self.assertIsInstance(ctx.device_assignment, tf.tpu.experimental.DeviceAssignment) return get_model_fn()(features, labels, mode, params) def _input_fn(params): return dummy_input_fn(params['batch_size']) FLAGS.test_num_shards //= 2 run_config = create_run_config( iterations_per_loop=4, num_cores_per_replica=2) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=16) est.train(_input_fn, steps=4) FLAGS.test_num_shards *= 2 def test_fail_to_call_deployment_in_model_fn(self): def _model_fn(features, labels, mode, params): ctx = params['context'] with self.assertRaisesRegex( RuntimeError, 'This TPUContext instance must not be ' 'called from model_fn.'): ctx.current_input_fn_deployment() return get_model_fn()(features, labels, mode, params) def _input_fn(params): return dummy_input_fn(params['batch_size']) run_config = create_run_config(iterations_per_loop=4) est = 
  def _test_train_and_predict(self,
                              run_config,
                              dataset_size,
                              input_tensor=None):
    """Trains one step, then checks how many predictions predict() yields.

    Args:
      run_config: RunConfig under test; its input pipeline mode determines
        the expected number of predictions.
      dataset_size: number of examples in the (finite) predict dataset.
      input_tensor: optional fixed feature tensor forwarded to the dummy
        dataset builder.
    """

    def train_input_fn(params):
      return dummy_input_fn_with_dataset(
          dataset_size,
          repeat=True,
          x=input_tensor,
          batch_size=params['batch_size'])

    def predict_input_fn(params):
      # Finite dataset (repeat=False) so predict() terminates on its own.
      return dummy_input_fn_with_dataset(
          dataset_size,
          repeat=False,
          x=input_tensor,
          batch_size=params['batch_size'])

    def _model_fn(features, labels, mode, params):
      return get_model_fn()(features, labels, mode, params)

    batch_size = 16
    est = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        predict_batch_size=batch_size)
    est.train(train_input_fn, steps=1)
    predictions = list(est.predict(predict_input_fn))
    # PER_HOST_V2 is expected to drain the whole finite dataset; BROADCAST
    # and every other mode are expected to yield exactly one batch.
    if (run_config.tpu_config.per_host_input_for_training == tpu_config
        .InputPipelineConfig.BROADCAST):
      expected_size = batch_size
    elif (run_config.tpu_config.per_host_input_for_training == tpu_config
          .InputPipelineConfig.PER_HOST_V2):
      expected_size = dataset_size
    else:
      expected_size = batch_size
    self.assertEqual(expected_size, len(predictions))
  def test_predict_on_cpu(self):
    """TPU- and CPU-mode predictions from the same checkpoint must agree."""

    def train_input_fn(params):
      return dummy_input_fn_with_dataset(params['batch_size'], repeat=True)

    def predict_input_fn(params):
      # A fixed input so the TPU and CPU runs see identical data.
      # NOTE: `batch_size` is the outer local (16), late-bound at call time.
      x = np.linspace(
          0.0, 100.0, num=batch_size).reshape(batch_size, 1).astype(np.float32)
      return dummy_input_fn_with_dataset(
          params['batch_size'], repeat=False, x=x)

    def _model_fn(features, labels, mode, params):
      return get_model_fn()(features, labels, mode, params)

    batch_size = 16
    run_config = create_run_config(iterations_per_loop=4)
    tpu_est = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        predict_batch_size=batch_size,
        use_tpu=True)
    tpu_est.train(train_input_fn, steps=1)
    tpu_predictions = [
        x['predictions'] for x in tpu_est.predict(predict_input_fn)
    ]
    self.assertEqual(batch_size * 1, len(tpu_predictions))
    # Second estimator runs the same model_fn on CPU, restoring the weights
    # the TPU run just wrote.
    cpu_est = tpu_estimator.TPUEstimator(
        model_dir=tpu_est.model_dir,  # To load the ckpt.
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        predict_batch_size=batch_size,
        use_tpu=False)
    cpu_predictions = [
        x['predictions'] for x in cpu_est.predict(predict_input_fn)
    ]
    self.assertEqual(batch_size * 1, len(cpu_predictions))
    # Allow small numeric drift between TPU and CPU execution paths.
    self.assertAllClose(tpu_predictions, cpu_predictions, atol=0.01)
  def _train_and_return_global_steps(self,
                                     iterations_per_loop,
                                     steps=None,
                                     max_steps=None,
                                     pre_train_steps=None):
    """Trains the model and returns the list of global steps after each loop.

    Args:
      iterations_per_loop: TPU iterations to run per outer training loop.
      steps: incremental `steps` argument forwarded to `est.train`.
      max_steps: absolute `max_steps` argument forwarded to `est.train`.
      pre_train_steps: if set, run a separate training call first so the
        measured run resumes from an existing checkpoint.

    Returns:
      Global-step values observed after each session.run of the hooked run.
    """

    def input_fn(params):
      return dummy_input_fn(params['batch_size'])

    def _model_fn(features, labels, mode, params):
      return get_model_fn()(features, labels, mode, params)

    run_config = create_run_config(iterations_per_loop=iterations_per_loop)
    est = tpu_estimator.TPUEstimator(
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)

    class _TrainStepCheckHook(session_run_hook.SessionRunHook):
      """Records the global step after each session.run."""

      def __init__(self):
        """Constructs the run hook."""
        # One global-step snapshot per completed loop.
        self._global_steps = []

      @property
      def global_steps(self):
        return self._global_steps

      def after_run(self, run_context, run_values):
        global_step = run_context.session.run(training.get_global_step())
        self._global_steps.append(global_step)

    if pre_train_steps:
      # Prime a checkpoint so the hooked run below starts mid-training.
      est.train(input_fn, steps=pre_train_steps)
    hook = _TrainStepCheckHook()
    est.train(input_fn, steps=steps, max_steps=max_steps, hooks=[hook])
    return hook.global_steps
global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=4, steps=12) self.assertEqual([4, 8, 12], global_steps_per_loop) # From existing checkpoint. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=4, steps=12, pre_train_steps=3) self.assertEqual([7, 11, 15], global_steps_per_loop) def test_train_steps_with_large_iterations(self): # From scratch. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=40, steps=12) self.assertEqual([12], global_steps_per_loop) # From existing checkpoint. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=40, steps=12, pre_train_steps=3) self.assertEqual([15], global_steps_per_loop) def test_train_max_steps_not_divisible_by_iterations(self): # From scratch. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=4, max_steps=10) self.assertEqual([4, 8, 10], global_steps_per_loop) # From existing checkpoint. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=4, max_steps=10, pre_train_steps=3) self.assertEqual([7, 10], global_steps_per_loop) def test_train_max_steps_divisible_by_iterations(self): # From scratch. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=4, max_steps=12) self.assertEqual([4, 8, 12], global_steps_per_loop) # From existing checkpoint. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=4, max_steps=15, pre_train_steps=3) self.assertEqual([7, 11, 15], global_steps_per_loop) def test_train_max_steps_with_large_iterations(self): # From scratch. global_steps_per_loop = self._train_and_return_global_steps( iterations_per_loop=40, max_steps=12) self.assertEqual([12], global_steps_per_loop) # From existing checkpoint. 
  def test_warm_starts(self):
    """Warm-starting a CPU estimator from a TPU checkpoint restores weights."""

    def _make_model_fn(x, use_tpu):
      # Returns a model_fn that creates a variable 'x' initialized to `x`.
      # 'x' is not used by the loss, so training leaves it at its
      # initial/warm-started value.

      def _variable_creating_model_fn(features, labels, mode, params):
        del params
        loss = None
        train_op = None
        variable_scope.get_variable('x', initializer=x)
        predictions = dense_computation(features)
        loss = losses.mean_squared_error(labels, predictions)
        optimizer = training.GradientDescentOptimizer(learning_rate=0.5)
        if use_tpu:
          optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer)
        train_op = optimizer.minimize(loss, training.get_global_step())
        if use_tpu:
          return tpu_estimator.TPUEstimatorSpec(
              mode, loss=loss, train_op=train_op)
        else:
          # CPU path: a trivial spec that only bumps the global step, so 'x'
          # keeps whatever value warm-starting assigned it.
          return model_fn_lib.EstimatorSpec(
              mode,
              loss=constant_op.constant(1.),
              train_op=state_ops.assign_add(training.get_global_step(), 1))

      return _variable_creating_model_fn

    def input_fn(params):
      return dummy_input_fn(params.get('batch_size', 16))

    run_config = create_run_config(iterations_per_loop=1)
    # Train on TPU with x initialized to 42; the checkpoint records x == 42.
    tpu_est = tpu_estimator.TPUEstimator(
        model_fn=_make_model_fn(42., use_tpu=True),
        config=run_config,
        train_batch_size=16,
        eval_batch_size=16)
    tpu_est.train(input_fn, steps=10)
    # The new estimator initializes x to 36 but warm-starts from the TPU
    # checkpoint directory.
    warm_started_est = estimator_lib.Estimator(
        model_fn=_make_model_fn(36., use_tpu=False),
        warm_start_from=tpu_est.model_dir)
    warm_started_est.train(input_fn, steps=5)
    # warm_start is called after the model_fn, so x should have the value
    # from the checkpoint.
    self.assertEqual(42., warm_started_est.get_variable_value('x'))
""" def _input_fn(params): features = reshape( math_ops.range(params['batch_size'] * 64, dtype=dtypes.float32), (params['batch_size'], 64)) # Make features with dynamic shape by the help of random padding. padding = random_uniform([], minval=0, maxval=10, dtype=dtypes.int32) features = array_ops.pad(features, [(0, 0), (0, padding)]) return dataset_lib.Dataset.from_tensor_slices( (features, math_ops.range(params['batch_size']) % 10)).repeat().batch( 16, drop_remainder=True) def _model_fn(features, labels, mode, params): del labels del params if mode == _PREDICT: return tpu_estimator.TPUEstimatorSpec( mode=mode, predictions={'value': features}) run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=8, config=run_config, predict_batch_size=16, params={}) with self.assertRaisesRegex(ValueError, 'features.*must have static'): list(est.predict(_input_fn)) def test_error_dynamic_shape_dict_tensor_features_for_model(self): """Asserting that features dict to TPUEstimator model has static shape. """ def _input_fn_dict(params): features = reshape( math_ops.range(params['batch_size'] * 64, dtype=dtypes.float32), (params['batch_size'], 64)) # Make features with dynamic shape by the help of random padding. 
padding = random_uniform([], minval=0, maxval=10, dtype=dtypes.int32) features = array_ops.pad(features, [(0, 0), (0, padding)]) dataset = dataset_lib.Dataset.from_tensor_slices(features) dataset = dataset.map(lambda v: {'key': v}) return dataset.repeat().batch(16, drop_remainder=True) def _model_fn(features, labels, mode, params): del labels del params if mode == _PREDICT: return tpu_estimator.TPUEstimatorSpec( mode=mode, predictions={'value': features['key']}) run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=8, config=run_config, predict_batch_size=16, params={}) with self.assertRaisesRegex(ValueError, 'features.*must have static.*'): list(est.predict(_input_fn_dict)) def test_error_dynamic_shape_tensor_labels_for_model(self): """Asserting that labels to TPUEstimator model has static shape. """ def _input_fn(params): features = reshape( math_ops.range(params['batch_size'] * 64, dtype=dtypes.float32), (params['batch_size'], 64)) labels = reshape( math_ops.range(params['batch_size'] * 64, dtype=dtypes.float32), (params['batch_size'], 64)) # Make labels with dynamic shape by the help of random padding. 
padding = random_uniform([], minval=0, maxval=10, dtype=dtypes.int32) labels = array_ops.pad(labels, [(0, 0), (0, padding)]) dataset = dataset_lib.Dataset.from_tensor_slices((features, labels)) return dataset.repeat().batch(16, drop_remainder=True) def _model_fn(features, labels, mode, params): del labels del params if mode == _PREDICT: return tpu_estimator.TPUEstimatorSpec( mode=mode, predictions={'value': features}) run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=8, config=run_config, predict_batch_size=16, params={}) with self.assertRaisesRegex(ValueError, 'labels.*must have static'): list(est.predict(_input_fn)) def test_error_dynamic_shape_dict_tensor_labels_for_model(self): """Asserting that labels dict to TPUEstimator model has static shape. """ def _input_fn_dict(params): features = reshape( math_ops.range(params['batch_size'] * 64, dtype=dtypes.float32), (params['batch_size'], 64)) labels = reshape( math_ops.range(params['batch_size'] * 64, dtype=dtypes.float32), (params['batch_size'], 64)) # Make labels with dynamic shape by the help of random padding. padding = random_uniform([], minval=0, maxval=10, dtype=dtypes.int32) labels = array_ops.pad(labels, [(0, 0), (0, padding)]) dataset = dataset_lib.Dataset.from_tensor_slices((features, labels)) dataset = dataset.map(lambda f, l: ({'fkey': f}, {'lkey': l})) return dataset.repeat().batch(16, drop_remainder=True) def _model_fn(features, labels, mode, params): del labels del params if mode == _PREDICT: return tpu_estimator.TPUEstimatorSpec( mode=mode, predictions={'value': features['fkey']}) run_config = create_run_config(iterations_per_loop=4) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=8, config=run_config, predict_batch_size=16, params={}) with self.assertRaisesRegex(ValueError, 'labels.*must have static*. 
  @parameterized.parameters(
      (tpu_config.InputPipelineConfig.PER_HOST_V1, 'evaluate'),
      (tpu_config.InputPipelineConfig.PER_HOST_V1, 'predict'),
      (tpu_config.InputPipelineConfig.PER_HOST_V2, 'predict'))
  def test_error_num_hosts_and_replicas_larger_than_1_in_eval_and_predict_mode(
      self, input_pipeline_mode, predict_or_evaluate):
    """Eval/predict must fail on multi-host, multi-replica configurations."""

    def _input_fn(params):
      return dummy_input_fn(params['batch_size'])

    # Fake the system metadata (self._query_system reports 16 cores over
    # 2 hosts) so the config below looks like a multi-host topology.
    with test.mock.patch.object(
        tpu_system_metadata_lib,
        '_query_tpu_system_metadata',
        side_effect=self._query_system):
      run_config = create_run_config(
          iterations_per_loop=1,
          num_cores_per_replica=8,
          per_host_input_for_training=input_pipeline_mode)
      if predict_or_evaluate == 'evaluate':
        expected_error_re = ('TPUEstimator.evaluate is only supported '
                             'under three conditions')
      else:
        expected_error_re = ('TPUEstimator.predict is only supported '
                             'under three conditions')
      with self.assertRaisesRegex(ValueError, expected_error_re):
        est = tpu_estimator.TPUEstimator(
            model_fn=get_model_fn(),
            config=run_config,
            train_batch_size=32,
            eval_batch_size=32,
            predict_batch_size=32,
            use_tpu=True)
        if predict_or_evaluate == 'evaluate':
          est.evaluate(_input_fn, steps=1)
        else:
          list(est.predict(_input_fn))
list(est.predict(_input_fn)) class TPUConfigTest(test.TestCase): def _create_ctx(self, run_config, mode=_TRAIN): est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=16) with est._ctx.with_mode(mode) as ctx: return ctx def test_no_cluster_spec(self): run_config = tpu_config.RunConfig() ctx = self._create_ctx(run_config) self.assertIsNone(ctx.master_job) ctx = self._create_ctx(run_config, mode=_EVAL) self.assertIsNone(ctx.master_job) run_config = tpu_config.RunConfig(master='grpc://10.4.5.7:8470') ctx = self._create_ctx(run_config) self.assertEqual('tpu_worker', ctx.master_job) ctx = self._create_ctx(run_config, mode=_EVAL) self.assertEqual('tpu_worker', ctx.master_job) run_config = tpu_config.RunConfig( master='grpc://10.4.5.7:8470', evaluation_master='grpc://10.5.6.7:8470') ctx = self._create_ctx(run_config) self.assertEqual('tpu_worker', ctx.master_job) ctx = self._create_ctx(run_config, mode=_EVAL) self.assertEqual('tpu_worker', ctx.master_job) def test_cluster_spec_prop(self): cluster_def = cluster_pb2.ClusterDef() worker_job = cluster_def.job.add() worker_job.name = 'worker' worker_job.tasks[0] = 'grpc://10.2.3.4:8470' session_config = config_pb2.ConfigProto(cluster_def=cluster_def) run_config = tpu_config.RunConfig( session_config=session_config, master='grpc://10.2.3.4:8470') ctx = self._create_ctx(run_config) self.assertEqual('worker', ctx.master_job) def test_cluster_spec_prop_multi_jobs(self): cluster_def = cluster_pb2.ClusterDef() worker_job = cluster_def.job.add() worker_job.name = 'worker' worker_job.tasks[0] = 'grpc://10.2.3.4:8470' coordinator_job = cluster_def.job.add() coordinator_job.name = 'coordinator' coordinator_job.tasks[0] = 'grpc://10.2.3.4:8470' session_config = config_pb2.ConfigProto(cluster_def=cluster_def) run_config = tpu_config.RunConfig( session_config=session_config, master='grpc://10.2.3.4:8470') ctx = self._create_ctx(run_config) self.assertEqual('worker', ctx.master_job) def 
test_cluster_spec_prop_cannot_infer(self): # No coordinator. cluster_def = cluster_pb2.ClusterDef() worker_job = cluster_def.job.add() worker_job.name = 'worker' worker_job.tasks[0] = 'grpc://10.2.3.4:8470' coordinator_job = cluster_def.job.add() coordinator_job.name = 'other_worker' coordinator_job.tasks[0] = 'grpc://10.2.3.4:8470' session_config = config_pb2.ConfigProto(cluster_def=cluster_def) run_config = tpu_config.RunConfig( session_config=session_config, master='grpc://10.2.3.4:8470') with self.assertRaises(ValueError): ctx = self._create_ctx(run_config) ctx.master_job # pylint:disable=pointless-statement # 2 non-coordinator jobs. cluster_def = cluster_pb2.ClusterDef() worker_job = cluster_def.job.add() worker_job.name = 'worker' worker_job.tasks[0] = 'grpc://10.2.3.4:8470' other_worker_job = cluster_def.job.add() other_worker_job.name = 'other_worker' other_worker_job.tasks[0] = 'grpc://10.2.3.5:8470' coordinator_job = cluster_def.job.add() coordinator_job.name = 'coordinator' coordinator_job.tasks[0] = 'grpc://10.2.3.4:8470' session_config = config_pb2.ConfigProto(cluster_def=cluster_def) run_config = tpu_config.RunConfig( session_config=session_config, master='grpc://10.2.3.4:8470') with self.assertRaises(ValueError): ctx = self._create_ctx(run_config) ctx.master_job # pylint:disable=pointless-statement def test_session_config_none(self): run_config = tpu_config.RunConfig() self.assertIsNone(run_config.session_config) ctx = self._create_ctx(run_config) self.assertIsNone(ctx.master_job) run_config = tpu_config.RunConfig(master='grpc://10.2.3.4:8470') self.assertIsNone(run_config.session_config) ctx = self._create_ctx(run_config) self.assertEqual('tpu_worker', ctx.master_job) def test_override_name(self): tpu_cfg = tpu_config.TPUConfig(tpu_job_name='my_custom_job') run_config = tpu_config.RunConfig(tpu_config=tpu_cfg) ctx = self._create_ctx(run_config) self.assertEqual('my_custom_job', ctx.master_job) def test_evaluation_master(self): run_config = 
tpu_config.RunConfig(master='grpc://10.2.3.4:8470') self.assertEqual(run_config.master, run_config.evaluation_master) run_config = tpu_config.RunConfig( master='grpc://10.2.3.4:8470', evaluation_master='grpc://1.1.1.1:8470') self.assertEqual('grpc://1.1.1.1:8470', run_config.evaluation_master) def test_input_partition_config(self): with self.assertRaisesRegex(ValueError, 'input_partition_dims is.* PER_HOST_V2 mode.'): tpu_config.TPUConfig( num_shards=1, input_partition_dims=[[1, 2, 1, 1], None]) with self.assertRaisesRegex(ValueError, '.*requires setting num_cores_per_replica.'): tpu_config.TPUConfig( num_shards=1, per_host_input_for_training=tpu_config.InputPipelineConfig .PER_HOST_V2, input_partition_dims=[[1, 2, 1, 1], None]) with self.assertRaisesRegex(ValueError, '.*with one or two elements.'): tpu_config.TPUConfig( num_shards=1, per_host_input_for_training=tpu_config.InputPipelineConfig .PER_HOST_V2, input_partition_dims=[[1, 2, 1, 1], None, None]) tpu_config.TPUConfig( num_shards=1, num_cores_per_replica=2, per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2, input_partition_dims=[[1, 2, 1, 1], None]) class TPUEstimatorInputPartitionValidationTest(test.TestCase): def _train(self, iterations_per_loop, image_height=224, image_width=224, steps=None, num_shards=None, num_cores_per_replica=None, input_partition_dims=None): """Trains the model with InputPartition config.""" def input_fn(params): batch_size = params['batch_size'] x = np.random.normal( size=[batch_size, image_height, image_width, 3]).astype(np.float32) return dummy_input_fn_with_dataset(batch_size, repeat=True, x=x) run_config = create_run_config( iterations_per_loop=iterations_per_loop, num_shards=num_shards, num_cores_per_replica=num_cores_per_replica, input_partition_dims=input_partition_dims, per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2) est = tpu_estimator.TPUEstimator( model_fn=get_model_fn(), config=run_config, train_batch_size=128 * num_shards, 
eval_batch_size=128 * num_shards) est.train(input_fn, steps=steps, max_steps=None) def test_train_with_non_positive_dims(self): with self.assertRaisesRegex(ValueError, 'All input partition dims must be >= 1.'): self._train( iterations_per_loop=2, image_height=321, image_width=224, steps=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'x': [1, 2, 0, 1] }, None]) def test_train_with_unmatched_partition_dims(self): with self.assertRaisesRegex( ValueError, 'The product of each input partition dim should ' 'equal to num_cores_per_replica.*'): self._train( iterations_per_loop=2, image_height=320, image_width=224, steps=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'x': [1, 2, 2, 1] }, None]) def test_train_with_shape_unmatched_partition_dims(self): with self.assertRaisesRegex(ValueError, 'Input partition dims must have the same .*'): self._train( iterations_per_loop=2, image_height=320, image_width=224, steps=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'x': [1, 2, 1] }, None]) def test_train_with_unmatched_feature_keys(self): with self.assertRaisesRegex( ValueError, r'TPUConfig.input_partition_dims\[0\]' ' mismatched feature .*'): self._train( iterations_per_loop=2, image_height=320, image_width=224, steps=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'wrong_key': [1, 2, 1] }, None]) def test_train_with_unmatched_label_keys(self): with self.assertRaisesRegex( ValueError, r'TPUConfig.input_partition_dims\[1\]' ' mismatched label .*'): self._train( iterations_per_loop=2, image_height=320, image_width=224, steps=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'x': [1, 2, 1, 1] }, { 'wrong_key': None }]) def test_train_uneven_partitions_successful(self): # image_height=321, partitioned to 2 tensors with heights 161 and 160. 
self._train( iterations_per_loop=2, image_height=321, image_width=224, steps=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'x': [1, 2, 1, 1] }, None]) def test_uneven_partitions_computation(self): image_height, image_width = 321, 224 def _predict_input_fn(params): batch_size = params['batch_size'] x = np.random.normal( size=[batch_size, image_height, image_width, 3]).astype(np.float32) return dummy_input_fn_with_dataset(batch_size, repeat=False, x=x) def _model_fn(features, labels, mode, params): del params, labels if mode == _PREDICT: conv_output = layers.conv2d(features['x'], filters=1, kernel_size=3) return tpu_estimator.TPUEstimatorSpec( mode=mode, predictions={'predictions': conv_output}) run_config = create_run_config( iterations_per_loop=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=[{ 'x': [1, 2, 1, 1] }], per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=128, predict_batch_size=1) res = list(est.predict(_predict_input_fn)) self.assertEqual(len(res), 1) self.assertEqual(res[0]['predictions'].shape, (319, 222, 1)) def _test_input_partitions_with_nested_label(self, input_partition_dims): image_height, image_width = 224, 224 def _dummy_input_fn_with_dataset(dataset_size, repeat=True, x=None, batch_size=None): if batch_size is None: batch_size = dataset_size if x is None: x = np.random.normal(size=[dataset_size, 1]).astype(np.float32) labels = [[2.0]] * dataset_size dataset1 = dataset_lib.Dataset.from_tensor_slices(x) dataset2 = dataset_lib.Dataset.from_tensor_slices(labels) dataset = dataset_lib.Dataset.zip((dataset1, dataset2)) if repeat: dataset = dataset.repeat() dataset = dataset.batch(batch_size, drop_remainder=True) def _map(x, y): return {'x': x}, {'label_1': {'label_2': y, 'label_3': y}, 'label_4': y} return dataset.map(_map) def _input_fn(params): batch_size = params['batch_size'] x = np.random.normal( 
size=[batch_size, image_height, image_width, 3]).astype(np.float32) return _dummy_input_fn_with_dataset(batch_size, repeat=True, x=x) def _model_fn(features, labels, mode, params): del params predictions = dense_computation(features) loss = losses.mean_squared_error(labels['label_1']['label_3'], predictions) optimizer = tf.compat.v1.tpu.CrossShardOptimizer( training.GradientDescentOptimizer(learning_rate=0.5)) train_op = optimizer.minimize(loss, training.get_global_step()) return tpu_estimator.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) run_config = create_run_config( iterations_per_loop=2, num_shards=1, num_cores_per_replica=2, input_partition_dims=input_partition_dims, per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V2) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, config=run_config, train_batch_size=128, predict_batch_size=1) est.train(_input_fn, steps=4, max_steps=None) def test_fully_specified_input_partitions_with_with_nested_label(self): self._test_input_partitions_with_nested_label([{'x': [1, 2, 1, 1]}, None]) def test_partial_specified_input_partitions_with_nested_label(self): self._test_input_partitions_with_nested_label([{ 'x': [1, 2, 1, 1] }, { 'label_1': { 'label_2': None, 'label_3': None }, 'label_4': None }]) def test_incorrect_input_partitions_with_nested_label(self): with self.assertRaisesRegex( ValueError, r'TPUConfig.input_partition_dims\[1\]' ' mismatched the structure of labels. 
.*'): self._test_input_partitions_with_nested_label([{ 'x': [1, 2, 1, 1] }, { 'label_1': None, 'label_4': None }]) class TPUEstimatorInputPipelinePlacementTest(test.TestCase): def _test_placement(self, per_host): num_cores = 32 batch_sizes = [] global_batch_size = 1024 host_id_matcher = re.compile(r'^input_pipeline_task(\d+)/(.*)$') host_to_device = collections.defaultdict(list) def _input_fn(params): batch_sizes.append(params['batch_size']) return dummy_input_fn(params['batch_size']) def _model_fn(features, labels, mode, params): # Examine the input pipeline placement. operations = ops.get_default_graph().get_operations() for op in operations: result = host_id_matcher.match(op.name) if result is None: continue # There is one op to read iterations_per_loop var (the Send node of # tf.identity). It is colocated with global step. So, ignore here. if result.group(2) == 'Identity/ReadVariableOp': continue host_id = int(result.group(1)) host_to_device[host_id].append(op.device) return get_model_fn()(features, labels, mode, params) run_config = tpu_config.RunConfig( master='fake://123', tpu_config=tpu_config.TPUConfig( num_shards=num_cores, per_host_input_for_training=per_host)) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=global_batch_size, config=run_config) old_value = tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = True try: est.train(input_fn=_input_fn, steps=1) self.fail('The train should not finish.') except errors.NotFoundError: # Expected. The TF sesion master is not valid. 
pass tpu_estimator._WRAP_INPUT_FN_INTO_WHILE_LOOP = old_value expected_num_hosts = num_cores // 8 if per_host: self.assertEqual(len(batch_sizes), expected_num_hosts) self.assertEqual(batch_sizes[0], global_batch_size // expected_num_hosts) else: self.assertEqual(len(batch_sizes), num_cores) self.assertEqual(batch_sizes[0], global_batch_size // num_cores) self.assertEqual(expected_num_hosts, len(list(host_to_device.keys()))) for host_id in range(expected_num_hosts): # On each host, all ops should be placed on the same device device_set = set(host_to_device[host_id]) self.assertEqual(1, len(device_set)) self.assertEqual('/job:tpu_worker/task:{}/device:CPU:0'.format(host_id), host_to_device[host_id][0]) def _query_system(self, master_address, cluster_def, query_topology): del master_address, cluster_def, query_topology return tpu_system_metadata_lib.TPUSystemMetadata( num_cores=32, num_hosts=4, num_of_cores_per_host=8, topology=None, devices=[]) def test_per_host_placement(self): with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): self._test_placement(True) def test_per_core_placement(self): with test.mock.patch.object( tpu_system_metadata_lib, '_query_tpu_system_metadata', side_effect=self._query_system): self._test_placement(False) class TPUEstimatorScaffoldTest(test.TestCase): def _get_scaffold_fn(self, mode): def _scaffold_fn_on_cpu(): scaffold = training.Scaffold() finalize_fn = scaffold.finalize def _finalize(): self.assertNotIn(mode, self.is_finalize_fn_called) self.is_finalize_fn_called[mode] = True return finalize_fn() scaffold.finalize = _finalize return scaffold return _scaffold_fn_on_cpu def _input_fn(self, params): return dummy_input_fn(params['batch_size']) def _predict_input_fn(self, params): return dummy_input_fn_with_dataset( dataset_size=params['batch_size'], repeat=False) def _model_fn(self, features, labels, mode, config, params): """Creates a head returning `TPUEstimatorSpec` based 
on mode.""" predictions = layers.dense( features['x'], 1, kernel_initializer=init_ops.zeros_initializer()) eval_metrics = None train_op = None loss = None if mode != _PREDICT: loss = losses.mean_squared_error(labels, predictions) if mode == _TRAIN: optimizer = training.GradientDescentOptimizer(learning_rate=0.5) if params['use_tpu']: optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) train_op = optimizer.minimize( loss, global_step=training.get_global_step()) elif mode == _EVAL: def _metric_fn_on_cpu(labels, predictions): return { 'mse': metrics_lib.mean_absolute_error(labels, predictions), } eval_metrics = (_metric_fn_on_cpu, [labels, predictions]) return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=train_op, loss=loss, predictions={'x': predictions}, scaffold_fn=self._get_scaffold_fn(mode), eval_metrics=eval_metrics) def test_train(self): for use_tpu in [True, False]: self.is_finalize_fn_called = {} est = tpu_estimator.TPUEstimator( model_fn=self._model_fn, train_batch_size=8, config=create_run_config(iterations_per_loop=4), use_tpu=use_tpu) est.train(input_fn=self._input_fn, steps=1) self.assertTrue(self.is_finalize_fn_called[_TRAIN]) def test_eval(self): for use_tpu in [True, False]: self.is_finalize_fn_called = {} est = tpu_estimator.TPUEstimator( model_fn=self._model_fn, train_batch_size=8, eval_batch_size=8, config=create_run_config(iterations_per_loop=4), use_tpu=use_tpu) # Generate checkpoint. est.train(input_fn=self._input_fn, steps=1) est.evaluate(input_fn=self._input_fn, steps=1) self.assertTrue(self.is_finalize_fn_called[_EVAL]) def test_predict(self): for use_tpu in [True, False]: self.is_finalize_fn_called = {} est = tpu_estimator.TPUEstimator( model_fn=self._model_fn, train_batch_size=8, predict_batch_size=8, config=create_run_config(iterations_per_loop=4), use_tpu=use_tpu) # Generate checkpoint. 
est.train(input_fn=self._input_fn, steps=1) list(est.predict(input_fn=self._predict_input_fn)) self.assertTrue(self.is_finalize_fn_called[_PREDICT]) def test_scaffold_fn_capture_tpu_tensor(self): def _model_fn(features, labels, mode, config, params): """Creates a head returning `TPUEstimatorSpec` based on mode.""" del config, params predictions = layers.dense( features['x'], 1, kernel_initializer=init_ops.zeros_initializer()) loss = losses.mean_squared_error(labels, predictions) optimizer = training.GradientDescentOptimizer(learning_rate=0.5) optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) train_op = optimizer.minimize( loss, global_step=training.get_global_step()) def scaffold_fn(): summary_lib.scalar('loss_', loss) return training.Scaffold() return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=train_op, loss=loss, scaffold_fn=scaffold_fn) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=8, config=create_run_config(iterations_per_loop=4)) with self.assertRaises(ValueError): est.train(input_fn=self._input_fn, steps=1) def test_scaffold_capture_tpu_tensor(self): def _model_fn(features, labels, mode, config, params): """Creates a head returning `TPUEstimatorSpec` based on mode.""" del config, params predictions = layers.dense( features['x'], 1, kernel_initializer=init_ops.zeros_initializer()) loss = losses.mean_squared_error(labels, predictions) optimizer = training.GradientDescentOptimizer(learning_rate=0.5) optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) train_op = optimizer.minimize( loss, global_step=training.get_global_step()) # Scaffold.finalize will "merge" all summaries, so we will be able # to detect invalid TPU tensor capture. 
summary_lib.scalar('loss_', loss) def scaffold_fn(): return training.Scaffold() return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=train_op, loss=loss, scaffold_fn=scaffold_fn) est = tpu_estimator.TPUEstimator( model_fn=_model_fn, train_batch_size=8, config=create_run_config(iterations_per_loop=4)) with self.assertRaises(ValueError): est.train(input_fn=self._input_fn, steps=1) class TPUEstimatorScaffoldWithEMATest(test.TestCase): def _get_scaffold(self, ema): var_dict = ema.variables_to_restore() return training.Scaffold(saver=training.Saver(var_dict)) def _input_fn(self, params): return dummy_input_fn(params['batch_size']) def _model_fn(self, features, labels, mode, config, params): """Creates a head returning `TPUEstimatorSpec` based on mode.""" with variable_scope.variable_scope('foo'): predictions = layers.dense( features['x'], 1, kernel_initializer=init_ops.zeros_initializer()) eval_metrics = None train_op = None loss = losses.mean_squared_error(labels, predictions) ema = moving_averages.ExponentialMovingAverage(decay=0.999) if mode == _TRAIN: optimizer = training.GradientDescentOptimizer(learning_rate=0.5) optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) opt_op = optimizer.minimize(loss, global_step=training.get_global_step()) with ops.control_dependencies([opt_op]): train_op = ema.apply() elif mode == _EVAL: def _metric_fn_on_cpu(labels, predictions): return { 'mse': metrics_lib.mean_absolute_error(labels, predictions), } eval_metrics = (_metric_fn_on_cpu, [labels, predictions]) # Change the saver for non-training mode. 
scaffold_fn = None if mode == _TRAIN else (lambda: self._get_scaffold(ema)) return tpu_estimator.TPUEstimatorSpec( mode=mode, train_op=train_op, loss=loss, predictions=predictions, scaffold_fn=scaffold_fn, eval_metrics=eval_metrics) def test_ema_with_train_and_evaluate(self): use_tpu = True est = tpu_estimator.TPUEstimator( model_fn=self._model_fn, train_batch_size=8, eval_batch_size=8, config=create_run_config(iterations_per_loop=1), use_tpu=use_tpu) # With iterations_per_loop=1 and train steps = 2, the after_run in the hook # will be invoked once to change the bias value. Make the bias variable # super large here to avoid flaky. rewrite_var_hook = _RewriteVarHook( scope_name='foo', variable_name='dense/bias', value=[100]) est.train(input_fn=self._input_fn, steps=2, hooks=[rewrite_var_hook]) bias_value = est.get_variable_value('foo/dense/bias') bias_ma_value = est.get_variable_value( 'foo/dense/bias/ExponentialMovingAverage') self.assertNotAllClose(bias_value, bias_ma_value) model_variable_value_hook = ( _ModelVariableValueHook(scope_name='foo', variable_name='dense/bias')) est.evaluate( input_fn=self._input_fn, steps=1, hooks=[model_variable_value_hook]) bias_value_during_eval = model_variable_value_hook.got_value self.assertAlmostEqual(bias_ma_value, bias_value_during_eval) class _ModelVariableValueHook(session_run_hook.SessionRunHook): """Capture the value of given variable after initialization.""" def __init__(self, scope_name, variable_name): """Constructs the run hook.""" self.scope_name = scope_name self.variable_name = variable_name self.got_value = None def after_create_session(self, sess, coord): del coord with variable_scope.variable_scope(self.scope_name, reuse=True): self.got_value = sess.run(variable_scope.get_variable(self.variable_name)) class _RewriteVarHook(session_run_hook.SessionRunHook): """Rwrite the variable value hook.""" def __init__(self, scope_name, variable_name, value): """Constructs the run hook.""" self.scope_name = scope_name 
self.variable_name = variable_name self.value = value def begin(self): with variable_scope.variable_scope(self.scope_name, reuse=True): self._var = variable_scope.get_variable(self.variable_name) def after_run(self, run_context, run_values): self._var.load(self.value, session=run_context.session) class TPUEstimatorHostCallTest(test.TestCase): def _input_fn(self, params): return dummy_input_fn(params['batch_size']) def _host_call(self, model_dir, mode): def fn(global_step, labels, predictions): global_step = math_ops.cast(global_step[0], dtypes.int64) # We add a filename suffix here to avoid clashing with existing summary # creation in Estimator. Otherwise both may attempt to open the same # filename. # # The name of the op is set to model_dir to avoid ResourceManager caching # the same summary writer instance across tests. # # In addition, we give different suffixes for train and eval to avoid # FileWriter in evaluate() overwrites the events dumped by training. # This is because the event file path has timestamps at second accuracy # but the CPU training could be super fast. 
with tf.summary.create_file_writer( model_dir, filename_suffix='.TPUEstimator-{}'.format(1 if mode == model_fn_lib .ModeKeys.TRAIN else 2), name=os.path.basename(model_dir)).as_default(): with summary_ops_v2.record_summaries_every_n_global_steps( 5 if mode == model_fn_lib.ModeKeys.TRAIN else 1, global_step=global_step): loss = losses.mean_squared_error(labels, predictions) summary_ops_v2.scalar('host_call_test', loss, step=global_step) summary_ops_v2.scalar( 'host_call_global_step', global_step, step=global_step) return tf.compat.v1.summary.all_v2_summary_ops() return fn def _metric_fn_on_cpu(self, labels, predictions): return { 'mse': metrics_lib.mean_absolute_error(labels, predictions), } def _model_fn(self, model_dir): def fn(features, labels, mode, params): del params train_op = None predictions = dense_computation(features) loss = losses.mean_squared_error(labels, predictions) if mode == _TRAIN: optimizer = tf.compat.v1.tpu.CrossShardOptimizer( training.GradientDescentOptimizer(learning_rate=0.5)) train_op = optimizer.minimize(loss, training.get_global_step()) return tpu_estimator.TPUEstimatorSpec( mode, loss=loss, train_op=train_op, predictions=predictions, eval_metrics=(self._metric_fn_on_cpu, [labels, predictions]), host_call=(self._host_call(model_dir, mode), [ array_ops.reshape( math_ops.cast(training.get_global_step(), dtypes.int32), [1]), labels, predictions ])) return fn def _events_from_logdir(self, logdir): files = gfile.ListDirectory(logdir) events = [] found = False for f in sorted(files): # Note that we need to distinguish between the TPUEstimator events file # and the SummarySaverHook one. if '.tfevents.' 
in f and '.TPUEstimator' in f: found = True f = os.path.join(logdir, f) events.extend(events_from_file(f)) self.assertEqual(True, found) return events def _test_summaries(self, use_tpu, output_every_n_steps=False): outfeed_every_n_steps = 2 if output_every_n_steps else 1 model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) run_config = tpu_config.RunConfig( master='', model_dir=model_dir, tpu_config=tpu_config.TPUConfig( iterations_per_loop=21, num_shards=FLAGS.test_num_shards, experimental_host_call_every_n_steps=outfeed_every_n_steps, )) est = tpu_estimator.TPUEstimator( model_fn=self._model_fn(model_dir), train_batch_size=8, eval_batch_size=8, config=run_config, use_tpu=use_tpu) est.train(input_fn=self._input_fn, steps=42) events = self._events_from_logdir(model_dir) events = [e for e in events if e.WhichOneof('what') != 'file_version'] if not output_every_n_steps or not use_tpu: self.assertEqual(18, len(events)) self.assertEqual( 9, len([e for e in events if e.summary.value[0].tag == 'host_call_test'])) self.assertEqual([value*5 for value in range(9)], [ e.summary.value[0].simple_value for e in events if e.summary.value[0].tag == 'host_call_global_step']) else: self.assertEqual(10, len(events)) self.assertEqual( 5, len([e for e in events if e.summary.value[0].tag == 'host_call_test'])) self.assertEqual([0, 10, 20, 25, 35], [ e.summary.value[0].simple_value for e in events if e.summary.value[0].tag == 'host_call_global_step']) est.evaluate(input_fn=self._input_fn, steps=7) events = self._events_from_logdir(model_dir) events = [e for e in events if e.WhichOneof('what') != 'file_version'] if not output_every_n_steps or not use_tpu: self.assertEqual(32, len(events)) # 18 from train + 14 from eval self.assertEqual( 16, # 9 from train + 7 from eval len([e for e in events if e.summary.value[0].tag == 'host_call_test'])) self.assertEqual( [value*5 for value in range(9)] + [42] * 7, [e.summary.value[0].simple_value for e in events if e.summary.value[0].tag == 
'host_call_global_step']) else: self.assertEqual(24, len(events)) self.assertEqual( 12, len([e for e in events if e.summary.value[0].tag == 'host_call_test'])) self.assertEqual( [0, 10, 20, 25, 35] + [42] * 7, [e.summary.value[0].simple_value for e in events if e.summary.value[0].tag == 'host_call_global_step']) def test_summaries(self): self._test_summaries(True) def test_summaries_on_cpu(self): self._test_summaries(False) def test_summaries_every_n_steps(self): self._test_summaries(True, True) def test_summaries_on_cpu_every_n_steps(self): self._test_summaries(False, True) def test_keras_tensorflow_op_layer(self): def model_fn(features, labels, mode, params): del features, labels, params i1 = tf_keras.Input(10) i2 = tf_keras.Input(10) out = tf.concat([i1, i2], axis=1) out = tf_keras.layers.Dense(1)(out) model = tf_keras.Model([i1, i2], out) x = [tf.ones((5, 10)), tf.ones((5, 10))] y = model(x) loss = tf.reduce_mean(y) if mode == _TRAIN: optimizer = tf.compat.v1.tpu.CrossShardOptimizer( training.GradientDescentOptimizer(learning_rate=0.5)) train_op = optimizer.minimize(loss, training.get_global_step()) return tpu_estimator.TPUEstimatorSpec( mode, loss=loss, train_op=train_op) model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) run_config = tpu_config.RunConfig( master='', model_dir=model_dir, tpu_config=tpu_config.TPUConfig( iterations_per_loop=1, num_shards=FLAGS.test_num_shards, )) est = tpu_estimator.TPUEstimator( model_fn=model_fn, train_batch_size=8, eval_batch_size=8, config=run_config) est.train(input_fn=self._input_fn, steps=42) if __name__ == '__main__': tf.compat.v1.disable_v2_behavior() test.main() ================================================ FILE: tensorflow_estimator/python/estimator/tpu/util.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =================================================================== """Utilities for the functionalities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import re import time import numpy as np import six import tensorflow as tf _ITERATIONS_PER_LOOP_VALUE_REGEX = re.compile( r'^(?P[1-9]\d*)((?P[s|m|h])$|$)') IterationsPerLoopCounter = collections.namedtuple('IterationsPerLoopCounter', ['value', 'unit']) def check_positive_integer(value, name): """Checks whether `value` is a positive integer.""" if not isinstance(value, (six.integer_types, np.integer)): raise TypeError('{} must be int, got {}'.format(name, type(value))) if value <= 0: raise ValueError('{} must be positive, got {}'.format(name, value)) def parse_iterations_per_loop(iterations_per_loop): """Parses the `iterations_per_loop` value. The parser expects the value of the `iterations_per_loop` value to be a positive integer value with unit:`count` or time-based value `` where is any positive integer and `s`, `m`, `h` are unit of time in seconds, minutes, hours respectively. Examples of valid values: `3600s`, `60m` , `1h`. Args: iterations_per_loop: Number of iterations or time alloted to spend on per device loop. Returns: A dictionary of `value` and `unit`. The `unit` value can be either a raw `count`, or time in `seconds`. { "value": , "unit": } """ m = _ITERATIONS_PER_LOOP_VALUE_REGEX.match(str(iterations_per_loop)) if m is None: raise ValueError( 'Invalid TPUConfig `iterations_per_loop` value. 
Value must be positive ' 'integer value or time-based value `` where is any' 'positive integer and `s`, `m`, `h` are unit of time in seconds, ' 'minutes, hours respectively. Examples of valid values: `3600s`, `60m`,' ' `1h`.') unit_value = 'seconds' if m.group('suffix') in ['h', 'm', 's'] else 'count' value = int(m.group('value')) if m.group('suffix') == 'm': value *= 60 elif m.group('suffix') == 'h': value *= 3600 return IterationsPerLoopCounter(value, unit_value) # TODO(b/118302029) Remove this copy of MultiHostDatasetInitializerHook after we # release a tensorflow_estimator with MultiHostDatasetInitializerHook in # python/estimator/util.py. class MultiHostDatasetInitializerHook(tf.compat.v1.train.SessionRunHook): """Creates a SessionRunHook that initializes all passed iterators.""" def __init__(self, dataset_initializers): self._initializers = dataset_initializers def after_create_session(self, session, coord): del coord start = time.time() session.run(self._initializers) tf.compat.v1.logging.info('Initialized dataset iterators in %d seconds', time.time() - start) ================================================ FILE: tensorflow_estimator/python/estimator/training.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
def _validate_exporters(exporters):
  """Validates `exporters` and returns them as a tuple.

  Accepts a single `Exporter`, an iterable of `Exporter`s, or `None`. Each
  exporter must have a unique, non-empty string name.

  Raises:
    ValueError: If an exporter name is empty, non-string, or duplicated.
    TypeError: If `exporters` is not an `Exporter`/iterable of `Exporter`s.
  """
  if not exporters:
    return ()

  # A lone Exporter is promoted to a one-element list for uniform handling.
  if isinstance(exporters, exporter_lib.Exporter):
    exporters = [exporters]

  unique_names = []  # `Exporter`s should have unique names.
  try:
    # NOTE: iterating a non-iterable `exporters` raises TypeError, which is
    # deliberately caught by the same handler as the explicit `raise TypeError`
    # below — both cases produce the one combined error message.
    for exporter in exporters:
      if not isinstance(exporter, exporter_lib.Exporter):
        # Error message will be printed out by the outer try/except.
        raise TypeError

      if not exporter.name:
        full_list_of_names = [e.name for e in exporters]
        raise ValueError('An Exporter cannot have a name that is `None` or'
                         ' empty. All exporter names:'
                         ' {}'.format(full_list_of_names))

      if not isinstance(exporter.name, six.string_types):
        raise ValueError('An Exporter must have a string name. Given: '
                         '{}'.format(type(exporter.name)))

      if exporter.name in unique_names:
        full_list_of_names = [e.name for e in exporters]
        raise ValueError(
            '`exporters` must have unique names. Such a name cannot be `None`.'
            ' All exporter names: {}'.format(full_list_of_names))
      unique_names.append(exporter.name)
  except TypeError:
    # Two possibilities:
    # - `exporters` is neither `Exporter` nor iterable. Python has
    #   raised a `TypeError` when iterating over `exporters`.
    # - an `exporter` was None or not of type `Exporter`, so we raised a
    #   `TypeError`.
    raise TypeError('`exporters` must be an Exporter,'
                    ' an iterable of Exporter, or `None`,'
                    ' found %s.' % exporters)

  return tuple(exporters)
Optional hooks run at various stages of training. Usage: >>> train_spec = tf.estimator.TrainSpec( ... input_fn=lambda: 1, ... max_steps=100, ... hooks=[_StopAtSecsHook(stop_after_secs=10)], ... saving_listeners=[_NewCheckpointListenerForEvaluate(None, 20, None)]) >>> train_spec.saving_listeners[0]._eval_throttle_secs 20 >>> train_spec.hooks[0]._stop_after_secs 10 >>> train_spec.max_steps 100 """ def __new__(cls, input_fn, max_steps=None, hooks=None, saving_listeners=None): """Creates a validated `TrainSpec` instance. Args: input_fn: A function that provides input data for training as minibatches. See [Premade Estimators]( https://tensorflow.org/guide/premade_estimators#create_input_functions) for more information. The function should construct and return one of the following: * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. * A tuple (features, labels): Where features is a `Tensor` or a dictionary of string feature name to `Tensor` and labels is a `Tensor` or a dictionary of string label name to `Tensor`. max_steps: Int. Positive number of total steps for which to train model. If `None`, train forever. The training `input_fn` is not expected to generate `OutOfRangeError` or `StopIteration` exceptions. See the `train_and_evaluate` stop condition section for details. hooks: Iterable of `tf.train.SessionRunHook` objects to run on all workers (including chief) during training. saving_listeners: Iterable of `tf.estimator.CheckpointSaverListener` objects to run on chief during training. Returns: A validated `TrainSpec` object. Raises: ValueError: If any of the input arguments is invalid. TypeError: If any of the arguments is not of the expected type. """ # Validate input_fn. _validate_input_fn(input_fn) # Validate max_steps. if max_steps is not None and max_steps <= 0: raise ValueError( 'Must specify max_steps > 0, given: {}'.format(max_steps)) # Validate hooks. 
@estimator_export('estimator.EvalSpec')
class EvalSpec(
    collections.namedtuple('EvalSpec', [
        'input_fn', 'steps', 'name', 'hooks', 'exporters', 'start_delay_secs',
        'throttle_secs'
    ])):
  """Configuration for the "eval" part for the `train_and_evaluate` call.

  `EvalSpec` combines details of evaluation of the trained model as well as its
  export. Evaluation consists of computing metrics to judge the performance of
  the trained model. Export writes out the trained model on to external
  storage.
  """

  def __new__(cls,
              input_fn,
              steps=100,
              name=None,
              hooks=None,
              exporters=None,
              start_delay_secs=120,
              throttle_secs=600):
    """Creates a validated `EvalSpec` instance.

    Args:
      input_fn: A function that constructs the input data for evaluation. See
        [Premade Estimators](
        https://tensorflow.org/guide/premade_estimators#create_input_functions)
        for more information. The function should construct and return one of
        the following:
        * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
          tuple (features, labels) with same constraints as below.
        * A tuple (features, labels): Where features is a `Tensor` or a
          dictionary of string feature name to `Tensor` and labels is a
          `Tensor` or a dictionary of string label name to `Tensor`.
      steps: Int. Positive number of steps for which to evaluate model. If
        `None`, evaluates until `input_fn` raises an end-of-input exception.
        See `Estimator.evaluate` for details.
      name: String. Name of the evaluation if user needs to run multiple
        evaluations on different data sets. Metrics for different evaluations
        are saved in separate folders, and appear separately in tensorboard.
      hooks: Iterable of `tf.train.SessionRunHook` objects to run during
        evaluation.
      exporters: Iterable of `Exporter`s, or a single one, or `None`.
        `exporters` will be invoked after each evaluation.
      start_delay_secs: Int. Start evaluating after waiting for this many
        seconds.
      throttle_secs: Int. Do not re-evaluate unless the last evaluation was
        started at least this many seconds ago. Of course, evaluation does not
        occur if no new checkpoints are available, hence, this is the minimum.

    Returns:
      A validated `EvalSpec` object.

    Raises:
      ValueError: If any of the input arguments is invalid.
      TypeError: If any of the arguments is not of the expected type.
    """
    # Validate input_fn.
    _validate_input_fn(input_fn)

    # Validate steps. `None` (evaluate until end-of-input) is allowed.
    if steps is not None and steps <= 0:
      raise ValueError('Must specify steps > 0, given: {}'.format(steps))

    # Validate name.
    if name is not None and not isinstance(name, six.string_types):
      raise TypeError('`name` must be string, given: {}'.format(name))

    # Validate hooks; normalizes `None` to an empty tuple.
    hooks = _validate_hooks(hooks)

    # Validate exporters; normalizes single Exporter / `None` to a tuple.
    exporters = _validate_exporters(exporters)

    # Validate start_delay_secs.
    if start_delay_secs < 0:
      raise ValueError('Must specify start_delay_secs >= 0, given: {}'.format(
          start_delay_secs))

    # Validate throttle_secs.
    if throttle_secs < 0:
      raise ValueError(
          'Must specify throttle_secs >= 0, given: {}'.format(throttle_secs))

    return super(EvalSpec, cls).__new__(
        cls,
        input_fn=input_fn,
        steps=steps,
        name=name,
        hooks=hooks,
        exporters=exporters,
        start_delay_secs=start_delay_secs,
        throttle_secs=throttle_secs)
This utility function provides consistent behavior for both local (non-distributed) and distributed configurations. The default distribution configuration is parameter server-based between-graph replication. For other types of distribution configurations such as all-reduce training, please use [DistributionStrategies](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/distribute). Overfitting: In order to avoid overfitting, it is recommended to set up the training `input_fn` to shuffle the training data properly. Stop condition: In order to support both distributed and non-distributed configuration reliably, the only supported stop condition for model training is `train_spec.max_steps`. If `train_spec.max_steps` is `None`, the model is trained forever. *Use with care* if model stop condition is different. For example, assume that the model is expected to be trained with one epoch of training data, and the training `input_fn` is configured to throw `OutOfRangeError` after going through one epoch, which stops the `Estimator.train`. For a three-training-worker distributed configuration, each training worker is likely to go through the whole epoch independently. So, the model will be trained with three epochs of training data instead of one epoch. Example of local (non-distributed) training: ```python # Set up feature columns. categorial_feature_a = categorial_column_with_hash_bucket(...) categorial_feature_a_emb = embedding_column( categorical_column=categorial_feature_a, ...) ... # other feature columns estimator = DNNClassifier( feature_columns=[categorial_feature_a_emb, ...], hidden_units=[1024, 512, 256]) # Or set up the model directory # estimator = DNNClassifier( # config=tf.estimator.RunConfig( # model_dir='/my_model', save_summary_steps=100), # feature_columns=[categorial_feature_a_emb, ...], # hidden_units=[1024, 512, 256]) # Input pipeline for train and evaluate. def train_input_fn(): # returns x, y # please shuffle the data. 
pass def eval_input_fn(): # returns x, y pass train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=1000) eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn) tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) ``` Note that in current implementation `estimator.evaluate` will be called multiple times. This means that evaluation graph (including eval_input_fn) will be re-created for each `evaluate` call. `estimator.train` will be called only once. Example of distributed training: Regarding the example of distributed training, the code above can be used without a change (Please do make sure that the `RunConfig.model_dir` for all workers is set to the same directory, i.e., a shared file system all workers can read and write). The only extra work to do is setting the environment variable `TF_CONFIG` properly for each worker correspondingly. Also see [Distributed TensorFlow](https://www.tensorflow.org/deploy/distributed). Setting environment variable depends on the platform. For example, on Linux, it can be done as follows (`$` is the shell prompt): ``` $ TF_CONFIG='' python train_model.py ``` For the content in `TF_CONFIG`, assume that the training cluster spec looks like: ``` cluster = {"chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222", "host3:2222"], "ps": ["host4:2222", "host5:2222"]} ``` Example of `TF_CONFIG` for chief training worker (must have one and only one): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. TF_CONFIG='{ "cluster": { "chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222", "host3:2222"], "ps": ["host4:2222", "host5:2222"] }, "task": {"type": "chief", "index": 0} }' ``` Note that the chief worker also does the model training job, similar to other non-chief training workers (see next paragraph). In addition to the model training, it manages some extra work, e.g., checkpoint saving and restoring, writing summaries, etc. 
Example of `TF_CONFIG` for non-chief training worker (optional, could be multiple): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. TF_CONFIG='{ "cluster": { "chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222", "host3:2222"], "ps": ["host4:2222", "host5:2222"] }, "task": {"type": "worker", "index": 0} }' ``` where the `task.index` should be set as 0, 1, 2, in this example, respectively for non-chief training workers. Example of `TF_CONFIG` for parameter server, aka ps (could be multiple): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. TF_CONFIG='{ "cluster": { "chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222", "host3:2222"], "ps": ["host4:2222", "host5:2222"] }, "task": {"type": "ps", "index": 0} }' ``` where the `task.index` should be set as 0 and 1, in this example, respectively for parameter servers. Example of `TF_CONFIG` for evaluator task. Evaluator is a special task that is not part of the training cluster. There could be only one. It is used for model evaluation. ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. TF_CONFIG='{ "cluster": { "chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222", "host3:2222"], "ps": ["host4:2222", "host5:2222"] }, "task": {"type": "evaluator", "index": 0} }' ``` When `distribute` or `experimental_distribute.train_distribute` and `experimental_distribute.remote_cluster` is set, this method will start a client running on the current host which connects to the `remote_cluster` for training and evaluation. Args: estimator: An `Estimator` instance to train and evaluate. train_spec: A `TrainSpec` instance to specify the training specification. eval_spec: A `EvalSpec` instance to specify the evaluation and export specification. 
Returns: A tuple of the result of the `evaluate` call to the `Estimator` and the export results using the specified `Exporter`s. Currently, the return value is undefined for distributed training mode. Raises: ValueError: if environment variable `TF_CONFIG` is incorrectly set. """ _assert_eval_spec(eval_spec) # fail fast if eval_spec is invalid. estimator_lib._estimator_api_gauge.get_cell('train_and_evaluate').set(True) # pylint: disable=protected-access executor = _TrainingExecutor( estimator=estimator, train_spec=train_spec, eval_spec=eval_spec) config = estimator.config # If `distribute_coordinator_mode` is set and running in distributed # environment, we run `train_and_evaluate` via distribute coordinator. if distribute_coordinator_training.should_run_distribute_coordinator(config): tf.compat.v1.logging.info( 'Running `train_and_evaluate` with Distribute Coordinator.') distribute_coordinator_training.train_and_evaluate(estimator, train_spec, eval_spec, _TrainingExecutor) return if (config.task_type == run_config_lib.TaskType.EVALUATOR and config.task_id > 0): raise ValueError( 'For distributed training, there can only be one `evaluator` task ' '(with task id 0). 
class _StopAtSecsHook(tf.compat.v1.train.SessionRunHook):
  """Requests a stop once a fixed wall-clock budget after `begin` is spent."""

  def __init__(self, stop_after_secs):
    # Seconds allowed between `begin` and the stop request.
    self._stop_after_secs = stop_after_secs
    self._start_time = None

  def begin(self):
    # Record the moment the session run loop starts.
    self._start_time = time.time()

  def after_run(self, run_context, run_values):
    del run_values  # Unused; required by the SessionRunHook signature.
    elapsed = time.time() - self._start_time
    if elapsed >= self._stop_after_secs:
      run_context.request_stop()
class _TrainingExecutor(object):
  """The executor to run `Estimator` training and evaluation.

  This implementation supports both distributed and non-distributed (aka local)
  training and evaluation based on the setting in `tf.estimator.RunConfig`.
  """

  def __init__(self,
               estimator,
               train_spec,
               eval_spec,
               train_hooks=None,
               continuous_eval_listener=None):
    if not isinstance(estimator,
                      (estimator_lib.Estimator, estimator_lib.EstimatorV2)):
      raise TypeError('`estimator` must have type `tf.estimator.Estimator`. '
                      'Got: {}'.format(type(estimator)))
    self._estimator = estimator

    if not isinstance(train_spec, TrainSpec):
      raise TypeError('`train_spec` must have type `tf.estimator.TrainSpec`. '
                      'Got: {}'.format(type(train_spec)))
    self._train_spec = train_spec

    # `eval_spec` may be `None` for train-only tasks; it is asserted
    # non-None in the code paths that actually evaluate.
    if eval_spec and not isinstance(eval_spec, EvalSpec):
      raise TypeError('`eval_spec` must be either `None` or have type '
                      '`tf.estimator.EvalSpec`. Got: {}'.format(
                          type(eval_spec)))
    self._eval_spec = eval_spec

    self._train_hooks = _validate_hooks(train_hooks)

    if (continuous_eval_listener and
        not isinstance(continuous_eval_listener, _ContinuousEvalListener)):
      raise TypeError('`continuous_eval_listener` must have type '
                      '`_ContinuousEvalListener`.')
    self._continuous_eval_listener = (
        continuous_eval_listener or _ContinuousEvalListener())

  @property
  def estimator(self):
    return self._estimator

  def run(self):
    """Executes the run_foo for task type `foo`.

    `_TrainingExecutor` predefines the procedure for task type 'chief',
    'worker', 'ps', and 'evaluator'. For task type `foo`, the corresponding
    procedure is `run_foo'. This `run` method invoke the procedure base on the
    `RunConfig.task_type`.

    Returns:
      A tuple of the result of the `evaluate` call to the `Estimator` and the
      export results using the specified `ExportStrategy`.
      Currently undefined for distributed training mode.

    Raises:
      ValueError: if the estimator.config is mis-configured.
    """
    config = self._estimator.config

    if (not config.cluster_spec and
        config.task_type != run_config_lib.TaskType.EVALUATOR):
      tf.compat.v1.logging.info(
          'Running training and evaluation locally (non-distributed).')
      return self.run_local()

    # Distributed case.
    if not config.task_type:
      # TODO(xiejw): Improve the error message about how to set the TF_CONFIG
      # correctly.
      raise ValueError(
          '`estimator.config` must have task_type set. This usually means '
          'TF_CONFIG environment is not set correctly.')

    if config.task_type == 'local':
      raise ValueError(
          '`task.type` in TF_CONFIG cannot be `local`. Leaving `cluster` and '
          '`task` properties in TF_CONFIG absent triggers train and evaluate '
          '`Estimator` locally (non-distributed).')

    # For task type foo, call executor.run_foo. Dispatch is by reflection over
    # the `run_*` methods defined on this class (excluding `run_local`).
    available_tasks = [
        x for x in dir(self)
        if x.startswith('run_') and x != 'run_local' and
        callable(getattr(self, x))
    ]
    task_to_run = 'run_' + config.task_type
    if task_to_run not in available_tasks:
      raise ValueError(
          'Task type {} is not supported. Supported task types are {}'.format(
              config.task_type, [x[len('run_'):] for x in available_tasks]))
    getattr(self, task_to_run)()

  def run_chief(self):
    """Runs task chief."""
    # TODO(xiejw): To allow execution framework to add train hooks.
    return self._start_distributed_training(
        saving_listeners=self._train_spec.saving_listeners)

  def run_worker(self):
    """Runs task (training) worker."""
    # TODO(xiejw): To allow execution framework to add train hooks.
    return self._start_distributed_training()

  def run_master(self):
    """Runs task master."""
    _assert_eval_spec(self._eval_spec)

    # Final export signal: For any eval result with global_step >= train
    # max_steps, the evaluator will send the final export signal. There is a
    # small chance that the Estimator.train stopping logic sees a different
    # global_step value (due to global step race condition and the fact the
    # saver sees a larger value for checkpoint saving), which does not end
    # the training. When the training ends, a new checkpoint is generated,
    # which triggers the listener again. So, it could be the case the final
    # export is triggered twice.
    #
    # But here, throttle_secs will skip the next intermediate checkpoint and,
    # so, the double final export chance is very small.
    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
                                             self._train_spec.max_steps)

    # When the underlying `Estimator` object saves a new checkpoint, we would
    # like this callback to be called so that evaluation and export can
    # trigger.
    saving_listeners = self._train_spec.saving_listeners + tuple([
        _NewCheckpointListenerForEvaluate(evaluator,
                                          self._eval_spec.throttle_secs,
                                          _ContinuousEvalListener())
    ])
    self._start_distributed_training(saving_listeners=saving_listeners)

  def run_evaluator(self):
    """Runs task evaluator."""
    # TODO(xiejw): To allow execution framework to add continuous eval
    # listener.
    return self._start_continuous_evaluation()

  def run_ps(self):
    """Runs task parameter server (in training cluster spec)."""
    config = self._estimator.config
    server = self._start_std_server(config)
    server.join()

  def run_local(self):
    """Runs training and evaluation locally (non-distributed)."""
    _assert_eval_spec(self._eval_spec)

    train_hooks = list(self._train_spec.hooks) + list(self._train_hooks)
    tf.compat.v1.logging.info(
        'Start train and evaluate loop. The evaluate will happen '
        'after every checkpoint. Checkpoint frequency is determined '
        'based on RunConfig arguments: save_checkpoints_steps {} or '
        'save_checkpoints_secs {}.'.format(
            self._estimator.config.save_checkpoints_steps,
            self._estimator.config.save_checkpoints_secs))

    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
                                             self._train_spec.max_steps)

    # Evaluation runs inside training via this checkpoint-saver listener.
    listener_for_eval = _NewCheckpointListenerForEvaluate(
        evaluator, self._eval_spec.throttle_secs,
        self._continuous_eval_listener)
    saving_listeners = self._train_spec.saving_listeners + (listener_for_eval,)

    self._estimator.train(
        input_fn=self._train_spec.input_fn,
        max_steps=self._train_spec.max_steps,
        hooks=train_hooks,
        saving_listeners=saving_listeners)

    # If no eval ever ran (e.g. no checkpoint was produced), report a
    # MISSING_CHECKPOINT result instead of `None`.
    eval_result = listener_for_eval.eval_result or _EvalResult(
        status=_EvalStatus.MISSING_CHECKPOINT)
    return eval_result.metrics, listener_for_eval.export_results

  def _start_std_server(self, config):
    """Creates, starts, and returns a server_lib.Server."""
    if (not config.cluster_spec or not config.task_type or
        config.task_id is None):
      raise RuntimeError('Could not start server; be sure to specify '
                         'cluster_spec, task_type, and task in '
                         'RunConfig or set the TF_CONFIG environment variable.')

    if not config.master:
      jobs = config.cluster_spec.jobs
      if (len(jobs) == 1 and
          len(config.cluster_spec.job_tasks(jobs[0])) == 1 and
          config.task_type in _TRAINER_JOBS):
        # For distributed training, config.master is empty if and only if it
        # has a single node in the cluster spec. In this case, we should not
        # start the server.
        tf.compat.v1.logging.info(
            'Skip starting Tensorflow server as there is only one '
            'node in the cluster.')
        return
      else:
        raise RuntimeError(
            'Could not start server; be sure to specify master in '
            'RunConfig or set the TF_CONFIG environment variable.')

    tf.compat.v1.logging.info('Start Tensorflow server.')

    if config.session_config is None:
      session_config = tf.compat.v1.ConfigProto(log_device_placement=False)
    else:
      session_config = tf.compat.v1.ConfigProto(
          log_device_placement=False,
          gpu_options=config.session_config.gpu_options)

    server = server_lib.Server(
        config.cluster_spec,
        job_name=config.task_type,
        task_index=config.task_id,
        config=session_config,
        start=False,
        protocol=config.protocol)
    server.start()
    return server

  def _start_distributed_training(self, saving_listeners=None):
    """Calls `Estimator` train in a distributed setting."""
    config = self._estimator.config

    # Start in-process TensorFlow server if needed. It's important to start
    # the server before we (optionally) sleep. Otherwise, the servers will
    # wait to connect to each other before starting to train.
    if not _is_google_env():
      self._start_std_server(config)

    # Delay worker to start. For asynchronous training, this usually helps
    # model to converge faster. Chief starts the training immediately, so,
    # worker with task id x (0-based) should wait (x+1) *
    # _DELAY_SECS_PER_WORKER.
    start_delay_secs = 0
    if config.task_type == run_config_lib.TaskType.WORKER:
      # TODO(xiejw): Replace the hard code logic (task_id + 1) with unique id
      # in training cluster.
      max_delay_secs = _MAX_DELAY_SECS
      if config.experimental_max_worker_delay_secs is not None:
        max_delay_secs = int(config.experimental_max_worker_delay_secs)
      start_delay_secs = min(max_delay_secs,
                             (config.task_id + 1) * _DELAY_SECS_PER_WORKER)
    if start_delay_secs > 0:
      tf.compat.v1.logging.info('Waiting %d secs before starting training.',
                                start_delay_secs)
      time.sleep(start_delay_secs)

    self._estimator.train(
        input_fn=self._train_spec.input_fn,
        max_steps=self._train_spec.max_steps,
        hooks=list(self._train_spec.hooks) + list(self._train_hooks),
        saving_listeners=saving_listeners)

  def _start_continuous_evaluation(self):
    """Repeatedly calls `Estimator` evaluate and export until training ends."""
    _assert_eval_spec(self._eval_spec)

    start_delay_secs = self._eval_spec.start_delay_secs
    if start_delay_secs:
      tf.compat.v1.logging.info('Waiting %f secs before starting eval.',
                                start_delay_secs)
      time.sleep(start_delay_secs)

    latest_eval_result = None
    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
                                             self._train_spec.max_steps)

    should_early_stop = False
    while not should_early_stop:
      # Stop once an evaluation has seen global_step reach train max_steps.
      if (latest_eval_result and
          latest_eval_result.status == _EvalStatus.EVALUATED):
        global_step = latest_eval_result.metrics.get(
            tf.compat.v1.GraphKeys.GLOBAL_STEP)
        if (global_step and self._train_spec.max_steps and
            global_step >= self._train_spec.max_steps):
          tf.compat.v1.logging.info(
              'Exiting evaluation, global_step=%s >= train max_steps=%s',
              global_step, self._train_spec.max_steps)
          return
      latest_eval_result, should_early_stop = self._execute_evaluator_once(
          evaluator, self._continuous_eval_listener,
          self._eval_spec.throttle_secs)

  def _execute_evaluator_once(self, evaluator, continuous_eval_listener,
                              throttle_secs):
    """Executes the `evaluator` once and returns (eval_result, early_stop)."""
    _assert_eval_spec(self._eval_spec)

    start = time.time()

    eval_result = None
    should_early_stop = False

    if not continuous_eval_listener.before_eval():
      tf.compat.v1.logging.info('Exiting evaluation, as requested by '
                                '_ContinuousEvalListener.before_eval.')
      should_early_stop = True
      return (eval_result, should_early_stop)

    # Final export signal: For any eval result with global_step >= train
    # max_steps, the evaluator will send the final export signal. The next
    # iteration of while loop will end the continuous eval as the stopping
    # condition is satisfied (both checks use the same global_step value,
    # i.e., no race condition)
    eval_result, _ = evaluator.evaluate_and_export()

    if not self._continuous_eval_listener.after_eval(eval_result):
      tf.compat.v1.logging.info('Exiting evaluation, as requested by '
                                '_ContinuousEvalListener.after_eval.')
      should_early_stop = True
      return (eval_result, should_early_stop)

    # Throttle if necessary.
    elapsed_time = time.time() - start
    difference = throttle_secs - elapsed_time
    if difference > 0:
      tf.compat.v1.logging.info(
          'Waiting %f secs before starting next eval run.', difference)
      time.sleep(difference)
    elif (throttle_secs == 0 and
          eval_result.status != _EvalStatus.EVALUATED):
      # Prints a user-actionable warning to avoid unnecessary load on
      # evaluator.
      tf.compat.v1.logging.warning(
          'EvalSpec.throttle_secs is set as 0. This might overload the job '
          'before finding (next) new checkpoint. Please consider to increase '
          'it.')

    return (eval_result, should_early_stop)

  class _Evaluator(object):
    """A helper class to call `Estimator.evaluate` and export model."""

    def __init__(self, estimator, eval_spec, max_training_steps):
      self._estimator = estimator

      _assert_eval_spec(eval_spec)
      self._eval_spec = eval_spec

      self._is_final_export_triggered = False
      # Last checkpoint evaluated; used to skip duplicate evaluations.
      self._previous_ckpt_path = None
      self._last_warning_time = 0
      self._max_training_steps = max_training_steps

    @property
    def is_final_export_triggered(self):
      return self._is_final_export_triggered

    def evaluate_and_export(self):
      """Evaluate and (maybe) export the current model.

      Returns:
        A tuple of `EvalResult` instance and the export results.

      Raises:
        RuntimeError: for any unexpected internal error.
        TypeError: if evaluation result has wrong type.
      """
      latest_ckpt_path = self._estimator.latest_checkpoint()
      if not latest_ckpt_path:
        self._log_err_msg('Estimator is not trained yet. Will start an '
                          'evaluation when a checkpoint is ready.')
        return _EvalResult(status=_EvalStatus.MISSING_CHECKPOINT), []

      if latest_ckpt_path == self._previous_ckpt_path:
        self._log_err_msg(
            'No new checkpoint ready for evaluation. Skip the current '
            'evaluation pass as evaluation results are expected to be same '
            'for the same checkpoint.')
        return _EvalResult(status=_EvalStatus.NO_NEW_CHECKPOINT), []

      metrics = self._estimator.evaluate(
          input_fn=self._eval_spec.input_fn,
          steps=self._eval_spec.steps,
          name=self._eval_spec.name,
          checkpoint_path=latest_ckpt_path,
          hooks=self._eval_spec.hooks)

      # _EvalResult validates the metrics.
      eval_result = _EvalResult(
          status=_EvalStatus.EVALUATED,
          metrics=metrics,
          checkpoint_path=latest_ckpt_path)

      is_the_final_export = (
          eval_result.metrics[tf.compat.v1.GraphKeys.GLOBAL_STEP] >=
          self._max_training_steps if self._max_training_steps else False)
      export_results = self._export_eval_result(eval_result,
                                                is_the_final_export)

      if is_the_final_export:
        tf.compat.v1.logging.debug(
            'Calling exporter with the `is_the_final_export=True`.')
        self._is_final_export_triggered = True

      self._last_warning_time = 0
      self._previous_ckpt_path = latest_ckpt_path
      return eval_result, export_results

    def _log_err_msg(self, message):
      """Prints warning `message` every 10 mins."""
      current_time = time.time()
      if current_time - self._last_warning_time > 600:
        tf.compat.v1.logging.warning(message)
        self._last_warning_time = current_time

    def _export_eval_result(self, eval_result, is_the_final_export):
      """Export `eval_result` according to exporters in `EvalSpec`."""
      export_dir_base = os.path.join(
          tf.compat.as_str_any(self._estimator.model_dir),
          tf.compat.as_str_any('export'))

      export_results = []
      for exporter in self._eval_spec.exporters:
        export_results.append(
            exporter.export(
                estimator=self._estimator,
                export_path=os.path.join(
                    tf.compat.as_str_any(export_dir_base),
                    tf.compat.as_str_any(exporter.name)),
                checkpoint_path=eval_result.checkpoint_path,
                eval_result=eval_result.metrics,
                is_the_final_export=is_the_final_export))
      return export_results
""" if status != _EvalStatus.EVALUATED: if metrics: raise ValueError( 'metrics must be `None` if status is not {}; got status {},' ' metrics {}'.format(_EvalStatus.EVALUATED, status, metrics)) if checkpoint_path: raise ValueError( 'checkpoint must be `None` if status is not {}; got status {}, ' 'checkpoint_path {}'.format(_EvalStatus.EVALUATED, status, checkpoint_path)) return super(_EvalResult, cls).__new__(cls, status, metrics, checkpoint_path) # Now, evaluated case. assert status == _EvalStatus.EVALUATED # Validates metrics. if not metrics: raise ValueError( 'Internal error: `Estimator.evaluate` should never return empty ' 'metrics.') if not isinstance(metrics, dict): raise TypeError( '`Estimator.evaluate` should return dict. Given {}.'.format( type(metrics))) if tf.compat.v1.GraphKeys.GLOBAL_STEP not in metrics: raise ValueError( 'Internal error: `Estimator.evaluate` result should have ' '`global_step` in result. Given {}'.format(metrics)) # Validates checkpoint_path. if not checkpoint_path: raise ValueError( 'Internal error: `checkpoint_path` should never be empty.') return super(_EvalResult, cls).__new__(cls, status, metrics, checkpoint_path) class _ContinuousEvalListener(object): """Interface for listeners that take action before or after evaluation.""" def before_eval(self): """Called before evaluation. Returns: `False` if you want to skip the current evaluation and early stop the continuous evaluation; `True` otherwise. """ return True def after_eval(self, eval_result): """Called after the evaluation is executed. Args: eval_result: An `_EvalResult` instance. Returns: False if you want to early stop continuous evaluation; `True` otherwise. """ del eval_result return True def _assert_eval_spec(eval_spec): """Raise error if `eval_spec` is not of the right type.""" if not isinstance(eval_spec, EvalSpec): raise TypeError('`eval_spec` must have type `tf.estimator.EvalSpec`. 
' 'Got: {}'.format(type(eval_spec))) ================================================ FILE: tensorflow_estimator/python/estimator/training_test.py ================================================ # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for training.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import glob import json import os import random import shutil import tempfile import time import numpy as np import tensorflow as tf from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import server_lib from tensorflow_estimator.python.estimator import estimator as estimator_lib from tensorflow_estimator.python.estimator import exporter as exporter_lib from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator import run_config as run_config_lib from tensorflow_estimator.python.estimator import training from tensorflow_estimator.python.estimator.canned import dnn from tensorflow_estimator.python.estimator.canned import prediction_keys from tensorflow_estimator.python.estimator.export import export as export_lib _DEFAULT_EVAL_STEPS = 100 _DEFAULT_EVAL_DELAY_SECS = 120 _DEFAULT_EVAL_THROTTLE_SECS = 600 _DELAY_SECS_PER_WORKER = 5 _GLOBAL_STEP_KEY = 
tf.compat.v1.GraphKeys.GLOBAL_STEP _INVALID_INPUT_FN_MSG = '`input_fn` must be callable' _INVALID_HOOK_MSG = 'All hooks must be `SessionRunHook` instances' _INVALID_MAX_STEPS_MSG = 'Must specify max_steps > 0' _INVALID_STEPS_MSG = 'Must specify steps > 0' _INVALID_NAME_MSG = '`name` must be string' _INVALID_EVAL_DELAY_SECS_MSG = 'Must specify start_delay_secs >= 0' _INVALID_EVAL_THROTTLE_SECS_MSG = 'Must specify throttle_secs >= 0' _INVALID_ESTIMATOR_MSG = '`estimator` must have type `tf.estimator.Estimator`' _INVALID_SAVING_LISTENER_MSG = ( 'All saving_listeners must be `CheckpointSaverListener` instances') _STALE_CHECKPOINT_MSG = 'There was no new checkpoint after the training.' _INVALID_EXPORTER_MSG = '`exporters` must be an Exporter' _INVALID_EXPORTER_NAME_TYPE_MSG = 'An Exporter must have a string name' _DUPLICATE_EXPORTER_NAMES_MSG = '`exporters` must have unique names.' _NONE_EXPORTER_NAME_MSG = ( 'An Exporter cannot have a name that is `None` or empty.') _INVALID_TRAIN_SPEC_MSG = '`train_spec` must have type `tf.estimator.TrainSpec`' _INVALID_EVAL_SPEC_MSG = '`eval_spec` must have type `tf.estimator.EvalSpec`' _EVAL_SPEC_OR_NONE_MSG = ( '`eval_spec` must be either `None` or have type `tf.estimator.EvalSpec`') _INVALID_EVAL_LISTENER_MSG = 'must have type `_ContinuousEvalListener`' _INVALID_CONFIG_FOR_STD_SERVER_MSG = 'Could not start server; .*TF_CONFIG' _INVALID_LOCAL_TASK_WITH_CLUSTER = '`task.type` in TF_CONFIG cannot be `local`' _INVALID_TASK_TYPE = '`estimator.config` must have task_type set.' _INPROPER_THROTTL_SECS = ( 'EvalSpec.throttle_secs is set as 0.*Please consider to increase') # The message should NOT have 'local' word as part of it. As (?!word) is looking # ahead, so, the $ (ending) check is required; otherwise, it will match # partially and return successuful. _INVALID_TASK_TO_RUN = ( 'Task type .* is not supported. 
Supported task types are ((?!local).)*$') _INVALID_EMPTY_EVAL_RESULT_ERR = ( 'Internal error: `Estimator.evaluate` should never return empty metrics') _INVALID_EVAL_RESULT_TYPE_ERR = '`Estimator.evaluate` should return dict.' _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR = ( 'Internal error: `Estimator.evaluate` result should have `global_step`') _INVALID_EVAL_TASK_ID_ERR = ( 'there can only be one `evaluator` task .*with task id 0') _TF_CONFIG_FOR_CHIEF = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4'] }, 'task': { 'type': run_config_lib.TaskType.CHIEF, 'index': 0 } } _TF_CONFIG_FOR_MASTER = { 'cluster': { run_config_lib.TaskType.MASTER: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4'] }, 'task': { 'type': run_config_lib.TaskType.MASTER, 'index': 0 } } _TF_CONFIG_FOR_WORKER = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4'] }, 'task': { 'type': run_config_lib.TaskType.WORKER, 'index': 1 } } _TF_CONFIG_FOR_PS = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4'] }, 'task': { 'type': run_config_lib.TaskType.PS, 'index': 1 } } _TF_CONFIG_FOR_EVALUATOR = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], run_config_lib.TaskType.PS: ['host1:1', 'host2:2'], run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4'] }, 'task': { 'type': run_config_lib.TaskType.EVALUATOR, 'index': 0 } } _TF_CONFIG_FOR_GOOGLE = {'environment': 'google'} class _FakeHook(tf.compat.v1.train.SessionRunHook): """Fake implementation of `SessionRunHook`.""" class _InvalidHook(object): """Invalid hook (not a subclass of `SessionRunHook`).""" class _InvalidCheckpointSaverListener(object): 
"""Invalid hook (not a subclass of `CheckpointSaverListener`).""" def _create_exporter(name): class FakeExporter(exporter_lib.Exporter): def __init__(self, name): self._name = name @property def name(self): return self._name def export(self, *args, **kwargs): del args, kwargs return FakeExporter(name=name) def _create_run_config_with_cluster_spec(tf_config): with tf.compat.v1.test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}): return run_config_lib.RunConfig() class TrainSpecTest(tf.test.TestCase): """Tests TrainSpec.""" def testRequiredArgumentsSet(self): """Tests that no errors are raised when all required arguments are set.""" spec = training.TrainSpec(input_fn=lambda: 1) self.assertEqual(1, spec.input_fn()) self.assertIsNone(spec.max_steps) self.assertEqual(0, len(spec.hooks)) def testAllArgumentsSet(self): """Tests that no errors are raised when all arguments are set.""" hooks = [_FakeHook()] spec = training.TrainSpec(input_fn=lambda: 1, max_steps=2, hooks=hooks) self.assertEqual(1, spec.input_fn()) self.assertEqual(2, spec.max_steps) self.assertEqual(tuple(hooks), spec.hooks) def testInvalidInputFn(self): with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG): training.TrainSpec(input_fn='invalid') def testInvalidMaxStep(self): with self.assertRaisesRegexp(ValueError, _INVALID_MAX_STEPS_MSG): training.TrainSpec(input_fn=lambda: 1, max_steps=0) def testInvalidHook(self): with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG): training.TrainSpec(input_fn=lambda: 1, hooks=[_InvalidHook()]) def testInvalidSavingListener(self): with self.assertRaisesRegexp(TypeError, _INVALID_SAVING_LISTENER_MSG): training.TrainSpec(input_fn=lambda: 1, saving_listeners=[_InvalidCheckpointSaverListener()]) class EvalSpecTest(tf.test.TestCase): """Tests EvalSpec.""" def testRequiredArgumentsSet(self): """Tests that no errors are raised when all required arguments are set.""" spec = training.EvalSpec(input_fn=lambda: 1) self.assertEqual(1, 
spec.input_fn()) self.assertEqual(_DEFAULT_EVAL_STEPS, spec.steps) self.assertIsNone(spec.name) self.assertEqual(0, len(spec.hooks)) self.assertEqual(0, len(spec.exporters)) self.assertEqual(_DEFAULT_EVAL_DELAY_SECS, spec.start_delay_secs) self.assertEqual(_DEFAULT_EVAL_THROTTLE_SECS, spec.throttle_secs) def testAllArgumentsSet(self): """Tests that no errors are raised when all arguments are set.""" hooks = [_FakeHook()] exporter = _create_exporter('a') spec = training.EvalSpec( input_fn=lambda: 1, steps=2, name='name', hooks=hooks, exporters=exporter, start_delay_secs=3, throttle_secs=4) self.assertEqual(1, spec.input_fn()) self.assertEqual(2, spec.steps) self.assertEqual('name', spec.name) self.assertEqual(tuple(hooks), spec.hooks) self.assertEqual((exporter,), spec.exporters) self.assertEqual(3, spec.start_delay_secs) self.assertEqual(4, spec.throttle_secs) def testListOfExporters(self): """Tests that no errors are raised with multiple exporters.""" exporters = [_create_exporter('a'), _create_exporter('b')] spec = training.EvalSpec(input_fn=lambda: 1, exporters=exporters) self.assertEqual(1, spec.input_fn()) self.assertEqual(tuple(exporters), spec.exporters) def testInvalidInputFn(self): with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG): training.EvalSpec(input_fn='invalid') def testInvalidMaxStep(self): with self.assertRaisesRegexp(ValueError, _INVALID_STEPS_MSG): training.EvalSpec(input_fn=lambda: 1, steps=0) def testInvalidName(self): with self.assertRaisesRegexp(TypeError, _INVALID_NAME_MSG): training.EvalSpec(input_fn=lambda: 1, name=123) def testInvalidHook(self): with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG): training.EvalSpec(input_fn=lambda: 1, hooks=[_InvalidHook()]) def testInvalidDelaySecs(self): with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_DELAY_SECS_MSG): training.EvalSpec(input_fn=lambda: 1, start_delay_secs=-1) def testInvalidThrottleSecs(self): with self.assertRaisesRegexp(ValueError, 
_INVALID_EVAL_THROTTLE_SECS_MSG): training.EvalSpec(input_fn=lambda: 1, throttle_secs=-1) def testInvalidTypeOfListOfExporters(self): with self.assertRaisesRegexp(TypeError, _INVALID_EXPORTER_MSG): training.EvalSpec( input_fn=lambda: 1, exporters=[_create_exporter('a'), _FakeHook()]) def testInvalidTypeOfIndividualExporter(self): with self.assertRaisesRegexp(TypeError, _INVALID_EXPORTER_MSG): training.EvalSpec(input_fn=lambda: 1, exporters=_FakeHook()) def testInvalidTypeOfExporterName(self): with self.assertRaisesRegexp(ValueError, _INVALID_EXPORTER_NAME_TYPE_MSG): training.EvalSpec( input_fn=lambda: 1, exporters=_create_exporter(name=123)) def testMultipleExportersWithTheSameName(self): with self.assertRaisesRegexp(ValueError, _DUPLICATE_EXPORTER_NAMES_MSG): training.EvalSpec( input_fn=lambda: 1, exporters=[_create_exporter('a'), _create_exporter('a')]) def testMultipleExportersAndOneWithoutAName(self): with self.assertRaisesRegexp(ValueError, _NONE_EXPORTER_NAME_MSG): training.EvalSpec( input_fn=lambda: 1, exporters=[_create_exporter('a'), _create_exporter(None)]) def testSingleExporterWithoutAName(self): with self.assertRaisesRegexp(ValueError, _NONE_EXPORTER_NAME_MSG): training.EvalSpec(input_fn=lambda: 1, exporters=_create_exporter(None)) class TrainAndEvaluateTest(tf.test.TestCase): def test_run_task(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) with tf.compat.v1.test.mock.patch.object( training, '_TrainingExecutor') as mock_executor: mock_executor_instance = tf.compat.v1.test.mock.Mock() mock_executor.return_value = mock_executor_instance training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec) mock_executor.assert_called_with( estimator=mock_est, train_spec=mock_train_spec, eval_spec=mock_eval_spec) self.assertTrue(mock_executor_instance.run.called) def 
test_error_out_if_evaluator_task_id_is_non_zero(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], }, 'task': { 'type': run_config_lib.TaskType.EVALUATOR, 'index': 1 } } mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = _create_run_config_with_cluster_spec(tf_config) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_TASK_ID_ERR): training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec) def test_invalid_estimator(self): invalid_estimator = object() mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG): training.train_and_evaluate(invalid_estimator, mock_train_spec, mock_eval_spec) def test_fail_fast_if_invalid_eval_spec(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) invalid_eval_spec = object() with tf.compat.v1.test.mock.patch.object( training, '_TrainingExecutor') as mock_executor: with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG): training.train_and_evaluate(mock_est, mock_train_spec, invalid_eval_spec) mock_executor.assert_not_called() class TrainingExecutorConstructorTest(tf.test.TestCase): """Tests constructor of _TrainingExecutor.""" def test_required_arguments_set(self): estimator = estimator_lib.Estimator(model_fn=lambda features: features) train_spec = training.TrainSpec(input_fn=lambda: 1) eval_spec = training.EvalSpec(input_fn=lambda: 1) executor = training._TrainingExecutor(estimator, train_spec, eval_spec) self.assertEqual(estimator, executor.estimator) def test_invalid_estimator(self): invalid_estimator = object() train_spec = training.TrainSpec(input_fn=lambda: 1) 
eval_spec = training.EvalSpec(input_fn=lambda: 1) with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG): training._TrainingExecutor(invalid_estimator, train_spec, eval_spec) def test_invalid_train_spec(self): estimator = estimator_lib.Estimator(model_fn=lambda features: features) invalid_train_spec = object() eval_spec = training.EvalSpec(input_fn=lambda: 1) with self.assertRaisesRegexp(TypeError, _INVALID_TRAIN_SPEC_MSG): training._TrainingExecutor(estimator, invalid_train_spec, eval_spec) def test_invalid_eval_spec(self): estimator = estimator_lib.Estimator(model_fn=lambda features: features) train_spec = training.TrainSpec(input_fn=lambda: 1) invalid_eval_spec = object() with self.assertRaisesRegexp(TypeError, _EVAL_SPEC_OR_NONE_MSG): training._TrainingExecutor(estimator, train_spec, invalid_eval_spec) def test_eval_spec_none(self): estimator = estimator_lib.Estimator(model_fn=lambda features: features) train_spec = training.TrainSpec(input_fn=lambda: 1) eval_spec = None # Tests that no error is raised. 
training._TrainingExecutor(estimator, train_spec, eval_spec) def test_invalid_train_hooks(self): estimator = estimator_lib.Estimator(model_fn=lambda features: features) train_spec = training.TrainSpec(input_fn=lambda: 1) eval_spec = training.EvalSpec(input_fn=lambda: 1) invalid_train_hooks = [object()] with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG): training._TrainingExecutor( estimator, train_spec, eval_spec, train_hooks=invalid_train_hooks) def test_invalid_continuous_eval_listener(self): estimator = estimator_lib.Estimator(model_fn=lambda features: features) train_spec = training.TrainSpec(input_fn=lambda: 1) eval_spec = training.EvalSpec(input_fn=lambda: 1) invalid_continuous_eval_listener = object() with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_LISTENER_MSG): training._TrainingExecutor( estimator, train_spec, eval_spec, continuous_eval_listener=invalid_continuous_eval_listener) class _TrainingExecutorTrainingTest(object): """Tests training of _TrainingExecutor.""" def __init__(self, run_config): self._run_config = run_config def _run_task(self, executor): # We should not call executor.run as the test here is intended to test # run_foo explicitly (foo is the task type). 
return getattr(executor, 'run_' + self._run_config.task_type)() @tf.compat.v1.test.mock.patch.object(time, 'sleep') @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_train_with_train_spec(self, mock_server, unused_mock_sleep): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = self._run_config train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()]) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) mock_server_instance = mock_server.return_value executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec) self._run_task(executor) mock_server.assert_called_with( mock_est.config.cluster_spec, job_name=mock_est.config.task_type, task_index=mock_est.config.task_id, config=tf.compat.v1.test.mock.ANY, protocol=None, start=False) self.assertTrue(mock_server_instance.start.called) mock_est.train.assert_called_with( input_fn=train_spec.input_fn, max_steps=train_spec.max_steps, hooks=list(train_spec.hooks), saving_listeners=tf.compat.v1.test.mock.ANY) mock_est.evaluate.assert_not_called() mock_est.export_saved_model.assert_not_called() @tf.compat.v1.test.mock.patch.object(time, 'sleep') @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_train_with_no_eval_spec(self, mock_server, unused_mock_sleep): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = self._run_config train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()]) eval_spec = None mock_server_instance = mock_server.return_value executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) self._run_task(executor) mock_server.assert_called_with( mock_est.config.cluster_spec, job_name=mock_est.config.task_type, task_index=mock_est.config.task_id, config=tf.compat.v1.test.mock.ANY, protocol=None, start=False) self.assertTrue(mock_server_instance.start.called) mock_est.train.assert_called_with( 
input_fn=train_spec.input_fn, max_steps=train_spec.max_steps, hooks=list(train_spec.hooks), saving_listeners=tf.compat.v1.test.mock.ANY) mock_est.evaluate.assert_not_called() mock_est.export_saved_model.assert_not_called() @tf.compat.v1.test.mock.patch.object(time, 'sleep') @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_train_with_train_hooks(self, unused_mock_server, unused_mock_sleep): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = self._run_config train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()]) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) extra_hooks = [_FakeHook()] executor = training._TrainingExecutor( mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks) self._run_task(executor) mock_est.train.assert_called_with( input_fn=train_spec.input_fn, max_steps=train_spec.max_steps, hooks=list(train_spec.hooks) + extra_hooks, saving_listeners=tf.compat.v1.test.mock.ANY) @tf.compat.v1.test.mock.patch.object(time, 'sleep') @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = self._run_config mock_train_spec = tf.compat.v1.test.mock.Mock( spec=training.TrainSpec, hooks=[]) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)} with tf.compat.v1.test.mock.patch.dict('os.environ', tf_config): self._run_task(executor) mock_server.assert_not_called() def test_fail_with_empty_cluster_spec(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) mock_est.config = 
tf.compat.v1.test.mock.PropertyMock( spec=run_config_lib.RunConfig) mock_est.config.cluster_spec = None mock_est.config.master = 'grpc://...' mock_est.config.task_type = 'worker' mock_est.config.task_id = 2 with self.assertRaisesRegexp(RuntimeError, _INVALID_CONFIG_FOR_STD_SERVER_MSG): self._run_task( training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec)) def test_fail_with_empty_master(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) mock_est.config = tf.compat.v1.test.mock.PropertyMock( spec=run_config_lib.RunConfig) mock_est.config.cluster_spec = tf.train.ClusterSpec( {'worker': ['dummy', 'dummy1']}) mock_est.config.master = '' mock_est.config.task_type = 'worker' mock_est.config.task_id = 2 with self.assertRaisesRegexp(RuntimeError, _INVALID_CONFIG_FOR_STD_SERVER_MSG): self._run_task( training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec)) @tf.compat.v1.test.mock.patch.object(time, 'sleep') @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_single_worker_node_with_empty_tf_master(self, mock_server, unused_mock_sleep): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock( spec=training.TrainSpec, hooks=[]) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) mock_est.config = tf.compat.v1.test.mock.PropertyMock( spec=run_config_lib.RunConfig) # Single node cluster. 
mock_est.config.cluster_spec = tf.train.ClusterSpec({'worker': ['dummy']}) mock_est.config.master = '' mock_est.config.task_type = 'worker' mock_est.config.task_id = 2 self._run_task( training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec)) self.assertTrue(mock_est.train.called) mock_server.assert_not_called() def test_fail_with_empty_task_type(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) mock_est.config = tf.compat.v1.test.mock.PropertyMock( spec=run_config_lib.RunConfig) mock_est.config.cluster_spec = tf.train.ClusterSpec({'worker': ['dummy']}) mock_est.config.master = 'grpc://...' mock_est.config.task_type = '' mock_est.config.task_id = 2 with self.assertRaisesRegexp(RuntimeError, _INVALID_CONFIG_FOR_STD_SERVER_MSG): self._run_task( training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec)) def test_fail_with_none_task_id(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) mock_est.config = tf.compat.v1.test.mock.PropertyMock( spec=run_config_lib.RunConfig) mock_est.config.cluster_spec = tf.train.ClusterSpec({'worker': ['dummy']}) mock_est.config.master = 'grpc://...' 
mock_est.config.task_type = 'worker' mock_est.config.task_id = None with self.assertRaisesRegexp(RuntimeError, _INVALID_CONFIG_FOR_STD_SERVER_MSG): self._run_task( training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec)) class TrainingExecutorRunWorkerTest(_TrainingExecutorTrainingTest, tf.test.TestCase): """Tests run_worker of _TrainingExecutor.""" def __init__(self, methodName='runTest'): # pylint: disable=invalid-name tf.test.TestCase.__init__(self, methodName) _TrainingExecutorTrainingTest.__init__( self, run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER)) @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_delay_for_worker(self, _): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = self._run_config mock_train_spec = tf.compat.v1.test.mock.Mock( spec=training.TrainSpec, hooks=[]) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) expected_secs = (self._run_config.task_id + 1) * _DELAY_SECS_PER_WORKER with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep: mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s) self._run_task(executor) self.assertTrue(mock_sleep.called) @tf.compat.v1.test.mock.patch.object(server_lib, 'Server') def test_delay_disabled_for_worker(self, _): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = self._run_config.replace( experimental_max_worker_delay_secs=0) mock_train_spec = tf.compat.v1.test.mock.Mock( spec=training.TrainSpec, hooks=[]) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep: self._run_task(executor) self.assertFalse(mock_sleep.called) class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest, 
class TrainingExecutorRunMasterTest(tf.test.TestCase):
  """Tests run_master of _TrainingExecutor."""

  def setUp(self):
    # All tests in this class run the executor as the 'master' task.
    self._run_config = _create_run_config_with_cluster_spec(
        _TF_CONFIG_FOR_MASTER)

  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_no_delay_for_master(self, _):
    # The master task must start training immediately, unlike workers which
    # sleep a task-dependent delay before starting.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate = lambda *args, **kw: {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: 123
    }
    mock_est.config = self._run_config
    mock_train_spec = tf.compat.v1.test.mock.Mock(
        spec=training.TrainSpec, max_steps=123, hooks=[])
    mock_eval_spec = tf.compat.v1.test.mock.Mock(
        spec=training.EvalSpec, exporters=[])
    mock_train_spec.saving_listeners = tuple([])

    executor = training._TrainingExecutor(mock_est, mock_train_spec,
                                          mock_eval_spec)
    with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep:
      executor.run_master()
      mock_sleep.assert_not_called()

  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_train_with_train_spec(self, mock_server, unused_mock_sleep):
    # run_master must start a (non-joined) std server and forward the
    # TrainSpec fields to Estimator.train.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate = lambda *args, **kw: {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: 123
    }
    mock_est.config = self._run_config
    train_spec = training.TrainSpec(
        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
    mock_eval_spec = tf.compat.v1.test.mock.Mock(
        spec=training.EvalSpec, exporters=[])
    mock_server_instance = mock_server.return_value

    executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec)
    executor.run_master()

    mock_server.assert_called_with(
        mock_est.config.cluster_spec,
        job_name=mock_est.config.task_type,
        task_index=mock_est.config.task_id,
        config=tf.compat.v1.test.mock.ANY,
        protocol=None,
        start=False)

    self.assertTrue(mock_server_instance.start.called)

    mock_est.train.assert_called_with(
        input_fn=train_spec.input_fn,
        max_steps=train_spec.max_steps,
        hooks=list(train_spec.hooks),
        saving_listeners=tf.compat.v1.test.mock.ANY)
    # Export only happens via exporters on the eval spec; none configured.
    mock_est.export_saved_model.assert_not_called()

  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_train_with_no_eval_spec_fails(self, mock_server, unused_mock_sleep):
    # A None eval_spec is rejected by run_master with a TypeError.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate = lambda *args, **kw: {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: 123
    }
    mock_est.config = self._run_config
    train_spec = training.TrainSpec(
        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
    eval_spec = None

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
      executor.run_master()

  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_train_with_train_hooks(self, mock_server, unused_mock_sleep):
    # Hooks passed to the executor are appended after the TrainSpec hooks.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate = lambda *args, **kw: {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: 123
    }
    mock_est.config = self._run_config
    train_spec = training.TrainSpec(
        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
    mock_eval_spec = tf.compat.v1.test.mock.Mock(
        spec=training.EvalSpec, exporters=[])
    extra_hooks = [_FakeHook()]

    executor = training._TrainingExecutor(
        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
    executor.run_master()

    mock_est.train.assert_called_with(
        input_fn=train_spec.input_fn,
        max_steps=train_spec.max_steps,
        hooks=list(train_spec.hooks) + extra_hooks,
        saving_listeners=tf.compat.v1.test.mock.ANY)

  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
    # With a Google-internal TF_CONFIG environment, no std server is started.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate = lambda *args, **kw: {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: 123
    }
    mock_est.config = self._run_config
    mock_train_spec = tf.compat.v1.test.mock.Mock(
        spec=training.TrainSpec, max_steps=123, hooks=[])
    mock_eval_spec = tf.compat.v1.test.mock.Mock(
        spec=training.EvalSpec, exporters=[])
    mock_train_spec.saving_listeners = tuple([])

    executor = training._TrainingExecutor(mock_est, mock_train_spec,
                                          mock_eval_spec)
    tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)}
    with tf.compat.v1.test.mock.patch.dict('os.environ', tf_config):
      executor.run_master()
      mock_server.assert_not_called()

  def test_fail_with_empty_cluster_spec(self):
    # A std server cannot be built without a cluster spec.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = None
    mock_est.config.master = 'grpc://...'
    mock_est.config.task_type = 'master'
    mock_est.config.task_id = 2
    mock_train_spec.saving_listeners = tuple([])

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_master()

  def test_fail_with_empty_master(self):
    # An empty master address is only allowed for a single-node cluster;
    # here a worker exists too, so run_master must fail.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({
        'master': ['dummy'],
        'worker': ['dummy1']
    })
    mock_est.config.master = ''
    mock_est.config.task_type = 'master'
    mock_est.config.task_id = 0
    mock_train_spec.saving_listeners = tuple([])

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_master()

  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_single_master_node_with_empty_tf_master(self, mock_server,
                                                   unused_mock_sleep):
    # A lone master with an empty master address trains in-process without
    # starting a std server.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate = lambda *args, **kw: {
        tf.compat.v1.GraphKeys.GLOBAL_STEP: 123
    }

    mock_train_spec = tf.compat.v1.test.mock.Mock(
        spec=training.TrainSpec, max_steps=123, hooks=[])
    mock_eval_spec = tf.compat.v1.test.mock.Mock(
        spec=training.EvalSpec, exporters=[])

    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'master': ['dummy']})
    mock_est.config.master = ''
    mock_est.config.task_type = 'master'
    mock_est.config.task_id = 0
    mock_train_spec.saving_listeners = tuple([])

    executor = training._TrainingExecutor(mock_est, mock_train_spec,
                                          mock_eval_spec)
    executor.run_master()

    mock_server.assert_not_called()
    self.assertTrue(mock_est.train.called)

  def test_fail_with_empty_task_type(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'master': ['dummy']})
    mock_est.config.master = 'grpc://...'
    mock_est.config.task_type = ''
    mock_est.config.task_id = 2
    mock_train_spec.saving_listeners = tuple([])

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_master()

  def test_fail_with_none_task_id(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'master': ['dummy']})
    mock_est.config.master = 'grpc://...'
    mock_est.config.task_type = 'master'
    mock_est.config.task_id = None
    mock_train_spec.saving_listeners = tuple([])

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_master()

  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_run_master_triggers_evaluate_and_export(self, _):

    def estimator_train(saving_listeners, *args, **kwargs):
      # There shalt be a saving_listener.  Estimator is going to call
      # `after_save`.
      del args, kwargs
      saving_listeners[0].begin()
      saving_listeners[0].after_save(session=None, global_step_value=0)
      saving_listeners[0].after_save(session=None, global_step_value=10)

    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, model_dir='path/', train=estimator_train)
    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
    mock_est.config = self._run_config

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_whether_export_is_called'

    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, steps=2, exporters=exporter)
    # Reporting global_step == max_steps marks the eval as final.
    eval_result = {_GLOBAL_STEP_KEY: train_spec.max_steps}
    mock_est.evaluate.return_value = eval_result

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    executor.run_master()

    mock_est.evaluate.assert_called_with(
        name=eval_spec.name,
        input_fn=eval_spec.input_fn,
        steps=eval_spec.steps,
        checkpoint_path='checkpoint_path/',
        hooks=eval_spec.hooks)
    self.assertEqual(1, exporter.export.call_count)
    exporter.export.assert_called_with(
        estimator=mock_est,
        export_path=os.path.join('path/', 'export', exporter.name),
        checkpoint_path='checkpoint_path/',
        eval_result=eval_result,
        is_the_final_export=True)

  @tf.compat.v1.test.mock.patch.object(basic_session_run_hooks,
                                       'SecondOrStepTimer')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_run_master_throttle_eval(self, _, mock_timer_class):
    # The throttle timer decides which checkpoint saves trigger an eval:
    # the False round below is skipped, yielding 2 evals out of 4 saves.
    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, model_dir='path/')

    mock_timer = tf.compat.v1.test.mock.Mock()
    mock_timer_class.return_value = mock_timer

    def estimator_train(saving_listeners, *args, **kwargs):
      del args, kwargs
      saving_listeners[0].begin()

      # Call four times.
      mock_timer.should_trigger_for_step.return_value = True
      saving_listeners[0].after_save(session=None, global_step_value=None)

      mock_timer.should_trigger_for_step.return_value = True
      saving_listeners[0].after_save(session=None, global_step_value=None)

      mock_timer.should_trigger_for_step.return_value = False
      saving_listeners[0].after_save(session=None, global_step_value=None)

      mock_timer.should_trigger_for_step.return_value = True
      saving_listeners[0].after_save(session=None, global_step_value=None)

    mock_est.train = estimator_train
    mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2']
    mock_est.config = self._run_config

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_whether_export_is_called'

    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10)

    mock_est.evaluate.side_effect = [{
        _GLOBAL_STEP_KEY: train_spec.max_steps // 2
    }, {
        _GLOBAL_STEP_KEY: train_spec.max_steps
    }]

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    executor.run_master()

    self.assertEqual(2, mock_est.evaluate.call_count)
    self.assertEqual(2, exporter.export.call_count)

    # Only the last export may carry is_the_final_export=True.
    is_final_export_list = [
        call[1]['is_the_final_export']
        for call in exporter.export.call_args_list
    ]
    self.assertEqual([False, True], is_final_export_list)

  @tf.compat.v1.test.mock.patch.object(basic_session_run_hooks,
                                       'SecondOrStepTimer')
  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_run_master_throttle_eval_which_skips_final_ckpt(
      self, _, mock_timer_class):
    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, model_dir='path/')

    mock_timer = tf.compat.v1.test.mock.Mock()
    mock_timer_class.return_value = mock_timer

    def estimator_train(saving_listeners, *args, **kwargs):
      del args, kwargs
      saving_listeners[0].begin()

      # Call three times (one for the first saving).
      mock_timer.should_trigger_for_step.return_value = True
      saving_listeners[0].after_save(session=None, global_step_value=0)

      mock_timer.should_trigger_for_step.return_value = True
      saving_listeners[0].after_save(session=None, global_step_value=125)

      mock_timer.should_trigger_for_step.return_value = False
      saving_listeners[0].after_save(session=None, global_step_value=250)

      # At the end evaluate should be called even if throttle secs prevents it.
      mock_timer.should_trigger_for_step.return_value = False
      saving_listeners[0].end(session=None, global_step_value=300)

    mock_est.train = estimator_train
    mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2']
    mock_est.config = self._run_config

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_whether_export_is_called'

    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10)

    mock_est.evaluate.side_effect = [{
        _GLOBAL_STEP_KEY: train_spec.max_steps // 2
    }, {
        _GLOBAL_STEP_KEY: train_spec.max_steps
    }]

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    executor.run_master()

    self.assertEqual(2, mock_est.evaluate.call_count)
    self.assertEqual(2, exporter.export.call_count)

    is_final_export_list = [
        call[1]['is_the_final_export']
        for call in exporter.export.call_args_list
    ]
    self.assertEqual([False, True], is_final_export_list)
class TrainingExecutorRunEvaluatorTest(tf.test.TestCase):
  """Tests run_evaluator of _TrainingExecutor."""

  def _set_up_mock_est_to_train_and_evaluate_once(self, mock_est,
                                                  mock_train_spec):
    """Sets global step in eval result to end the while True eval loop."""
    training_max_step = 200
    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: training_max_step}
    mock_train_spec.max_steps = training_max_step

  def test_evaluate_with_evaluate_spec(self):
    # run_evaluator forwards the EvalSpec fields to Estimator.evaluate and
    # never calls train.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.latest_checkpoint.return_value = 'latest_it_is'
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1,
        steps=2,
        hooks=[_FakeHook()],
        name='cont_eval',
        start_delay_secs=0,
        throttle_secs=0)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    executor.run_evaluator()

    mock_est.evaluate.assert_called_with(
        name='cont_eval',
        input_fn=eval_spec.input_fn,
        steps=eval_spec.steps,
        checkpoint_path='latest_it_is',
        hooks=eval_spec.hooks)
    self.assertFalse(mock_est.train.called)

  def test_evaluate_with_no_eval_spec_fails(self):
    # A None eval_spec is rejected by run_evaluator with a TypeError.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.latest_checkpoint.return_value = 'latest_it_is'
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    eval_spec = None

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
      executor.run_evaluator()

  def test_evaluate_with_train_hooks(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.latest_checkpoint.return_value = 'latest_it_is'
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1,
        steps=2,
        hooks=[_FakeHook()],
        name='cont_eval',
        start_delay_secs=0,
        throttle_secs=0)

    # The train_hooks will not be called during eval.
    mock_hook = tf.compat.v1.test.mock.Mock(
        spec=tf.compat.v1.train.SessionRunHook)
    executor = training._TrainingExecutor(
        mock_est, mock_train_spec, eval_spec, train_hooks=[mock_hook])
    executor.run_evaluator()

    mock_hook.begin.assert_not_called()

  def test_evaluate_multiple_times(self):
    # Each new checkpoint path triggers one eval + one export; the loop ends
    # once the eval result reports global_step == max_steps.
    training_max_step = 200

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.model_dir = tf.compat.as_bytes(tf.compat.v1.test.get_temp_dir())
    mock_est.evaluate.side_effect = [{
        _GLOBAL_STEP_KEY: training_max_step // 2
    }, {
        _GLOBAL_STEP_KEY: training_max_step
    }]
    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']

    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_how_many_times_export_is_called'

    # Counters are stashed on the mock estimator so the export closure can
    # update them.
    mock_est.times_export_was_called = 0
    mock_est.times_final_export_was_called = 0

    def export(estimator, export_path, checkpoint_path, eval_result,
               is_the_final_export):
      del export_path, checkpoint_path, eval_result
      estimator.times_export_was_called += 1
      # The final export happens at the end.
      self.assertEqual(0, estimator.times_final_export_was_called)
      if is_the_final_export:
        estimator.times_final_export_was_called += 1

    exporter.export = export

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1,
        start_delay_secs=0,
        throttle_secs=0,
        exporters=exporter)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    executor.run_evaluator()

    self.assertEqual(2, mock_est.evaluate.call_count)
    self.assertEqual(2, mock_est.times_export_was_called)
    self.assertEqual(1, mock_est.times_final_export_was_called)

  def test_evaluate_listener_before_eval(self):
    # A before_eval returning False stops the continuous-eval loop before
    # the evaluation runs.
    training_max_step = 200

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.model_dir = tf.compat.as_bytes(tf.compat.v1.test.get_temp_dir())
    # Without early stopping, this eval will be run twice.
    mock_est.evaluate.side_effect = [{
        _GLOBAL_STEP_KEY: training_max_step // 2
    }, {
        _GLOBAL_STEP_KEY: training_max_step
    }]
    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']

    mock_train_spec = tf.compat.v1.test.mock.Mock(
        spec=training.TrainSpec, hooks=[])
    mock_train_spec.max_steps = training_max_step

    class _Listener(training._ContinuousEvalListener):

      def __init__(self):
        self.call_count = 0

      def before_eval(self):
        self.call_count += 1
        return self.call_count == 1

    listener = _Listener()

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)

    training._TrainingExecutor(
        mock_est, mock_train_spec, eval_spec,
        continuous_eval_listener=listener).run_evaluator()

    # Before_eval returns False during the second time, so, evaluate will be
    # called once.
    self.assertEqual(1, mock_est.evaluate.call_count)
    self.assertEqual(2, listener.call_count)

  def test_evaluate_listener_after_eval(self):
    # after_eval receives the eval result and can stop the loop by returning
    # False.
    training_max_step = 200

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.model_dir = tf.compat.as_bytes(tf.compat.v1.test.get_temp_dir())
    # Without early stopping, this eval will be run twice.
    expected_eval_metrics = [{
        _GLOBAL_STEP_KEY: training_max_step // 2
    }, {
        _GLOBAL_STEP_KEY: training_max_step
    }]
    mock_est.evaluate.side_effect = expected_eval_metrics
    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']

    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step

    class _Listener(training._ContinuousEvalListener):

      def __init__(self):
        self.call_count = 0

      def after_eval(self, eval_result):
        self.call_count += 1
        self.eval_result = eval_result
        return False

    listener = _Listener()

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)

    training._TrainingExecutor(
        mock_est, mock_train_spec, eval_spec,
        continuous_eval_listener=listener).run_evaluator()

    # after_eval returns False during the first time, so, evaluate will be
    # called once.
    self.assertEqual(1, mock_est.evaluate.call_count)
    self.assertEqual(1, listener.call_count)
    self.assertAllEqual(expected_eval_metrics[0], listener.eval_result.metrics)
    self.assertEqual('path_1', listener.eval_result.checkpoint_path)

  def test_final_export_is_true_in_the_end(self):
    training_max_step = 200

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.model_dir = tf.compat.as_bytes(tf.compat.v1.test.get_temp_dir())
    mock_est.evaluate.side_effect = [{
        _GLOBAL_STEP_KEY: training_max_step // 2
    }, {
        _GLOBAL_STEP_KEY: training_max_step
    }]
    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']

    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step

    mock_est.times_export_fn_was_called = 0
    mock_est.times_the_final_export_was_true = 0

    def export(estimator, export_path, checkpoint_path, eval_result,
               is_the_final_export):
      del export_path, checkpoint_path, eval_result
      estimator.times_export_fn_was_called += 1
      if is_the_final_export:
        estimator.times_the_final_export_was_true += 1

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_how_many_times_export_is_called'
    exporter.export = export

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1,
        start_delay_secs=0,
        throttle_secs=0,
        exporters=exporter)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    executor.run_evaluator()

    self.assertEqual(2, mock_est.evaluate.call_count)
    self.assertEqual(2, mock_est.times_export_fn_was_called)
    self.assertEqual(1, mock_est.times_the_final_export_was_true)

  def test_skip_evaluation_due_to_ckpt(self):
    # Missing/empty/duplicate checkpoint paths are skipped (with a warning)
    # instead of triggering an evaluation.
    training_max_step = 200
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate.side_effect = [{
        _GLOBAL_STEP_KEY: training_max_step // 2
    }, {
        _GLOBAL_STEP_KEY: training_max_step
    }]
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step
    # NOTE: the side_effect above takes precedence over the return_value the
    # helper sets; the helper is only used here for max_steps.
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    # First two items are invalid, next two items are same.
    mock_est.latest_checkpoint.side_effect = [
        None, '', 'same', 'same', 'path_2'
    ]

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=2)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    with tf.compat.v1.test.mock.patch.object(tf.compat.v1.logging,
                                             'warning') as mock_log:
      executor.run_evaluator()

    # Three checkpoint paths are invalid.
    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
    self.assertEqual(2, mock_est.evaluate.call_count)

    # Two warning logs are expected (last warning time is reset after a
    # successful evaluation)
    self.assertEqual(2, mock_log.call_count)

  def test_warning_if_throttle_secs_is_zero(self):
    # A skipped checkpoint with throttle_secs=0 would busy-loop; the executor
    # warns about the improper throttle_secs instead.
    training_max_step = 200
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate.side_effect = [{_GLOBAL_STEP_KEY: training_max_step}]
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step
    # NOTE: the side_effect above takes precedence over the return_value the
    # helper sets; the helper is only used here for max_steps.
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    # We need to make the first one invalid, so it will check the
    # throttle_secs=0.
    mock_est.latest_checkpoint.side_effect = [None, 'path']

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    with tf.compat.v1.test.mock.patch.object(tf.compat.v1.logging,
                                             'warning') as mock_log:
      executor.run_evaluator()

    # First ckpt is invalid.
    self.assertEqual(2, mock_est.latest_checkpoint.call_count)
    self.assertEqual(1, mock_est.evaluate.call_count)
    self.assertRegexpMatches(str(mock_log.call_args), _INPROPER_THROTTL_SECS)

  def test_continuous_eval_listener_eval_result(self):
    # The listener observes one _EvalResult per loop iteration, including the
    # skipped (missing / not-new checkpoint) iterations.
    training_max_step = 200
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    expected_eval_metrics = [{
        _GLOBAL_STEP_KEY: training_max_step // 2
    }, {
        _GLOBAL_STEP_KEY: training_max_step
    }]
    mock_est.evaluate.side_effect = expected_eval_metrics
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step

    class _Listener(training._ContinuousEvalListener):

      def __init__(self):
        self.eval_results = []

      def after_eval(self, eval_result):
        self.eval_results.append(eval_result)
        return True

    continuous_eval_listener = _Listener()

    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    # First two items are invalid, next two items are same.
    mock_est.latest_checkpoint.side_effect = [
        None, '', 'same', 'same', 'path_2'
    ]
    expected_eval_results = [
        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
        training._EvalResult(
            training._EvalStatus.EVALUATED,
            metrics=expected_eval_metrics[0],
            checkpoint_path='same'),
        training._EvalResult(training._EvalStatus.NO_NEW_CHECKPOINT),
        training._EvalResult(
            training._EvalStatus.EVALUATED,
            metrics=expected_eval_metrics[1],
            checkpoint_path='path_2'),
    ]

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)

    executor = training._TrainingExecutor(
        mock_est,
        mock_train_spec,
        eval_spec,
        continuous_eval_listener=continuous_eval_listener)
    executor.run_evaluator()

    # Three checkpoint paths are invalid.
    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
    self.assertEqual(2, mock_est.evaluate.call_count)

    self.assertEqual(5, len(continuous_eval_listener.eval_results))
    for i, result in enumerate(continuous_eval_listener.eval_results):
      self.assertEqual(expected_eval_results[i].status, result.status)
      self.assertAllEqual(expected_eval_results[i].metrics, result.metrics)
      self.assertEqual(expected_eval_results[i].checkpoint_path,
                       result.checkpoint_path)

  def test_sleep_start_delay_secs(self):
    # The evaluator sleeps start_delay_secs before the first evaluation.
    training_max_step = 200
    start_delay_secs = 123

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: training_max_step}
    mock_est.model_dir = tf.compat.as_bytes(tf.compat.v1.test.get_temp_dir())
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_train_spec.max_steps = training_max_step

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1,
        steps=2,
        hooks=[_FakeHook()],
        name='cont_eval',
        start_delay_secs=start_delay_secs,
        throttle_secs=0)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    with tf.compat.v1.test.mock.patch.object(time, 'sleep') as mock_sleep:
      executor.run_evaluator()
      mock_sleep.assert_called_with(start_delay_secs)
      self.assertTrue(mock_est.evaluate.called)

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  @tf.compat.v1.test.mock.patch.object(time, 'sleep')
  def test_throttle_secs(self, mock_sleep, mock_time):
    # The evaluator sleeps for the remainder of throttle_secs after the eval
    # itself consumed operation_secs.
    throttle_secs = 123
    operation_secs = 12

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=throttle_secs)

    # Two time.time() readings bracket the evaluation.
    mock_time.side_effect = [921, 921 + operation_secs]

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    # Disable logging as it calls time.time also.
    with tf.compat.v1.test.mock.patch.object(tf.compat.v1.logging, 'info'):
      executor.run_evaluator()
    mock_sleep.assert_called_with(throttle_secs - operation_secs)
    self.assertTrue(mock_est.evaluate.called)

  def test_that_export_is_called(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)

    def export(estimator, *args, **kwargs):
      del args, kwargs
      estimator.export_was_called = True

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_whether_export_is_called'
    exporter.export = export

    eval_spec = training.EvalSpec(
        input_fn=lambda: 1,
        steps=2,
        start_delay_secs=0,
        throttle_secs=0,
        exporters=exporter)

    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
    executor.run_evaluator()

    # Verify that export was called on the right estimator.
    self.assertTrue(mock_est.export_was_called)

  def test_errors_out_if_evaluate_returns_empty_dict(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    train_spec = training.TrainSpec(input_fn=lambda: 1)
    eval_spec = training.EvalSpec(
        input_fn=(lambda: 1), start_delay_secs=0, throttle_secs=0)
    mock_est.evaluate.return_value = {}

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
      executor.run_evaluator()

  def test_errors_out_if_evaluate_returns_non_dict(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    train_spec = training.TrainSpec(input_fn=lambda: 1)
    eval_spec = training.EvalSpec(
        input_fn=(lambda: 1), start_delay_secs=0, throttle_secs=0)
    mock_est.evaluate.return_value = 123

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_RESULT_TYPE_ERR):
      executor.run_evaluator()

  def test_errors_out_if_evaluate_returns_dict_without_global_step(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    train_spec = training.TrainSpec(input_fn=lambda: 1)
    eval_spec = training.EvalSpec(
        input_fn=(lambda: 1), start_delay_secs=0, throttle_secs=0)
    mock_est.evaluate.return_value = {'loss': 123}

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    with self.assertRaisesRegexp(ValueError,
                                 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
      executor.run_evaluator()
class TrainingExecutorRunPsTest(tf.test.TestCase):
  """Tests run_ps of _TrainingExecutor."""

  @tf.compat.v1.test.mock.patch.object(server_lib, 'Server')
  def test_std_server(self, mock_server):
    # run_ps starts a std server and joins it (blocks forever in production).
    mock_server_instance = tf.compat.v1.test.mock.Mock()
    mock_server.return_value = mock_server_instance

    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_est.config = _create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)

    executor = training._TrainingExecutor(mock_est, mock_train_spec,
                                          mock_eval_spec)
    executor.run_ps()

    mock_server.assert_called_with(
        mock_est.config.cluster_spec,
        job_name=mock_est.config.task_type,
        task_index=mock_est.config.task_id,
        config=tf.compat.v1.test.mock.ANY,
        protocol=None,
        start=False)

    self.assertTrue(mock_server_instance.start.called)
    self.assertTrue(mock_server_instance.join.called)

  def test_fail_with_empty_cluster_spec(self):
    # A std server cannot be built without a cluster spec.
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = None
    mock_est.config.master = 'grpc://...'
    mock_est.config.task_type = 'ps'
    mock_est.config.task_id = 2
    mock_train_spec.saving_listeners = tuple([])

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_ps()

  def test_fail_with_empty_master(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'ps': ['dummy']})
    mock_est.config.master = ''
    mock_est.config.task_type = 'ps'
    mock_est.config.task_id = 2
    mock_train_spec.saving_listeners = tuple([])

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_ps()

  def test_fail_with_empty_task_type(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'ps': ['dummy']})
    mock_est.config.master = 'grpc://...'
    mock_est.config.task_type = ''
    mock_est.config.task_id = 2

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_ps()

  def test_fail_with_none_task_id(self):
    mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.PropertyMock(
        spec=run_config_lib.RunConfig)
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'ps': ['dummy']})
    mock_est.config.master = 'grpc://...'
    mock_est.config.task_type = 'ps'
    mock_est.config.task_id = None

    with self.assertRaisesRegexp(RuntimeError,
                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
      training._TrainingExecutor(mock_est, mock_train_spec,
                                 mock_eval_spec).run_ps()
class StopAtSecsHookTest(tf.test.TestCase):
  """Tests StopAtSecsHook."""

  @tf.compat.v1.test.mock.patch.object(time, 'time')
  def test_stops_after_time(self, mock_time):
    # The hook measures wall-clock time from session creation (t=+250), not
    # from hook construction (t=0); the stop fires once >1000s have elapsed
    # since creation (+250+500+400+200 = +1350 > +250+1000).
    mock_time.return_value = 1484695987.209386
    hook = training._StopAtSecsHook(1000)
    with tf.Graph().as_default():
      no_op = tf.no_op()
      # some time passed before training starts
      mock_time.return_value += 250
      with tf.compat.v1.train.MonitoredSession(hooks=[hook]) as sess:
        self.assertFalse(sess.should_stop())
        sess.run(no_op)
        self.assertFalse(sess.should_stop())
        mock_time.return_value += 500
        sess.run(no_op)
        self.assertFalse(sess.should_stop())
        mock_time.return_value += 400
        sess.run(no_op)
        self.assertFalse(sess.should_stop())
        mock_time.return_value += 200
        sess.run(no_op)
        self.assertTrue(sess.should_stop())
class TrainingExecutorRunLocalTest(tf.test.TestCase):
  """Tests run_local of _TrainingExecutor."""

  def _model_fn(self, features, labels, mode):
    """Minimal model_fn: one global-step increment per run, constant loss."""
    del labels
    with tf.control_dependencies([features]):
      train_op = tf.compat.v1.assign_add(tf.compat.v1.train.get_global_step(),
                                         1)
    return model_fn_lib.EstimatorSpec(
        mode,
        loss=tf.constant(0.),
        train_op=train_op,
        predictions=tf.constant([[10.]]),
        eval_metric_ops={
            'mean_of_features': tf.compat.v1.metrics.mean(features)
        })

  def _input_fn(self, repeat=True):
    """Single-tensor dataset; repeat=False makes eval terminate."""
    ds = tf.compat.v1.data.Dataset.from_tensors([1])
    if repeat:
      return ds.repeat()
    return ds

  def unique_checkpoint_every_time_fn(self):
    # Simulates a new checkpoint appearing on every latest_checkpoint() call.
    return 'checkpoint_path_%s/' % random.random()

  def test_runs_evaluate_with_every_new_checkpoint(self):
    # max_steps=22 with save_checkpoints_steps=10 yields 3 checkpoints, hence
    # 3 evaluations and 3 exports, the last marked final.
    est = estimator_lib.Estimator(
        model_fn=self._model_fn,
        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, wraps=est)
    mock_est.times_export_was_called = 0
    mock_est.times_final_export_was_called = 0

    def export(estimator, export_path, checkpoint_path, eval_result,
               is_the_final_export):
      del export_path, checkpoint_path, eval_result
      estimator.times_export_was_called += 1
      # The final export happens at the end.
      self.assertEqual(0, estimator.times_final_export_was_called)
      if is_the_final_export:
        estimator.times_final_export_was_called += 1

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_how_many_times_export_is_called'
    exporter.export = export

    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=22)
    eval_spec = training.EvalSpec(
        input_fn=lambda: self._input_fn(repeat=False),
        throttle_secs=0,
        exporters=exporter)

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    executor.run_local()

    self.assertEqual(1, mock_est.train.call_count)
    self.assertEqual(3, mock_est.evaluate.call_count)
    self.assertEqual(3, mock_est.times_export_was_called)
    self.assertEqual(1, mock_est.times_final_export_was_called)

  def test_runs_with_eval_listener_before_eval(self):
    # before_eval returning False stops run_local before any evaluation.
    est = estimator_lib.Estimator(
        model_fn=self._model_fn,
        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, wraps=est)
    mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn

    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=12)
    eval_spec = training.EvalSpec(input_fn=lambda: self._input_fn(repeat=False))
    mock_est.evaluate.side_effect = [{_GLOBAL_STEP_KEY: train_spec.max_steps}]

    class _Listener(training._ContinuousEvalListener):

      def __init__(self):
        self.call_count = 0

      def before_eval(self):
        self.call_count += 1
        return False  # Will stop the run_local before first eval.

    listener = _Listener()

    executor = training._TrainingExecutor(
        mock_est, train_spec, eval_spec, continuous_eval_listener=listener)
    executor.run_local()

    self.assertEqual(1, mock_est.train.call_count)
    self.assertEqual(0, mock_est.evaluate.call_count)

  def test_runs_with_eval_listener_after_eval(self):
    # after_eval returning False stops run_local after the first evaluation,
    # well before max_steps is reached.
    est = estimator_lib.Estimator(
        model_fn=self._model_fn,
        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, wraps=est)

    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=3000)
    eval_spec = training.EvalSpec(
        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)

    class _Listener(training._ContinuousEvalListener):

      def __init__(self):
        self.call_count = 0

      def after_eval(self, eval_result):
        self.call_count += 1
        return False  # Will stop the run_local after first eval.

    listener = _Listener()

    executor = training._TrainingExecutor(
        mock_est, train_spec, eval_spec, continuous_eval_listener=listener)
    metrics, _ = executor.run_local()  # pylint: disable=assignment-from-no-return

    self.assertEqual(1, mock_est.train.call_count)
    self.assertEqual(1, mock_est.evaluate.call_count)
    self.assertEqual(1, listener.call_count)
    # Should be less than max_steps since listener did early stopping.
    self.assertLess(metrics[_GLOBAL_STEP_KEY], train_spec.max_steps)

  def test_handles_no_new_checkpoint_found(self):
    # run_local relies on a CheckpointSaverHook; with checkpointing fully
    # disabled it must fail loudly rather than loop forever.
    est = estimator_lib.Estimator(
        model_fn=self._model_fn,
        # disable saving checkpoint
        config=run_config_lib.RunConfig(
            save_checkpoints_steps=None, save_checkpoints_secs=None))
    train_spec = training.TrainSpec(
        input_fn=self._input_fn, max_steps=300, hooks=[_FakeHook()])
    eval_spec = training.EvalSpec(
        input_fn=lambda: self._input_fn(repeat=False),
        hooks=[_FakeHook()],
        throttle_secs=100)

    executor = training._TrainingExecutor(est, train_spec, eval_spec)
    with self.assertRaisesRegexp(ValueError,
                                 'There should be a CheckpointSaverHook'):
      executor.run_local()

  def test_final_export_is_true_in_the_end(self):
    est = estimator_lib.Estimator(
        model_fn=self._model_fn,
        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
    mock_est = tf.compat.v1.test.mock.Mock(
        spec=estimator_lib.Estimator, wraps=est)
    mock_est.times_export_fn_was_called = 0
    mock_est.times_the_final_export_was_true = 0

    def export(estimator, export_path, checkpoint_path, eval_result,
               is_the_final_export):
      del export_path, checkpoint_path, eval_result
      estimator.times_export_fn_was_called += 1
      if is_the_final_export:
        estimator.times_the_final_export_was_true += 1

    exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter)
    exporter.name = 'see_how_many_times_export_is_called'
    exporter.export = export

    train_spec = training.TrainSpec(
        input_fn=self._input_fn, max_steps=12, hooks=[_FakeHook()])
    eval_spec = training.EvalSpec(
        input_fn=lambda: self._input_fn(repeat=False),
        throttle_secs=0,
        exporters=exporter)

    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
    executor.run_local()

    self.assertEqual(1, mock_est.train.call_count)
    self.assertEqual(2, mock_est.evaluate.call_count)
    self.assertEqual(2, mock_est.times_export_fn_was_called)
    self.assertEqual(1, mock_est.times_the_final_export_was_true)
estimator_lib.Estimator(model_fn=self._model_fn) mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, wraps=est) train_spec = training.TrainSpec( input_fn=self._input_fn, max_steps=300, hooks=[_FakeHook()]) eval_spec = training.EvalSpec( input_fn=lambda: self._input_fn(repeat=False), steps=2, hooks=[_FakeHook()], name='local_eval') executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) executor.run_local() mock_est.evaluate.assert_called_with( name=eval_spec.name, input_fn=eval_spec.input_fn, steps=eval_spec.steps, checkpoint_path=est.latest_checkpoint(), hooks=eval_spec.hooks) train_args = mock_est.train.call_args[1] self.assertEqual(list(train_spec.hooks), list(train_args['hooks'])) self.assertEqual(train_spec.input_fn, train_args['input_fn']) self.assertEqual(train_spec.max_steps, train_args['max_steps']) def test_train_with_no_eval_spec_fails(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) eval_spec = None executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG): executor.run_local() def test_train_hooks(self): mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, model_dir='path/') mock_est.latest_checkpoint.return_value = 'checkpoint_path/' train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) eval_spec = training.EvalSpec(input_fn=lambda: 1, steps=2) mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps} extra_hooks = [_FakeHook()] executor = training._TrainingExecutor( mock_est, train_spec, eval_spec, train_hooks=extra_hooks) executor.run_local() train_args = mock_est.train.call_args[1] self.assertEqual( list(train_spec.hooks) + extra_hooks, [ h for h in train_args['hooks'] if not isinstance(h, training._StopAtSecsHook) ]) def 
test_that_export_is_called_with_run_local(self): est = estimator_lib.Estimator(model_fn=self._model_fn) mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, wraps=est) train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=12) mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps} def export(estimator, *args, **kwargs): del args, kwargs estimator.export_was_called = True return 'path_to_export' exporter = tf.compat.v1.test.mock.PropertyMock(spec=exporter_lib.Exporter) exporter.name = 'see_whether_export_is_called' exporter.export = export eval_spec = training.EvalSpec( input_fn=lambda: self._input_fn(repeat=False), steps=2, start_delay_secs=0, throttle_secs=213, exporters=exporter) executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) # pylint: disable=assignment-from-no-return _, export_results = executor.run_local() # pylint: enable=assignment-from-no-return self.assertTrue(mock_est.export_was_called) self.assertEqual(export_results, ['path_to_export']) def test_errors_out_if_evaluate_returns_empty_dict(self): est = estimator_lib.Estimator( model_fn=self._model_fn, config=run_config_lib.RunConfig(save_checkpoints_steps=2)) mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, wraps=est) train_spec = training.TrainSpec(input_fn=self._input_fn) eval_spec = training.EvalSpec( input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0) mock_est.evaluate.return_value = {} executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR): executor.run_local() def test_errors_out_if_evaluate_returns_non_dict(self): est = estimator_lib.Estimator( model_fn=self._model_fn, config=run_config_lib.RunConfig(save_checkpoints_steps=2)) mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, wraps=est) train_spec = training.TrainSpec(input_fn=self._input_fn) eval_spec = training.EvalSpec( input_fn=lambda: 
self._input_fn(repeat=False), throttle_secs=0) mock_est.evaluate.return_value = 123 executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_RESULT_TYPE_ERR): executor.run_local() def test_errors_out_if_evaluate_returns_dict_without_global_step(self): est = estimator_lib.Estimator( model_fn=self._model_fn, config=run_config_lib.RunConfig(save_checkpoints_steps=2)) mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, wraps=est) train_spec = training.TrainSpec(input_fn=self._input_fn) eval_spec = training.EvalSpec( input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0) mock_est.evaluate.return_value = {'loss': 123} executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) with self.assertRaisesRegexp(ValueError, _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR): executor.run_local() def test_train_and_evaluate_return_metrics(self): est = estimator_lib.Estimator(model_fn=self._model_fn) mock_est = tf.compat.v1.test.mock.Mock( spec=estimator_lib.Estimator, wraps=est) train_spec = training.TrainSpec( input_fn=self._input_fn, max_steps=12, hooks=[_FakeHook()]) eval_spec = training.EvalSpec( input_fn=lambda: self._input_fn(repeat=False), steps=2, hooks=[_FakeHook()], name='local_eval') executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) # pylint: disable=assignment-from-no-return metrics, _ = executor.run_local() # pylint: enable=assignment-from-no-return self.assertEqual(metrics['global_step'], 12) class TrainAndEvaluateRunTest(tf.test.TestCase): def _test_run_task_and_executor(self, run_config): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = run_config mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) executor.call_task = {} def task_fn(name): def 
_fn(): executor.call_task[name] = 1 return _fn executor.run_chief = task_fn('chief') executor.run_master = task_fn('master') executor.run_ps = task_fn('ps') executor.run_evaluator = task_fn('evaluator') executor.run_worker = task_fn('worker') executor.run_local = task_fn('local') return executor def test_run_chief(self): executor = self._test_run_task_and_executor( run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF)) executor.run() self.assertEqual(1, executor.call_task['chief']) def test_run_worker(self): executor = self._test_run_task_and_executor( run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER)) executor.run() self.assertEqual(1, executor.call_task['worker']) def test_run_ps(self): executor = self._test_run_task_and_executor( run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS)) executor.run() self.assertEqual(1, executor.call_task['ps']) def test_run_evaluator(self): executor = self._test_run_task_and_executor( run_config=_create_run_config_with_cluster_spec( _TF_CONFIG_FOR_EVALUATOR)) executor.run() self.assertEqual(1, executor.call_task['evaluator']) def test_run_local(self): executor = self._test_run_task_and_executor( run_config=run_config_lib.RunConfig()) executor.run() self.assertEqual(1, executor.call_task['local']) def test_invalid_local_task(self): tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], 'local': ['hos1:1'], }, 'task': { 'type': 'local', # invalid task type. 
'index': 0 } } mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = _create_run_config_with_cluster_spec(tf_config) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) with self.assertRaisesRegexp(ValueError, _INVALID_LOCAL_TASK_WITH_CLUSTER): executor.run() def test_unsupported_task_due_to_missing_run_task(self): unsupported_task = 'alloc' tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], unsupported_task: ['hos1:1'], }, 'task': { 'type': unsupported_task, 'index': 0 } } mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = _create_run_config_with_cluster_spec(tf_config) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN): executor.run() def test_unsupported_task_due_to_not_callable(self): unsupported_task = 'alloc' tf_config = { 'cluster': { run_config_lib.TaskType.CHIEF: ['host0:0'], unsupported_task: ['hos1:1'], }, 'task': { 'type': unsupported_task, 'index': 0 } } mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = _create_run_config_with_cluster_spec(tf_config) mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec) mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) executor.run_alloc = 123 # not callable with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN): executor.run() def test_invalid_task_type(self): mock_est = tf.compat.v1.test.mock.Mock(spec=estimator_lib.Estimator) mock_est.config = tf.compat.v1.test.mock.Mock() 
# (tail of TrainAndEvaluateRunTest.test_invalid_task_type: a config whose
# cluster_spec is set but whose task_type is empty must be rejected by run().)
    mock_train_spec = tf.compat.v1.test.mock.Mock(spec=training.TrainSpec)
    mock_eval_spec = tf.compat.v1.test.mock.Mock(spec=training.EvalSpec)
    mock_est.config = tf.compat.v1.test.mock.Mock()
    mock_est.config.cluster_spec = tf.train.ClusterSpec({'1': ['dummy']})
    mock_est.config.task_type = ''
    executor = training._TrainingExecutor(mock_est, mock_train_spec,
                                          mock_eval_spec)
    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE):
      executor.run()


class TrainAndEvaluateIntegrationTest(tf.test.TestCase):
  """End-to-end test of train_and_evaluate with a real DNNClassifier."""

  def setUp(self):
    # Fresh model dir per test; removed in tearDown.
    self._model_dir = tempfile.mkdtemp()

  def tearDown(self):
    if self._model_dir:
      shutil.rmtree(self._model_dir)

  def _as_label(self, data_in_float):
    # Round float data to the nearest integer class id.
    return np.rint(data_in_float).astype(np.int64)

  def _get_exporter(self, name, fc):
    # Builds a LatestExporter whose serving input receiver parses
    # tf.Example protos according to the given feature columns.
    feature_spec = tf.compat.v1.feature_column.make_parse_example_spec(fc)
    serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
    return exporter_lib.LatestExporter(
        name, serving_input_receiver_fn=serving_input_receiver_fn)

  def _extract_loss_and_global_step(self, event_folder):
    """Returns the loss and global step in last event."""
    event_paths = glob.glob(os.path.join(event_folder, 'events*'))
    loss = None
    global_step_count = None

    # Only the most recent events file is inspected.
    for e in tf.compat.v1.train.summary_iterator(event_paths[-1]):
      current_loss = None
      for v in e.summary.value:
        if v.tag == 'loss':
          current_loss = v.simple_value

      # If loss is not found, global step is meaningless.
      if current_loss is None:
        continue

      current_global_step = e.step
      # Keep the loss from the event with the largest global step.
      if global_step_count is None or current_global_step > global_step_count:
        global_step_count = current_global_step
        loss = current_loss

    return (loss, global_step_count)

  def test_complete_flow_with_non_distributed_configuration(self):
    n_classes = 3
    input_dimension = 2
    batch_size = 10
    eval_name = 'foo'
    exporter_name = 'saved_model_exporter'

    # max_steps should be larger than save_summary_steps
    max_steps = 10
    save_summary_steps = 9

    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))

    # learn y = x
    def train_input_fn():
      return tf.compat.v1.data.Dataset.from_tensor_slices(({
          'x': x_data
      }, y_data)).batch(batch_size).repeat().shuffle(1000)

    def eval_input_fn():
      return tf.compat.v1.data.Dataset.from_tensor_slices(({
          'x': x_data
      }, y_data)).batch(batch_size)

    def predict_input_fn():
      return tf.compat.v1.data.Dataset.from_tensor_slices({
          'x': x_data
      }).batch(batch_size)

    feature_columns = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]

    est = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        config=run_config_lib.RunConfig(save_summary_steps=save_summary_steps),
        model_dir=self._model_dir)

    train_spec = training.TrainSpec(
        input_fn=train_input_fn, max_steps=max_steps)
    eval_spec = training.EvalSpec(
        name=eval_name,
        input_fn=eval_input_fn,
        steps=None,
        exporters=self._get_exporter(exporter_name, feature_columns),
        throttle_secs=0)

    training.train_and_evaluate(est, train_spec, eval_spec)

    # Make sure nothing is stuck in limbo.
    tf.compat.v1.summary.FileWriterCache.clear()

    # Examine the training events. Use a range to check global step to avoid
    # flakyness due to global step race condition.
    training_loss, _ = self._extract_loss_and_global_step(est.model_dir)
    self.assertIsNotNone(training_loss)

    # Examine the eval events. The global step should be accurate.
    eval_loss, eval_global_step = self._extract_loss_and_global_step(
        event_folder=est.eval_dir(eval_name))
    self.assertIsNotNone(eval_loss)
    self.assertEqual(max_steps, eval_global_step)

    # Examine the export folder.
    export_dir = os.path.join(
        os.path.join(est.model_dir, 'export'), exporter_name)
    self.assertTrue(tf.compat.v1.gfile.Exists(export_dir))

    # Examine the ckpt for predict.
    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/python/estimator/util.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for Estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import tensorflow as tf

# import keras 2
# Keras 3 reports a version starting with '3.'; in that case the Keras 2
# implementation is pulled in from the separate tf_keras package so that
# Estimator keeps its Keras-2-era behavior.
version_fn = getattr(tf.keras, 'version', None)
if version_fn and version_fn().startswith('3.'):
  import tf_keras  # pylint: disable=g-import-not-at-top,unused-import
  from tf_keras.api._v1 import keras as tf_keras_v1  # pylint: disable=g-import-not-at-top,unused-import
  from tf_keras.api._v2 import keras as tf_keras_v2  # pylint: disable=g-import-not-at-top,unused-import
else:
  tf_keras = tf.keras  # Keras 2
  tf_keras_v1 = tf.compat.v1.keras
  tf_keras_v2 = tf.compat.v2.keras

from tensorflow.python.util import function_utils

# Re-exported helper: returns the argument names of a callable.
fn_args = function_utils.fn_args

# When we create a timestamped directory, there is a small chance that the
# directory already exists because another process is also creating these
# directories. In this case we just wait one second to get a new timestamp and
# try again. If this fails several times in a row, then something is seriously
# wrong.
MAX_DIRECTORY_CREATION_ATTEMPTS = 10


def parse_input_fn_result(result):
  """Gets features, labels, and hooks from the result of an Estimator input_fn.

  Args:
    result: output of an input_fn to an estimator, which should be one of:
      * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
        tuple (features, labels) with same constraints as below.
      * A tuple (features, labels): Where `features` is a `Tensor` or a
        dictionary of string feature name to `Tensor` and `labels` is a
        `Tensor` or a dictionary of string label name to `Tensor`. Both
        `features` and `labels` are consumed by `model_fn`. They should
        satisfy the expectation of `model_fn` from inputs.

  Returns:
    Tuple of features, labels, and input_hooks, where features are as
    described above, labels are as described above or None, and input_hooks
    are a list of SessionRunHooks to be included when running.

  Raises:
    ValueError: if the result is a list or tuple of length != 2.
  """
  input_hooks = []
  if isinstance(result, tf.compat.v2.data.Dataset):
    # Datasets are unwrapped into an initializable iterator; the hook
    # initializes it once the session is created.
    iterator = tf.compat.v1.data.make_initializable_iterator(result)
    input_hooks.append(_DatasetInitializerHook(iterator))
    result = iterator.get_next()
  return parse_iterator_result(result) + (input_hooks,)


def parse_iterator_result(result):
  """Gets features, labels from result."""
  if isinstance(result, (list, tuple)):
    if len(result) != 2:
      raise ValueError(
          'input_fn should return (features, labels) as a len 2 tuple.')
    return result[0], result[1]
  # A bare tensor (or dict) is treated as features with no labels.
  return result, None


class _DatasetInitializerHook(tf.compat.v1.train.SessionRunHook):
  """Creates a SessionRunHook that initializes the passed iterator."""

  def __init__(self, iterator):
    self._iterator = iterator

  def begin(self):
    # Capture the initializer op at graph-finalize time.
    self._initializer = self._iterator.initializer

  def after_create_session(self, session, coord):
    del coord
    session.run(self._initializer)


class DistributedIteratorInitializerHook(tf.compat.v1.train.SessionRunHook):
  """Creates a SessionRunHook that initializes the passed iterator."""

  def __init__(self, iterator):
    self._iterator = iterator

  def begin(self):
    # Distributed iterators expose initialize() rather than .initializer.
    self._initializer = self._iterator.initialize()

  def after_create_session(self, session, coord):
    del coord
    session.run(self._initializer)


class MultiHostDatasetInitializerHook(tf.compat.v1.train.SessionRunHook):
  """Creates a SessionRunHook that initializes all passed iterators."""

  def __init__(self, dataset_initializers):
    self._initializers = dataset_initializers

  def after_create_session(self, session, coord):
    del coord
    start = time.time()
    # All initializers run in one session.run call.
    # NOTE(review): %d truncates the float duration to whole seconds.
    session.run(self._initializers)
    tf.compat.v1.logging.info('Initialized dataset iterators in %d seconds',
                              time.time() - start)


================================================
FILE:
tensorflow_estimator/python/estimator/util_test.py
================================================
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for util.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow_estimator.python.estimator import util


@test_util.deprecated_graph_mode_only
class UtilTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for miscellaneous Estimator utils."""

  def test_parse_input_fn_result_tuple(self):
    # A plain (features, labels) tuple passes through with no hooks.

    def _input_fn():
      features = tf.constant(np.arange(100))
      labels = tf.constant(np.arange(100, 200))
      return features, labels

    features, labels, hooks = util.parse_input_fn_result(_input_fn())

    with self.cached_session() as sess:
      vals = sess.run([features, labels])

    self.assertAllEqual(vals[0], np.arange(100))
    self.assertAllEqual(vals[1], np.arange(100, 200))
    self.assertEqual(hooks, [])

  @parameterized.named_parameters(('DatasetV1', tf.compat.v1.data.Dataset),
                                  ('DatasetV2', tf.data.Dataset))
  def test_parse_input_fn_result_dataset(self, dataset_class):
    # A Dataset is unwrapped via an initializable iterator plus a hook.

    def _input_fn():
      features = np.expand_dims(np.arange(100), 0)
      labels = np.expand_dims(np.arange(100, 200), 0)
      return dataset_class.from_tensor_slices((features, labels))

    features, labels, hooks = util.parse_input_fn_result(_input_fn())

    # MonitoredSession runs the returned hook, which initializes the iterator.
    with tf.compat.v1.train.MonitoredSession(hooks=hooks) as sess:
      vals = sess.run([features, labels])

    self.assertAllEqual(vals[0], np.arange(100))
    self.assertAllEqual(vals[1], np.arange(100, 200))
    self.assertIsInstance(hooks[0], util._DatasetInitializerHook)

  def test_parse_input_fn_result_features_only(self):
    # A bare tensor means features only; labels come back as None.

    def _input_fn():
      return tf.constant(np.arange(100))

    features, labels, hooks = util.parse_input_fn_result(_input_fn())

    with self.cached_session() as sess:
      vals = sess.run([features])

    self.assertAllEqual(vals[0], np.arange(100))
    self.assertEqual(labels, None)
    self.assertEqual(hooks, [])

  @parameterized.named_parameters(('DatasetV1', tf.compat.v1.data.Dataset),
                                  ('DatasetV2', tf.data.Dataset))
  def test_parse_input_fn_result_features_only_dataset(self, dataset_class):

    def _input_fn():
      features = np.expand_dims(np.arange(100), 0)
      return dataset_class.from_tensor_slices(features)

    features, labels, hooks = util.parse_input_fn_result(_input_fn())

    with tf.compat.v1.train.MonitoredSession(hooks=hooks) as sess:
      vals = sess.run([features])

    self.assertAllEqual(vals[0], np.arange(100))
    self.assertEqual(labels, None)
    self.assertIsInstance(hooks[0], util._DatasetInitializerHook)

  @parameterized.named_parameters(('DatasetV1', tf.compat.v1.data.Dataset),
                                  ('DatasetV2', tf.data.Dataset))
  def test_parse_input_fn_result_invalid(self, dataset_class):
    # A 3-tuple Dataset output is rejected with a ValueError.

    def _input_fn():
      features = np.expand_dims(np.arange(100), 0)
      labels = np.expand_dims(np.arange(100, 200), 0)
      return dataset_class.from_tensor_slices((features, labels, labels))

    with self.assertRaisesRegexp(ValueError, 'input_fn should return'):
      util.parse_input_fn_result(_input_fn())


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: tensorflow_estimator/tools/pip_package/BUILD
================================================
package(default_visibility =
["//tensorflow_estimator:internal"])

# Description:
# Tools for building the TensorFlow pip package.

COMMON_PIP_DEPS = [
    "//tensorflow_estimator",
    # Need to include testing libraries in pip package so our pip
    # release tests can run. (see py_test rule in estimator.bzl for more context).
    # Essentially, everything needed to run the test (except the test file itself)
    # must be contained in the pip package since we strip away all deps.
    "//tensorflow_estimator/python/estimator:dnn_testing_utils",
    "//tensorflow_estimator/python/estimator:dnn_testing_utils_v1",
    "//tensorflow_estimator/python/estimator:linear_testing_utils",
    "//tensorflow_estimator/python/estimator:linear_testing_utils_v1",
]

sh_binary(
    name = "build_pip_package",
    srcs = ["build_pip_package.sh"],
    data = COMMON_PIP_DEPS,
)


================================================
FILE: tensorflow_estimator/tools/pip_package/build_pip_package.sh
================================================
#!/usr/bin/env bash
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

set -e

# Returns success if $1 is an absolute POSIX or Windows-drive path.
function is_absolute {
  [[ "$1" = /* ]] || [[ "$1" =~ ^[a-zA-Z]:[/\\].* ]]
}

# Echoes $1 as an absolute path, resolving relative paths against $PWD.
function real_path() {
  is_absolute "$1" && echo "$1" || echo "$PWD/${1#./}"
}

# Copies the bazel-built package sources plus setup.py into $1 and runs the
# pip helper to create __init__.py files and verify packaging completeness.
# NOTE: assigns the global TMPDIR, which main() later uses for cleanup.
function prepare_src() {
  TMPDIR="$1"
  mkdir -p "$TMPDIR"
  echo $(date) : "=== Preparing sources in dir: ${TMPDIR}"
  if [ ! -d bazel-bin/tensorflow_estimator ]; then
    echo "Could not find bazel-bin. Did you run from the root of the build tree?"
    exit 1
  fi
  cp -r "bazel-bin/tensorflow_estimator/tools/pip_package/build_pip_package.runfiles/org_tensorflow_estimator/tensorflow_estimator" "$TMPDIR"
  cp tensorflow_estimator/tools/pip_package/setup.py "$TMPDIR"
  # Verifies all expected files are in pip.
  # Creates init files in all directory in pip.
  python tensorflow_estimator/tools/pip_package/create_pip_helper.py --pip-root "${TMPDIR}/tensorflow_estimator/" --bazel-root "./tensorflow_estimator"
}

# Builds the wheel from prepared sources: $1=srcdir $2=destdir $3=project name.
function build_wheel() {
  if [ $# -lt 2 ] ; then
    echo "No src and dest dir provided"
    exit 1
  fi
  TMPDIR="$1"
  DEST="$2"
  PROJECT_NAME="$3"
  pushd ${TMPDIR} > /dev/null
  echo $(date) : "=== Building wheel"
  "${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel --universal --project_name $PROJECT_NAME
  mkdir -p ${DEST}
  cp dist/* ${DEST}
  popd > /dev/null
  echo $(date) : "=== Output wheel file is in: ${DEST}"
}

function usage() {
  echo "Usage:"
  echo "$0 [--src srcdir] [--dst dstdir] [options]"
  echo "$0 dstdir [options]"
  echo ""
  echo " --src prepare sources in srcdir"
  echo " will use temporary dir if not specified"
  echo ""
  echo " --dst build wheel in dstdir"
  echo " if dstdir is not set do not build, only prepare sources"
  echo ""
  echo " Options:"
  echo " --project_name set project name to name"
  echo " --nightly build tensorflow_estimator nightly"
  echo ""
  exit 1
}

function main() {
  NIGHTLY_BUILD=0
  PROJECT_NAME=""
  SRCDIR=""
  DSTDIR=""
  CLEANSRC=1
  # Parse flags; a bare positional argument is treated as the destination dir.
  while true; do
    if [[ -z "$1" ]]; then
      break
    elif [[ "$1" == "--help" ]]; then
      usage
      exit 1
    elif [[ "$1" == "--nightly" ]]; then
      NIGHTLY_BUILD=1
    elif [[ "$1" == "--project_name" ]]; then
      shift
      if [[ -z "$1" ]]; then
        break
      fi
      PROJECT_NAME="$1"
    elif [[ "$1" == "--src" ]]; then
      shift
      if [[ -z "$1" ]]; then
        break
      fi
      SRCDIR="$(real_path $1)"
      # User-provided source dir is not deleted afterwards.
      CLEANSRC=0
    elif [[ "$1" == "--dst" ]]; then
      shift
      if [[ -z "$1" ]]; then
        break
      fi
      DSTDIR="$(real_path $1)"
    else
      DSTDIR="$(real_path $1)"
    fi
    shift
  done

  if [[ -z ${PROJECT_NAME} ]]; then
    PROJECT_NAME="tensorflow_estimator"
    if [[ ${NIGHTLY_BUILD} == "1" ]]; then
      PROJECT_NAME="tf_estimator_nightly"
    fi
  fi

  if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then
    echo "No destination dir provided"
    usage
    exit 1
  fi

  if [[ -z "$SRCDIR" ]]; then
    # make temp srcdir if none set
    SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)"
  fi

  prepare_src "$SRCDIR"

  if [[ -z "$DSTDIR" ]]; then
    # only want to prepare sources
    exit
  fi

  build_wheel "$SRCDIR" "$DSTDIR" "$PROJECT_NAME"

  if [[ $CLEANSRC -ne 0 ]]; then
    # TMPDIR was set to SRCDIR inside prepare_src.
    rm -rf "${TMPDIR}"
  fi
}

main "$@"


================================================
FILE: tensorflow_estimator/tools/pip_package/create_pip_helper.py
================================================
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================== """Utils to help build and verify pip package for TensorFlow Estimator.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse import fnmatch import os PIP_EXCLUDED_FILES = frozenset([ 'tensorflow_estimator/python/estimator/canned/optimizers_test_v2.py', 'tensorflow_estimator/python/estimator/canned/dnn_test_fc_v2.py', 'tensorflow_estimator/python/estimator/canned/dnn_test_fc_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/dnn_estimator_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/linear_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/dnn_linear_combined_estimator_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/dnn_linear_combined_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/baseline_estimator_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/linear_estimator_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/baseline_test_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/dnn_test_fc_v1_v1.py', 'tensorflow_estimator/python/estimator/canned/v1/dnn_test_fc_v2_v1.py', 'tensorflow_estimator/python/estimator/api/extractor_wrapper.py', 'tensorflow_estimator/python/estimator/api/generator_wrapper.py', 'tensorflow_estimator/tools/pip_package/setup.py', 'tensorflow_estimator/tools/pip_package/create_pip_helper.py', ]) # Directories that should not have __init__.py files generated within them. EXCLUDED_INIT_FILE_DIRECTORIES = frozenset(['tensorflow_estimator/tools']) class PipPackagingError(Exception): pass def create_init_files(pip_root): """Create __init__.py in pip directory tree. These files are auto-generated by Bazel when doing typical build/test, but do not get auto-generated by the pip build process. Currently, the entire directory tree is just python files, so its fine to just create all of the init files. 
Args: pip_root: Root directory of code being packaged into pip. Returns: True: contrib code is included in pip. """ has_contrib = False for path, subdirs, _ in os.walk(pip_root): has_contrib = has_contrib or '/contrib/' in path for subdir in subdirs: init_file_path = os.path.join(path, subdir, '__init__.py') if any(excluded_path in init_file_path for excluded_path in EXCLUDED_INIT_FILE_DIRECTORIES): continue if not os.path.exists(init_file_path): # Create empty file open(init_file_path, 'w').close() return has_contrib def verify_python_files_in_pip(pip_root, bazel_root, has_contrib): """Verifies all expected files are packaged into Pip. Args: pip_root: Root directory of code being packaged into pip. bazel_root: Root directory of Estimator Bazel workspace. has_contrib: Code from contrib/ should be included in pip. Raises: PipPackagingError: Missing file in pip. """ for path, _, files in os.walk(bazel_root): if not has_contrib and '/contrib/' in path: continue python_files = set(fnmatch.filter(files, '*.py')) python_test_files = set(fnmatch.filter(files, '*test.py')) # We only care about python files in the pip package, see create_init_files. files = python_files - python_test_files for f in files: pip_path = os.path.join(pip_root, os.path.relpath(path, bazel_root), f) file_name = os.path.join(path, f) path_exists = os.path.exists(pip_path) file_excluded = file_name.lstrip('./') in PIP_EXCLUDED_FILES if not path_exists and not file_excluded: raise PipPackagingError( ('Pip package missing the file %s. If this is expected, add it ' 'to PIP_EXCLUDED_FILES in create_pip_helper.py. Otherwise, ' 'make sure it is a build dependency of the pip package') % file_name) if path_exists and file_excluded: raise PipPackagingError( ('File in PIP_EXCLUDED_FILES included in pip. 
%s' % file_name)) def main(): parser = argparse.ArgumentParser() parser.add_argument( '--bazel-root', type=str, required=True, help='Root directory of Estimator Bazel workspace.') parser.add_argument( '--pip-root', type=str, required=True, help='Root directory of code being packaged into pip.') args = parser.parse_args() has_contrib = create_init_files(args.pip_root) verify_python_files_in_pip(args.pip_root, args.bazel_root, has_contrib) if __name__ == '__main__': main() ================================================ FILE: tensorflow_estimator/tools/pip_package/setup.py ================================================ # Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """TensorFlow Estimator. TensorFlow Estimator is a high-level API that encapsulates model training, evaluation, prediction, and exporting. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import sys import setuptools DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. _VERSION = '2.16.0' REQUIRED_PACKAGES = [ # We depend on TensorFlow's declared pip dependencies. # Add a new dep there if one is needed. 
]

project_name = 'tensorflow_estimator'
# Allow overriding the published package name via a custom
# `--project_name NAME` flag (the build script passes `tf_estimator_nightly`
# for nightly wheels).  Both the flag and its value must be removed from
# sys.argv before setuptools parses it: remove() drops the first occurrence
# of the flag (the one found by index()), after which the value has shifted
# into project_name_idx and is dropped by pop().
if '--project_name' in sys.argv:
  project_name_idx = sys.argv.index('--project_name')
  project_name = sys.argv[project_name_idx + 1]
  sys.argv.remove('--project_name')
  sys.argv.pop(project_name_idx)

setuptools.setup(
    name=project_name,
    # Semver '-' separators are not valid in pip version strings; strip them.
    version=_VERSION.replace('-', ''),
    description=DOCLINES[0],
    long_description='\n'.join(DOCLINES[2:]),
    url='https://www.tensorflow.org/',
    download_url='https://github.com/tensorflow/estimator/tags',
    author='Google Inc.',
    packages=setuptools.find_packages(),
    install_requires=REQUIRED_PACKAGES,
    # PyPI package information.
    # Supported Python versions
    python_requires='>=3.7',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Mathematics',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Software Development',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
    license='Apache 2.0',
    keywords='tensorflow estimator tensor machine learning',
)


================================================
FILE: third_party/py/BUILD
================================================


================================================
FILE: third_party/py/BUILD.tpl
================================================
licenses(["restricted"])

package(default_visibility = ["//visibility:public"])

# Point both runtimes to the same python binary to ensure we always
# use the python binary specified by ./configure.py script.
load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")

# %{PYTHON_BIN_PATH} below is a template placeholder substituted by
# python_configure.bzl (_create_local_python_repository) when this .tpl file
# is instantiated into the generated repository's BUILD file.
py_runtime(
    name = "py2_runtime",
    interpreter_path = "%{PYTHON_BIN_PATH}",
    python_version = "PY2",
)

py_runtime(
    name = "py3_runtime",
    interpreter_path = "%{PYTHON_BIN_PATH}",
    python_version = "PY3",
)

# Bundle both runtimes so a single toolchain target can serve either
# python_version.
py_runtime_pair(
    name = "py_runtime_pair",
    py2_runtime = ":py2_runtime",
    py3_runtime = ":py3_runtime",
)

toolchain(
    name = "py_toolchain",
    toolchain = ":py_runtime_pair",
    toolchain_type = "@bazel_tools//tools/python:toolchain_type",
)


================================================
FILE: third_party/py/python_configure.bzl
================================================
"""Repository rule for Python autoconfiguration.

`python_configure` depends on the following environment variables:

  * `PYTHON_BIN_PATH`: location of python binary.
"""

# Name of the environment variable consulted for the python interpreter path.
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"

def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
    """Instantiates //third_party/py:<tpl>.tpl into the generated repository.

    Args:
      repository_ctx: the repository context of the calling rule.
      tpl: basename of the template file (without the ".tpl" suffix).
      substitutions: dict mapping "%{...}" placeholders to replacement text.
      out: output path within the repository; defaults to the template name.
    """
    if not out:
        out = tpl
    repository_ctx.template(
        out,
        Label("//third_party/py:%s.tpl" % tpl),
        substitutions,
    )

def _fail(msg):
    """Output failure message when auto configuration fails."""
    # ANSI escape codes: highlight the error prefix in red on capable
    # terminals.
    red = "\033[0;31m"
    no_color = "\033[0m"
    fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg))

def _get_python_bin(repository_ctx):
    """Gets the python bin path.

    Resolution order: $PYTHON_BIN_PATH if set, otherwise `python` found on
    $PATH.  Fails the build with a configuration error if neither works.
    """
    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
    if python_bin != None:
        return python_bin
    python_bin_path = repository_ctx.which("python")
    if python_bin_path != None:
        return str(python_bin_path)
    _fail("Cannot find python in PATH, please make sure " +
          "python is installed and add its directory in PATH, or --define " +
          "%s='/something/else'.\nPATH=%s" % (
              _PYTHON_BIN_PATH,
              repository_ctx.os.environ.get("PATH", ""),
          ))

def _create_local_python_repository(repository_ctx):
    """Creates the repository containing files set up to build with Python."""
    python_bin = _get_python_bin(repository_ctx)
    # Expand BUILD.tpl with the resolved interpreter path; this produces the
    # BUILD file of the @local_config_py_toolchain repository.
    _tpl(repository_ctx, "BUILD", {
        "%{PYTHON_BIN_PATH}": python_bin,
    })

def _python_autoconf_impl(repository_ctx):
    """Implementation of the python_autoconf repository rule."""
    _create_local_python_repository(repository_ctx)

python_configure = repository_rule(
    implementation = _python_autoconf_impl,
    # Declaring the variable here makes Bazel re-run the rule when it changes.
    environ = [
        _PYTHON_BIN_PATH,
    ],
)
"""Detects and configures the local Python toolchain.

Add the following to your WORKSPACE FILE:

```python
load("//third_party/py:python_configure.bzl", "python_configure")

python_configure(name = "local_config_py_toolchain")

register_toolchains("@local_config_py_toolchain//:py_toolchain")
```

Args:
  name: A unique name for this workspace rule.
"""