Repository: PAIR-code/facets Branch: master Commit: 44d9b60437bf Files: 192 Total size: 8.8 MB Directory structure: gitextract_q04hf33v/ ├── .gitignore ├── AUTHORS ├── CONTRIBUTING.md ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── WORKSPACE ├── colab_facets.ipynb ├── facets/ │ ├── BUILD │ ├── colab.html │ └── visualizations.html ├── facets-dist/ │ └── facets-jupyter.html ├── facets_atlasmaker/ │ ├── .gitignore │ ├── BUILD │ ├── README.md │ ├── atlasmaker.py │ ├── atlasmaker_io.py │ ├── atlasmaker_io_test.py │ ├── convert.py │ ├── convert_test.py │ ├── montage.py │ ├── montage_test.py │ ├── parallelize.py │ ├── parallelize_test.py │ ├── pylintrc │ ├── requirements.txt │ ├── testdata/ │ │ ├── attributions.txt │ │ ├── testfiles_smalllist.csv │ │ ├── testfiles_smalllist_with_dups.csv │ │ └── wikipedia_images_16.csv │ └── utils/ │ ├── BUILD │ ├── README.md │ ├── wikipedia_sourcelist_generator.py │ └── wikipedia_sourcelist_generator_test.py ├── facets_dive/ │ ├── Dive_demo.ipynb │ ├── README.md │ ├── colab_dive_atlas_demo.ipynb │ ├── components/ │ │ ├── facets_dive/ │ │ │ ├── BUILD │ │ │ ├── facets-dive.html │ │ │ ├── facets-dive.ts │ │ │ ├── test.html │ │ │ └── test.ts │ │ ├── facets_dive_controls/ │ │ │ ├── BUILD │ │ │ ├── facets-dive-controls.html │ │ │ ├── facets-dive-controls.ts │ │ │ ├── test.html │ │ │ └── test.ts │ │ ├── facets_dive_info_card/ │ │ │ ├── BUILD │ │ │ ├── facets-dive-info-card.html │ │ │ ├── facets-dive-info-card.ts │ │ │ ├── test.html │ │ │ └── test.ts │ │ ├── facets_dive_legend/ │ │ │ ├── BUILD │ │ │ ├── facets-dive-legend.html │ │ │ ├── facets-dive-legend.ts │ │ │ ├── test.html │ │ │ └── test.ts │ │ └── facets_dive_vis/ │ │ ├── BUILD │ │ ├── facets-dive-vis.html │ │ ├── facets-dive-vis.ts │ │ ├── test.html │ │ ├── test.ts │ │ └── typings.d.ts │ ├── demo/ │ │ ├── BUILD │ │ ├── quickdraw.html │ │ └── quickdraw.ts │ └── lib/ │ ├── BUILD │ ├── axis.html │ ├── axis.ts │ ├── bounded-object.html │ ├── bounded-object.ts │ ├── data-example.html │ ├── data-example.ts │ ├── grid.html │ ├── grid.ts │ ├── info-renderers.html │ ├── info-renderers.ts │ ├── label.html │ ├── label.ts │ ├── layout.html │ ├── layout.ts │ ├── sorting.html │ ├── sorting.ts │ ├── sprite-atlas.html │ ├── sprite-atlas.ts │ ├── sprite-material.html │ ├── sprite-material.ts │ ├── sprite-mesh.html │ ├── sprite-mesh.ts │ ├── stats.html │ ├── stats.ts │ ├── string-format.html │ ├── string-format.ts │ ├── test/ │ │ ├── BUILD │ │ ├── axis_test.ts │ │ ├── bounded-object_test.ts │ │ ├── externs.js │ │ ├── grid_test.ts │ │ ├── layout_test.ts │ │ ├── sorting_test.ts │ │ ├── sprite-atlas_test.ts │ │ ├── sprite-material_test.ts │ │ ├── sprite-mesh_test.ts │ │ ├── stats_test.ts │ │ ├── string-format_test.ts │ │ ├── test.html │ │ ├── text_test.ts │ │ └── wordtree_test.ts │ ├── text.html │ ├── text.ts │ ├── wordtree.html │ └── wordtree.ts └── facets_overview/ ├── Overview_demo.ipynb ├── README.md ├── common/ │ ├── BUILD │ ├── common_bundle.html │ ├── feature_statistics_generator.ts │ ├── overview_data_model.ts │ ├── plottable_helpers.d.ts │ ├── plottable_helpers.js │ ├── test/ │ │ ├── BUILD │ │ ├── externs.js │ │ ├── feature_statistics_generator_test.ts │ │ ├── overview_data_model_test.ts │ │ ├── test.html │ │ ├── test_bundle.html │ │ └── utils_test.ts │ └── utils.ts ├── components/ │ ├── facets_overview/ │ │ ├── BUILD │ │ ├── facets-overview-filter-validator.html │ │ ├── facets-overview.html │ │ └── facets-overview.ts │ ├── facets_overview_chart/ │ │ ├── BUILD │ │ ├── externs.js │ │ ├── facets-overview-chart.html │ │ └── facets-overview-chart.ts │ ├── facets_overview_row_legend/ │ │ ├── BUILD │ │ ├── facets-overview-row-legend.html │ │ └── facets-overview-row-legend.ts │ ├── facets_overview_row_stats/ │ │ ├── BUILD │ │ ├── facets-overview-row-stats.html │ │ └── facets-overview-row-stats.ts │ └── facets_overview_table/ │ ├── BUILD │ ├── facets-overview-table.html │ └── facets-overview-table.ts ├── facets_overview/ │ ├── __init__.py │ ├── base_feature_statistics_generator.py │ ├── base_generic_feature_statistics_generator.py │ ├── feature_statistics_generator.py │ ├── feature_statistics_generator_test.py │ ├── feature_statistics_pb2.py │ ├── generic_feature_statistics_generator.py │ └── generic_feature_statistics_generator_test.py ├── functional_tests/ │ ├── many/ │ │ ├── BUILD │ │ ├── index.html │ │ ├── many-test.html │ │ └── many_test.ts │ ├── simple/ │ │ ├── BUILD │ │ ├── index.html │ │ ├── simple-test.html │ │ └── simple_test.ts │ ├── single/ │ │ ├── BUILD │ │ ├── index.html │ │ ├── single-test.html │ │ └── single_test.ts │ ├── single_feature/ │ │ ├── BUILD │ │ ├── index.html │ │ ├── single-feature-test.html │ │ └── single_feature_test.ts │ ├── stress/ │ │ ├── BUILD │ │ ├── index.html │ │ ├── stress-test.html │ │ └── stress_test.ts │ ├── test_helpers/ │ │ ├── BUILD │ │ ├── externs.js │ │ ├── test_helpers.ts │ │ └── test_helpers_bundle.html │ └── weighted/ │ ├── BUILD │ ├── index.html │ ├── weighted-test.html │ └── weighted_test.ts ├── proto/ │ ├── BUILD │ └── feature_statistics.proto ├── python/ │ ├── __init__.py │ ├── base_feature_statistics_generator.py │ ├── base_generic_feature_statistics_generator.py │ ├── feature_statistics_generator.py │ ├── feature_statistics_generator_test.py │ ├── feature_statistics_pb2.py │ ├── generic_feature_statistics_generator.py │ └── generic_feature_statistics_generator_test.py └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *~ /bazel-* **/__pycache__ **/.ipynb_checkpoints/ **/*.swp **/dist/ **/build/ **/facets_overview.egg-info/ ================================================ FILE: AUTHORS ================================================ # This the official list of Bazel rules_closure authors for copyright purposes. # This file is distinct from the CONTRIBUTORS files. # See the latter for an explanation. # Names should be added to this file as: # Name or Organization # The email address is not required for organizations. Google Inc. ================================================ FILE: CONTRIBUTING.md ================================================ Want to contribute? Great! First, read this page (including the small print at the end). ### Before you contribute Before we can use your code, you must sign the [Google Individual Contributor License Agreement] (https://cla.developers.google.com/about/google-individual) (CLA), which you can do online. The CLA is necessary mainly because you own the copyright to your changes, even after your contribution becomes part of our codebase, so we need your permission to use and distribute your code. We also need to be sure of various other things—for instance that you'll tell us if you know that your code infringes on other people's patents. You don't have to sign the CLA until after you've submitted your code for review and a member has approved it, but you must do it before we can put your code into our codebase. Before you start working on a larger contribution, you should get in touch with us first through the issue tracker with your idea so that we can help out and possibly guide you. Coordinating up front makes it much easier to avoid frustration later on. ### Code reviews All submissions, including submissions by project members, require review. We use Github pull requests for this purpose. ### The small print Contributions made by corporations are covered by a different agreement than the one above, the [Software Grant and Corporate Contributor License Agreement] (https://cla.developers.google.com/about/google-corporate). ================================================ FILE: CONTRIBUTORS ================================================ # People who have agreed to one of the CLAs and can contribute patches. # The AUTHORS file lists the copyright holders; this file # lists people. For example, Google employees are listed here # but not in AUTHORS, because Google holds the copyright. # # https://developers.google.com/open-source/cla/individual # https://developers.google.com/open-source/cla/corporate # # Names should be added to this file as: # Name James Wexler Jimbo Wilson Justine Tunney ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Introduction The facets project contains two visualizations for understanding and analyzing machine learning datasets: Facets Overview and Facets Dive. The visualizations are implemented as [Polymer](https://www.polymer-project.org) web components, backed by [Typescript](https://www.typescriptlang.org) code and can be easily embedded into Jupyter notebooks or webpages. Live demos of the visualizations can be found on the [Facets project description page](https://pair-code.github.io/facets/). ## Facets Overview ![Overview visualization of UCI census data](/img/overview-census.png "Overview visualization of UCI census data - Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml/datasets/Census+Income]. Irvine, CA: University of California, School of Information and Computer Science") Overview gives a high-level view of one or more data sets. It produces a visual feature-by-feature statistical analysis, and can also be used to compare statistics across two or more data sets. The tool can process both numeric and string features, including multiple instances of a number or string per feature. Overview can help uncover issues with datasets, including the following: * Unexpected feature values * Missing feature values for a large number of examples * Training/serving skew * Training/test/validation set skew Key aspects of the visualization are outlier detection and distribution comparison across multiple datasets. Interesting values (such as a high proportion of missing data, or very different distributions of a feature across multiple datasets) are highlighted in red. Features can be sorted by values of interest such as the number of missing values or the skew between the different datasets. The python code to generate the statistics for visualization can be installed through `pip install facets-overview`. As of version 1.1.0, the `facets-overview` package requires a version of `protobuf` at version 3.20.0 or later. Details about Overview usage can be found in its [README](./facets_overview/README.md). ## Facets Dive ![Dive visualization of UCI census data](/img/dive-census.png "Dive visualization of UCI census data - Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml/datasets/Census+Income]. Irvine, CA: University of California, School of Information and Computer Science") Dive is a tool for interactively exploring up to tens of thousands of multidimensional data points, allowing users to seamlessly switch between a high-level overview and low-level details. Each example is a represented as single item in the visualization and the points can be positioned by faceting/bucketing in multiple dimensions by their feature values. Combining smooth animation and zooming with faceting and filtering, Dive makes it easy to spot patterns and outliers in complex data sets. Details about Dive usage can be found in its [README](./facets_dive/README.md). # Setup ## Usage in Google Colabratory/Jupyter Notebooks Using Facets in [Google Colabratory](https://colab.research.google.com) and [Jupyter](http://jupyter.org) notebooks can be seen [in this notebook](https://colab.research.google.com/github/PAIR-code/facets/blob/master/colab_facets.ipynb). These notebooks work without the need to first download/install this repository. Both Facets visualizations make use of HTML imports. So in order to use them, you must first load the appropriate polyfill, through ``, as shown in the demo notebooks in this repo. Note that for using Facets Overview in a Jupyter notebook, there are two considerations: 1. In the notebook, you will need to change the path that the Facets Overview python code is loaded from to the correct path given where your notebook kernel is run from. 2. You must also have the Protocol Buffers python runtime library installed: https://github.com/google/protobuf/tree/master/python. If you used pip or anaconda to install Jupyter, you can use the same tool to install the runtime library. When visualizing a large amount of data in Dive in a Juypter notebook, as is done in the [Dive demo Jupyter notebook](./facets_dive/Dive_demo.ipynb), you will need to start the notebook server with an increased IOPub data rate. This can be done with the command ```jupyter notebook --NotebookApp.iopub_data_rate_limit=10000000```. ## Code Installation ``` git clone https://github.com/PAIR-code/facets cd facets ``` ## Building the Visualizations If you make code changes to the visualization and would like to rebuild them, follow these directions: 1. Install bazel: https://bazel.build/ 2. Build the visualizations: ```bazel build facets:facets_jupyter``` (run from the facets top-level directory) ## Using the rebuilt Visualizations in a Jupyter notebook If you want to use the visualizations you built locally in a Jupyter notebook, follow these directions: 1. Move the resulting vulcanized html file from the build step into the facets-dist directory: ```cp -f bazel-bin/facets/facets-jupyter.html facets-dist/``` 2. Install the visualizations into Jupyter as an nbextension. * If jupyter was installed with pip, you can use ```jupyter nbextension install facets-dist/ ``` if jupyter was installed system-wide or ```jupyter nbextension install facets-dist/ --user``` if installed per-user (run from the facets top-level directory). You do not need to run any follow-up ```jupyter nbextension enable``` command for this extension. * Alternatively, you can manually install the nbextension by finding your jupyter installation's ```share/jupyter/nbextensions``` folder and copying the facets-dist directory into it. 3. In the notebook cell's HTML link tag that loads the built facets html, load from ```/nbextensions/facets-dist/facets-jupyter.html```, which is the locally installed facets distribution. from the previous step. ## Known Issues * The Facets visualizations currently work only in Chrome - [Issue 9](../../issues/9). **Disclaimer: This is not an official Google product** ================================================ FILE: WORKSPACE ================================================ workspace(name = "ai_google_pair_facets") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") # Needed as a transitive dependency of rules_webtesting below. http_archive( name = "bazel_skylib", sha256 = "2b9af2de004d67725c9985540811835389b229c27874f2e15f5e319622a53a3b", strip_prefix = "bazel-skylib-e9fc4750d427196754bebb0e2e1e38d68893490a", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/archive/e9fc4750d427196754bebb0e2e1e38d68893490a.tar.gz", "https://github.com/bazelbuild/bazel-skylib/archive/e9fc4750d427196754bebb0e2e1e38d68893490a.tar.gz", ], ) load("@bazel_skylib//lib:versions.bzl", "versions") versions.check(minimum_bazel_version = "0.22.0") http_archive( name = "io_bazel_rules_closure", sha256 = "b6936ecc0b5a1ef616b9d7e76694d414aa5605265c11322257a610fb256b1bf7", # The changes that we need for Bazel 0.26 compatibility are not in # any release, so we pin to HEAD as of 2019-06-04. strip_prefix = "rules_closure-7434c41542ca9e1b05166d897b90073d1b8b2cf8", urls = [ "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/7434c41542ca9e1b05166d897b90073d1b8b2cf8.tar.gz", "https://github.com/bazelbuild/rules_closure/archive/7434c41542ca9e1b05166d897b90073d1b8b2cf8.tar.gz", # 2019-06-04 ], ) http_archive( name = "io_bazel_rules_webtesting", sha256 = "89f041028627d801ba3b4ea1ef2211994392d46e25c1fc3501b95d51698e4a1e", strip_prefix = "rules_webtesting-0.2.2", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/rules_webtesting/archive/0.2.2.tar.gz", "https://github.com/bazelbuild/rules_webtesting/archive/0.2.2.tar.gz", ], ) load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories") load("@io_bazel_rules_closure//closure:defs.bzl", "web_library_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") closure_repositories( # omit_com_google_protobuf = True, omit_com_google_protobuf_js = True, ) http_archive( name = "org_tensorflow_tensorboard", sha256 = "1534836f297ae70d089379bc8f91e8817bd7911066bd8a93088f095bd05797e5", strip_prefix = "tensorboard-dcf27c0496348349f9d905d09ed80717f421cdfe", urls = [ "https://github.com/stephanwlee/tensorboard/archive/dcf27c0496348349f9d905d09ed80717f421cdfe.tar.gz", # 2019-07-12 ], ) load("@org_tensorflow_tensorboard//third_party:workspace.bzl", "tensorboard_workspace") tensorboard_workspace() web_library_external( name = "org_polymer_paper_card", srcs = ["paper-card.html"], licenses = ["notice"], # BSD-3-Clause path = "/paper-card", sha256 = "daf6f5326501f74811c2e10ca4ca8d2a42613e88f3ac64e218e6a3cf4cc1dac2", strip_prefix = "paper-card-2.0.0", urls = [ "https://mirror.bazel.build/github.com/PolymerElements/paper-card/archive/v2.0.0.tar.gz", "https://github.com/PolymerElements/paper-card/archive/v2.0.0.tar.gz", ], deps = [ "@org_polymer", "@org_polymer_iron_flex_layout", "@org_polymer_iron_image", "@org_polymer_paper_styles", ], ) web_library_external( name = "org_polymer_iron_image", srcs = ["iron-image.html"], licenses = ["notice"], # BSD-3-Clause path = "/iron-image", sha256 = "40c7b2ec941e29a1721c6fb19d6de69308c50a960a3c3319faf2447eed0d4d88", strip_prefix = "iron-image-2.0.0", urls = [ "https://mirror.bazel.build/github.com/PolymerElements/iron-image/archive/v2.0.0.tar.gz", "https://github.com/PolymerElements/iron-image/archive/v2.0.0.tar.gz", ], deps = [ "@org_polymer", ], ) web_library_external( name = "org_polymer_iron_validator_behavior", srcs = ["iron-validator-behavior.html"], licenses = ["notice"], # BSD-3-Clause path = "/iron-validator-behavior", sha256 = "0956488f849c0528d66d5ce28bbfb66e163a7990df2cc5f157a5bf34dcb7dfd2", strip_prefix = "iron-validator-behavior-1.0.2", urls = [ "https://mirror.bazel.build/github.com/PolymerElements/iron-validator-behavior/archive/v1.0.2.tar.gz", "https://github.com/PolymerElements/iron-validator-behavior/archive/v1.0.2.tar.gz", ], deps = [ "@org_polymer", "@org_polymer_iron_meta", ], ) ================================================ FILE: colab_facets.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Facets Dive and Overview Colab Example", "version": "0.3.2", "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "M7JcESAhpKG-", "colab_type": "code", "colab": {} }, "source": [ "#@title Install the facets_overview pip package.\n", "!pip install facets-overview" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "colab_type": "code", "id": "blPpZw5R3Bb4", "colab": {} }, "source": [ "# Load UCI census train and test data into dataframes.\n", "import pandas as pd\n", "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", " \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", " \"Hours per week\", \"Country\", \"Target\"]\n", "train_data = pd.read_csv(\n", " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n", " names=features,\n", " sep=r'\\s*,\\s*',\n", " engine='python',\n", " na_values=\"?\")\n", "test_data = pd.read_csv(\n", " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", " names=features,\n", " sep=r'\\s*,\\s*',\n", " skiprows=[0],\n", " engine='python',\n", " na_values=\"?\")" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "colab_type": "code", "id": "XtOzRy8Z3M36", "colab": {} }, "source": [ "\n", "# Display the Dive visualization for the training data.\n", "from IPython.core.display import display, HTML\n", "\n", "jsonstr = train_data.to_json(orient='records')\n", "HTML_TEMPLATE = \"\"\"\n", " \n", " \n", " \n", " \"\"\"\n", "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", "display(HTML(html))" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "colab_type": "code", "id": "mjv5Kr1Mflq7", "colab": {} }, "source": [ "# Create the feature stats for the datasets and stringify it.\n", "import base64\n", "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n", "\n", "gfsg = GenericFeatureStatisticsGenerator()\n", "proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},\n", " {'name': 'test', 'table': test_data}])\n", "protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "colab_type": "code", "id": "b7zs2p2_goJa", "colab": {} }, "source": [ "# Display the facets overview visualization for this data\n", "from IPython.core.display import display, HTML\n", "\n", "HTML_TEMPLATE = \"\"\"\n", " \n", " \n", " \n", " \"\"\"\n", "html = HTML_TEMPLATE.format(protostr=protostr)\n", "display(HTML(html))" ], "execution_count": 0, "outputs": [] } ] } ================================================ FILE: facets/BUILD ================================================ package(default_visibility = ["//visibility:public"]) load("@org_tensorflow_tensorboard//tensorboard/defs:web.bzl", "tf_web_library") load("@org_tensorflow_tensorboard//tensorboard/defs:vulcanize.bzl", "tensorboard_html_binary") licenses(["notice"]) # Apache 2.0 tf_web_library( name = "visualizations", srcs = [ "visualizations.html", ], path = "/facets", deps = [ "//facets_dive/components/facets_dive", "//facets_overview/components/facets_overview", "@org_tensorflow_tensorboard//tensorboard/components/tf_imports:polymer", ], ) # Compiles standalone HTML for Facets Dive demo, used by ":facets_jupyter" # # NOTE: This runs TensorBoard Vulcanize.java to inline HTML imports and # runs the Closure Compiler on the JavaScript outputted by the # TypeScript Compiler, in order to remove ES6 imports, which don't # work in web browsers. Otherwise we'd `bazel run` tf_web_library. tensorboard_html_binary( name = "facets", compile = True, input_path = "/facets/visualizations.html", output_path = "/all/visualizations.html", deps = [":visualizations"], ) # Add javascript to undefine the define function when building the vulcanized # visualizations. This is to avoid issues with require.js dependency loading # when using the visualizations inside of a Jupyter notebook. # TODO(jwexler): Figure out a cleaner way to get vulcanized visualizations that # work in Jupyter notebooks. genrule( name = "facets_jupyter", srcs = [":facets"], outs = ["facets-jupyter.html"], cmd = "sed 's|||' $(location :facets) > $@", ) tf_web_library( name = "colab", srcs = [ "colab.html", ], path = "/facets", deps = [ "//facets_dive/components/facets_dive", "//facets_overview/components/facets_overview", "@org_tensorflow_tensorboard//tensorboard/components/tf_imports:polymer", ], ) # Compiles standalone HTML for Facets in colab. # # NOTE: This runs TensorBoard Vulcanize.java to inline HTML imports and # runs the Closure Compiler on the JavaScript outputted by the # TypeScript Compiler, in order to remove ES6 imports, which don't # work in web browsers. Otherwise we'd `bazel run` tf_web_library. tensorboard_html_binary( name = "facets_colab", compile = True, input_path = "/facets/colab.html", output_path = "/all/colab.html", deps = [":colab"], ) ================================================ FILE: facets/colab.html ================================================ ================================================ FILE: facets/visualizations.html ================================================ ================================================ FILE: facets-dist/facets-jupyter.html ================================================ ================================================ FILE: facets_atlasmaker/.gitignore ================================================ /bazel-* **/*.pyc # virtualenv ENV* # test output dir testoutput/* outputs/* # Utilities utils/*.csv ================================================ FILE: facets_atlasmaker/BUILD ================================================ licenses(["notice"]) # Apache 2.0 py_binary( name = "atlasmaker", srcs = ["atlasmaker.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ ":atlasmaker_io", ":convert", ], ) py_library( name = "convert", srcs = ["convert.py"], srcs_version = "PY2AND3", deps = [ ], ) py_library( name = "atlasmaker_io", srcs = ["atlasmaker_io.py"], srcs_version = "PY2AND3", ) ================================================ FILE: facets_atlasmaker/README.md ================================================ # Facets Atlasmaker Atlasmaker is a command line utility and library for creating sprite atlases. These atlases can be used in Facets Dive and other applications. ## Setup and Dependencies Although optional, we recommend that you install packages within a [virtualenv](https://virtualenv.pypa.io/en/stable/) for dependency isolation. The easiest way to get the required and optional dependencies is to install them from the requirements.txt file via pip: ```sh pip install -r requirements.txt ``` However, you can also choose to install requirements manually. If so, you'll need these general python packages: ``` pip install absl-py pip install mock ``` As well as these specific packages for Atlasmaker: ``` pip install pillow pip install joblib pip install requests ``` Additionally, you can install these optional dependencies: * Tensorflow: [install Tensorflow and its dependencies](https://www.tensorflow.org/install/install_sources#install_tensorflow_python_dependencies) (if you haven't done so already from previous Facets setup steps). This provides an interface (gfile) for interfacing with other google file sources. * [Nose](http://nose.readthedocs.io/en/latest/): for running unit tests. * pylint: For linting code against the provided Google style pylintrc config. ## Functionality Atlasmaker currently supports the following functionality: * Reading input images from localfile and URLs (http/https). * Using a default image in the atlas if an image was not successfully retrieved or converted. * Various image resizing and conversion options. * Parallelization of image fetch/conversion. Future features (in rough order of priority) that it may support include: * Conversion of a single image. * Caching images onto disk instead of doing everything in memory (useful when converting many large image files). * Reading from other storage types, such as Google Cloud Storage. * Reading the source images list from a [protocol buffer](https://developers.google.com/protocol-buffers/docs/overview) text format file to allow different conversion settings by image if desired. * Creating sprite atlases comprised of different sized images. * Creating multiple sprite atlases based on desired atlas sizes. ## Getting Started: Generate Your First Sprite Atlas To build and then execute the binary, first run bazel build from this directory: ```sh bazel build :atlasmaker # The binary will be within the bazel-bin directory in the root Facets directory, i.e.: ../bazel-bin/facets_atlasmaker/atlasmaker ``` To run Atlasmaker, you point it to a file listing desired source images as well as conversion options via command-line flags. To list the full set of flags, call the ```--help``` flag. E.g., ```sh ../bazel-bin/facets_atlasmaker/atlasmaker --help ``` Here's an example command for combining a set of images from wikipedia into an atlas and manifest located in an `outputs` subdirectory: ```sh # Create temp dir to hold outputs if it doesn't exist (optional) mkdir $PWD/outputs/ # Now create your sprite atlas ../bazel-bin/facets_atlasmaker/atlasmaker --sourcelist=$PWD/testdata/wikipedia_images_16.csv \ --output_dir=$PWD/outputs/ --image_width=50 --image_height=50 \ --default_image_path=https://upload.wikimedia.org/wikipedia/en/d/d1/Image_not_available.png ``` Alternatively, you can also run Atlasmaker with a generated list of images from wikipedia, using the utility in the `utils/` directory to create this input list. Some of the most useful image settings flags you can set include: * ```--image_width``` (required): width of each final desired output image in pixels. * ```--image_height``` (required): height of each final desired output image in pixels. * ```--image_format```: Output image format, such as png, jpeg, etc. By default is png. * ```--keep_aspect_ratio```: Whether to retain the image aspect ratio, or make a best effort to fit resize original image, cropping as necessary. * ```---bg_color_name or --bg_color_rgb```: You can specify the background used (e.g., for images that were resized and now don't fill the entire output sprite as well as for locations on the atlas with no images) via a color name or RGB value, such as 'green' or '255,255,0'. ## Reading the Manifest The manifest provides you a comprehensive list of all the images contained in the Atlas, their positions in the Atlas (as the location of each image's left corner in relation to (x, y) pixel offsets from the top left Atlas corner), and any failure statuses. Some things to know: * Each line is a JSON object representing metadata about a single image. * Any images that failed retrieval/conversion and were replaced by a default image will have an 'error' key with its associated failure message as value. ## For Developers ### pylint This code should follow the [Google Python Style Guide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md). Running pylint using the provided pylintrc file allow you to lint according to Google style. However, note that we deviate from the style guide (and PEP8) for indents by using 2 spaces, not 4 (to match the internal Google Python Style guide). Sorry! ### Unit Tests Note that for Google compatibility we use [abseil](https://abseil.io/)'s unit testing framework instead of unittest/pyunit, but using this framework should feel almost identical to unittest. We recommend you run all the unit tests with ```nose``` (i.e., as standard unittests, not Bazel tests). There are no Bazel test rules for running the tests because Bazel doesn't yet fully support testing using dependencies from a virtualenv. Once you have nose installed, you can simply run all tests within the facets atlasmaker directory with command: `nosetests`. ================================================ FILE: facets_atlasmaker/atlasmaker.py ================================================ """Main function for sprite atlas maker command line utility.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os from absl import app from absl import flags from absl import logging from PIL import ImageColor import atlasmaker_io import convert import montage import parallelize # DEFAULT settings _DEFAULT_COLOR_RGB = [255, 255, 255] FLAGS = flags.FLAGS flags.DEFINE_integer('max_failures', None, 'Max number of images that can fail retrieval and ' 'conversion before atlasmaker aborts.') flags.DEFINE_string('sourcelist', None, 'Full path to csv file that lists source images. Each line ' 'should contain the path to an image.') flags.DEFINE_enum('sourcelist_dups_handling', 'ignore', ['ignore', 'fail', 'unique'], 'What action to take if duplicate image locations are found ' 'in the source list.') flags.DEFINE_string('default_error_image', None, 'Path to image that should be used if desired image fails ' 'retrieval or conversion. If specified, we require ' 'successfully processing of this default image before ' 'any other operations can proceed.') flags.DEFINE_string('output_dir', None, 'Output location where final sprite atlas and manifest ' 'will be written. If not specified, it will write to the ' 'existing directory. If directory doesn\'t exist, will ' 'attempt to create it.') flags.DEFINE_integer('http_request_timeout', 60, 'Timeout in seconds that we will wait to fetch http ' 'image requests. See the requests library documentation ' 'for more details.') flags.DEFINE_integer('http_max_retries', 2, 'Max number of times we will retry to fetch an image for ' 'timeout errors, and only timeout errors.') # Atlas settings flags.DEFINE_integer('atlas_width', None, 'Desired width for each atlas (number of images).') flags.DEFINE_integer('atlas_height', None, 'Desired height for each atlas (number of images).') # Default image flags.DEFINE_string('default_image_path', None, 'Default image to use if unable to retrieve image. ' 'If not specified, we\'ll use the specified background ' 'color and opacity') # Image settings flags.DEFINE_string('image_format', 'png', 'Desired image output format. For a list of fully ' 'supported formats, see ' 'http://pillow.readthedocs.io/en/latest/handbook/' 'image-file-formats.html#fully-supported-formats') flags.DEFINE_bool('keep_aspect_ratio', True, 'Whether to retain aspect ratio ' 'of original image') flags.DEFINE_integer('image_width', None, 'Desired output width for each image (in pixels).') flags.DEFINE_integer('image_height', None, 'Desired output height for each image (in pixels).') flags.DEFINE_integer('image_opacity', 0, 'Desired opacity to use for background (0 to 255).') flags.DEFINE_bool('resize_if_larger', False, 'Resize image larger if desired size is larger than source ' 'image') flags.DEFINE_bool('use_truncated_images', False, 'If true, PIL will attempt to load and process truncated ' 'images') flags.mark_flag_as_required('output_dir') flags.mark_flag_as_required('image_width') flags.mark_flag_as_required('image_height') # Background and default image color settings flags.DEFINE_list('bg_color_rgb', _DEFAULT_COLOR_RGB, 'This is one of two ways to specify the desired background ' 'color that will be used to fill in empty space after image ' 'resizing (as well as the default image upon failure to get ' 'or convert an image if no an default image is explicitly ' 'specified. Default color is transparent (RGB (0,0,0).') flags.DEFINE_string('bg_color_name', None, 'If specified, will attempt to set the color RGB values ' 'based on this name instead of using RGB values. See the ' 'PIL ColorModule documentation for more details. ' 'This is one of two ways to specify the desired background ' 'color that will be used to fill in empty space after image ' 'resizing (as well as the default image upon failure to get ' 'or convert an image if no an default image is explicitly ' 'specified. Default color is transparent (RGB (0,0,0).') # Parallelization settings flags.DEFINE_integer('num_parallel_jobs', -1, 'Number of threads to parallelize with. If -1, will ' 'autoset to number of CPUs.') flags.DEFINE_integer('parallelization_verbosity', 10, 'Verbosity level for parallel. See joblib.Parallel ' 'documentation.') def _determine_bg_rgb(): """Helper method that returns background color RGB.""" if FLAGS.bg_color_name is not None: return ImageColor.getrgb(FLAGS.bg_color_name) return tuple(FLAGS.bg_color_rgb) def main(argv): del argv # Unused. # TODO: Add more flag validations. if FLAGS.max_failures is not None and FLAGS.max_failures > 0: raise NotImplementedError( 'Does not yet handle image retrieval/conversion ' 'failures') if FLAGS.atlas_width is not None or FLAGS.atlas_height is not None: raise NotImplementedError( 'Does not yet support specifying an atlas size.') if FLAGS.sourcelist is None: raise flags.ValidationError('You must specify a list of image sources.') bg_color_rgb = _determine_bg_rgb() outputdir = FLAGS.output_dir if outputdir is None: outputdir = os.path.join(os.getcwd()) image_source_list = atlasmaker_io.read_src_list_csvfile( FLAGS.sourcelist, FLAGS.sourcelist_dups_handling) # Provide some useful confirmation info about settings to user. logging.info('Desired output size in pixels width, height for each image is: ' '(%d, %d)' % (FLAGS.image_width, FLAGS.image_height)) logging.info('Image format for Atlas is: %s' % FLAGS.image_format) logging.info('Background RGB is set to %s' % str(bg_color_rgb)) logging.info('Background opacity is set to %d' % FLAGS.image_opacity) logging.info('Should we preserve image aspect ratio during conversion? %s' % FLAGS.keep_aspect_ratio) image_convert_settings = convert.ImageConvertSettings( img_format=FLAGS.image_format, width=FLAGS.image_width, height=FLAGS.image_height, bg_color_rgb=bg_color_rgb, opacity=FLAGS.image_opacity, preserve_aspect_ratio=FLAGS.keep_aspect_ratio, resize_if_larger=FLAGS.resize_if_larger) # Ensure we can write to the output dir or fail fast. atlasmaker_io.create_output_dir_if_not_exist(FLAGS.output_dir) # Create default image to be used for images that we can't get or convert. if FLAGS.default_image_path is not None: logging.info('Using image %s as default image when a specified image ' 'can\'t be fetched or converted' % FLAGS.default_image_path) default_img = parallelize.convert_default_image( FLAGS.default_image_path, image_convert_settings) else: logging.info('No default image for failures specified by user, so just ' 'using the background as the default image.') default_img = convert.create_default_image(image_convert_settings) # Verify we can write the specified output format, or fail fast. try: testimage_file_name = '{}.{}'.format('testimage', str(FLAGS.image_format).lower()) atlasmaker_io.save_image( default_img, os.path.join(FLAGS.output_dir, testimage_file_name), delete_after_write=True) logging.info('Confirmed we can output images in %s format' % FLAGS.image_format) except: logging.error('Unable to write test image in desired output format. ' 'Please confirm that \'%s\' is a supported PIL output ' 'format.' % FLAGS.image_format) raise # Convert images in parallel. logging.info('Scheduling %d tasks.' % len(image_source_list)) converted_images_with_statuses = parallelize.get_and_convert_images_parallel( image_source_list, image_convert_settings, n_jobs=FLAGS.num_parallel_jobs, verbose=FLAGS.parallelization_verbosity, allow_truncated_images=FLAGS.use_truncated_images, request_timeout=FLAGS.http_request_timeout, http_max_retries=FLAGS.http_max_retries) sprite_atlas_settings = montage.SpriteAtlasSettings( img_format=FLAGS.image_format, height=FLAGS.atlas_height, width=FLAGS.atlas_width) # Generate the atlas from converted images. sprite_atlas_generator = montage.SpriteAtlasGenerator( images_with_statuses=converted_images_with_statuses, img_src_paths=image_source_list, atlas_settings=sprite_atlas_settings, default_img=default_img) atlases, manifests = sprite_atlas_generator.create_atlas() atlasmaker_io.save_atlas_and_manifests( outdir=outputdir, atlases=atlases, manifests=manifests, sprite_atlas_settings=sprite_atlas_settings) if __name__ == '__main__': app.run(main) ================================================ FILE: facets_atlasmaker/atlasmaker_io.py ================================================ """Utilities and wrappers for file and source catalog I/O.""" import io import json import os import time import sys if sys.version_info[0] == 2: from urlparse import urlparse else: from urllib.parse import urlparse from absl import logging from PIL import Image import requests from requests.exceptions import Timeout # Use TF gfile interface if available (for supporting more file types). try: import tensorflow as tf except ImportError: pass # Location Type names LOCATION_URL = 'url' LOCATION_GFILE = 'gfile' def create_output_dir_if_not_exist(dirpath, testfile_name='testfile'): """Ensure output dir exists and can be written to. If dir doesn't exist, attempts to create it. Writes and deletes a test file to confirm we have write permissions. Args: dirpath: local directory path. Raises: OSError: Directory can't be created. IOError: Test file can't be written. """ if not os.path.isdir(dirpath): logging.info('Output dir %s doesn\'t exist, so attempting to create it.' % dirpath) os.makedirs(dirpath) with open(os.path.join(dirpath, testfile_name), 'w') as testfile: testfile.write('') logging.debug('Successfully wrote test file to output dir.') os.remove(os.path.join(dirpath, testfile_name)) logging.info('Confirmed we have permissions to write to output dir.') def _check_src_list_dups(locations, handle_dups='ignore'): """Check source list for duplicate source image locations. If dups are found, either ignore with warning, don't use duplicates (take the first one encountered), or fail. Args: locations: List of source image locations. handle_dups: One of the following strings: ignore, fail, unique. Returns: List of file locations. """ ingore = 'ignore' fail = 'fail' unique = 'unique' logging.info('Number of images listed in source list: %d' % len(locations)) if handle_dups not in [ingore, fail, unique]: raise ValueError('Unknown action for handling dups in source list.') if len(locations) == len(set(locations)): logging.debug('No duplicates in source list.') return locations uniques = [] dups = set() seen = set() for location in locations: if location not in seen: uniques.append(location) seen.add(location) else: dups.add(location) if handle_dups == fail: raise ValueError('Found duplicates in source list: %s' % ', '.join(dups)) logging.warn('Found the following duplicates in source list: %s' % ', '.join(dups)) if handle_dups == unique: logging.info('Found duplicates but only using %d unique entries in image ' 'source list' % len(uniques)) return uniques return locations def read_src_list_csvfile(filepath, handle_dups='ignore'): """Read source list from csv file. Each line should contain the location of a source image file. Returns: List of file locations. """ logging.debug('Reading images list from %s.' % filepath) try: with tf.gfile.GFile(filepath) as input_file: return _check_src_list_dups(input_file.read().splitlines(), handle_dups) except NameError: with open(filepath) as input_file: return _check_src_list_dups(input_file.read().splitlines(), handle_dups) def get_image(location, request_timeout=60, http_max_retries=2, http_retry_interval=3): """Wrapper function that routes to appropriate utility to get image data. Args: location: location of source image data. This can be a URL, local file location, or possibly other location types which may be supported in the future. If localfile, it should be the full path to the file. request_timeout: Timeout in seconds for file download. http_max_retries: Max number of attempts we will try to retrive http images due to timeout errors. http_retry_interval: Seconds to wait between retries. Returns: PIL Image object. """ if http_max_retries < 1: raise ValueError('Max retries must be 1 or greater.') if http_retry_interval < 0: raise ValueError('Retry interval must be 0 or more seconds.') if urlparse(location).scheme in ('http', 'https'): # File should be downloaded from URL. # Retry if we run into timeout errors, give up otherwise. attempts = 0 while attempts < http_max_retries - 1: try: req = requests.get(location, stream=True, timeout=request_timeout) image_data = io.BytesIO(req.raw.read()) return Image.open(image_data) except Timeout: logging.debug('Timeout error while retrieving image from URL. Waiting ' '%d seconds before retrying' % http_retry_interval) time.sleep(http_retry_interval) except Exception: raise attempts += 1 # Final attempt req = requests.get(location, stream=True, timeout=request_timeout) image_data = io.BytesIO(req.raw.read()) return Image.open(image_data) else: try: with tf.gfile.GFile(location) as input_file: return Image.open(input_file) except NameError: return Image.open(location) def save_atlas_and_manifests(outdir, atlases, manifests, sprite_atlas_settings): """Write atlases and manifests to local file. Handles multiple items. Args: outdir: full path to output directory atlases: List of sprite atlas PIL Image objects. manifests: List of manifests (list of dicts) sprite_atlas_settings: SpriteAtlasSettings object. """ if len(atlases) == 1: save_image(atlases[0], os.path.join( outdir, # Filename with correct image format extension '{}.{}'.format( sprite_atlas_settings.filename, str(sprite_atlas_settings.img_format).lower()))) _output_manifest( os.path.join( outdir, sprite_atlas_settings.manifest_filename + '.json'), manifests[0]) else: for idx, atlas in enumerate(atlases): save_image(atlas, os.path.join( outdir, # Filename with correct image format extension '{}{}.{}'.format( sprite_atlas_settings.filename, str(idx), str(sprite_atlas_settings.img_format).lower()))) _output_manifest( os.path.join( outdir, sprite_atlas_settings.manifest_filename + str(idx) + '.json'), manifests[idx]) def save_image(img, outpath, delete_after_write=False): """Save an image to file. We are using RGBA by default, but not all types can use RGBA, such as JPEG, so this handles conversions if needed. For output format validation purposes, saving a test image to disk verifies that the specified output format is supported by PIL, as there's no API to verify that the image format string is allowed other than by attempting to save the image. Args: img: PIL Image object. outpath: Full output path for image along with image format extension. E.g., /path/to/myimage.jpg delete_after_write: If True, will delete the image after writing it. This should be used when writing a test image to disk to verify that PIL can actually output the specified image format. """ try: img.save(outpath) except IOError: logging.warn('Unable to save image as RGBA to desired output format. ' 'Converting to RGB and retrying.') img.convert('RGB').save(outpath) logging.info('Successfully saved image in RGB color space.') if delete_after_write: os.remove(outpath) def _output_manifest(filepath, manifest): """Writes sprite atlas manifest to localfile. Each image manifest is a json object listed on a separate line. Args: filepath: full filepath to output manifest. manifest: list of dicts. Returns: Nothing. """ with open(filepath, 'a') as fp: fp.seek(0, 0) fp.truncate() for item in manifest: json.dump(item, fp) fp.write('\n') ================================================ FILE: facets_atlasmaker/atlasmaker_io_test.py ================================================ """Unit tests for IO utilities.""" import io import os import shutil import mock from absl.testing import absltest from PIL import Image from requests.exceptions import ConnectTimeout, ReadTimeout, InvalidURL import atlasmaker_io TESTDATA_DIR = 'testdata' class AtlasmakerIOTests(absltest.TestCase): def setUp(self): self.testdata_dir = os.path.join(os.getcwd(), TESTDATA_DIR) def testCreateOutputDir(self): # Verify we create a output dir when it doesn't exist. test_subdir = 'temptest' full_testdir_path = os.path.join(self.testdata_dir, test_subdir) try: # Ensure we are starting with a nonexistent dir. if os.path.isdir(full_testdir_path): shutil.rmtree(full_testdir_path) atlasmaker_io.create_output_dir_if_not_exist(full_testdir_path) self.assertEqual(os.path.isdir(full_testdir_path), True) except: raise finally: shutil.rmtree(os.path.join(self.testdata_dir, test_subdir), ignore_errors=True) @mock.patch.object(atlasmaker_io, 'os') def testCreateOutputDirUnableToCreateDir(self, mock_os): # Should raise error when unable to create dir. test_subdir = 'temptest' full_testdir_path = os.path.join(self.testdata_dir, test_subdir) mock_os.path.isdir.return_value = False with mock.patch.object(mock_os, 'makedirs') as makedirs_mock: makedirs_mock.side_effect = OSError() with self.assertRaises(OSError): atlasmaker_io.create_output_dir_if_not_exist(full_testdir_path) @mock.patch.object(atlasmaker_io, 'os') def testCreateOutputDirFailsWrite(self, mock_os): # Should fail if testfile can't be written. test_subdir = 'temptest' full_testdir_path = os.path.join(self.testdata_dir, test_subdir) mock_os.path.isdir.return_value = True with mock.patch('__builtin__.open', mock.mock_open(read_data='data')): with open(full_testdir_path, 'w') as mockfile: mockfile.write.side_effect = IOError() with self.assertRaises(IOError): atlasmaker_io.create_output_dir_if_not_exist(full_testdir_path) def testReadSrcListCsvfile(self): # Test file only contains unique entries expected = ['https://www.wikipedia/image1.png', 'http://www.wordpress/testimage1.png', 'http://www.npr.org/myimageA.jpg'] testfile = os.path.join(self.testdata_dir, 'testfiles_smalllist.csv') self.assertSameElements(atlasmaker_io.read_src_list_csvfile(testfile), expected) def testReadSrcListCsvfileIgnoreDups(self): expected = ['https://www.wikipedia/image1.png', 'http://www.wordpress/testimage1.png', 'http://www.wordpress/testimage1.png', 'http://www.npr.org/myimageA.jpg', 'https://www.wikipedia/image1.png'] testfile = os.path.join(self.testdata_dir, 'testfiles_smalllist_with_dups.csv') self.assertSameElements(atlasmaker_io.read_src_list_csvfile(testfile), expected) def testReadSrcListCsvfileFailWhenDups(self): testfile = os.path.join(self.testdata_dir, 'testfiles_smalllist_with_dups.csv') with self.assertRaises(ValueError): atlasmaker_io.read_src_list_csvfile(testfile, handle_dups='fail') def testReadSrcListCsvfileDupsReturnUniques(self): expected = ['https://www.wikipedia/image1.png', 'http://www.wordpress/testimage1.png', 'http://www.npr.org/myimageA.jpg'] testfile = os.path.join(self.testdata_dir, 'testfiles_smalllist_with_dups.csv') self.assertSameElements( atlasmaker_io.read_src_list_csvfile(testfile, handle_dups='unique'), expected) def testGetImageFromLocalFile(self): testfile = os.path.join(self.testdata_dir, 'Googleplex-Patio-Aug-2014.JPG') image = atlasmaker_io.get_image(testfile) self.assertEqual(image.size, (3264, 2448)) @mock.patch.object(atlasmaker_io, 'requests') @mock.patch.object(atlasmaker_io, 'io') def testGetImageFromUrlHTTP(self, mock_io, mock_requests): # Simply verify we initiate a requests GET. image_url = 'http://some.url.com/image.jpg' orig_img = Image.new('RGBA', (200, 200)) img_bytes = io.BytesIO() orig_img.save(img_bytes, format='png') mock_io.BytesIO.return_value = img_bytes output_img = atlasmaker_io.get_image(image_url, request_timeout=30) mock_requests.get.assert_called_with(image_url, stream=True, timeout=30) self.assertEqual(output_img.size, (200, 200)) @mock.patch.object(atlasmaker_io, 'requests') @mock.patch.object(atlasmaker_io, 'io') def testGetImageFromUrlHTTPS(self, mock_io, mock_requests): # Simply verify we initiate a requests GET. image_url = 'https://some.url.com/image.jpg' orig_img = Image.new('RGBA', (200, 200)) img_bytes = io.BytesIO() orig_img.save(img_bytes, format='png') mock_io.BytesIO.return_value = img_bytes output_img = atlasmaker_io.get_image(image_url, request_timeout=30) mock_requests.get.assert_called_with(image_url, stream=True, timeout=30) self.assertEqual(output_img.size, (200, 200)) self.assertEqual(mock_requests.get.call_count, 1) @mock.patch.object(atlasmaker_io, 'requests') @mock.patch.object(atlasmaker_io, 'io') def testGetImageFromUrlSucceedsLastTry(self, mock_io, mock_requests): # Verify that we retry up to max retries for timeout errors. image_url = 'https://some.url.com/image.jpg' orig_img = Image.new('RGBA', (200, 200)) img_bytes = io.BytesIO() orig_img.save(img_bytes, format='png') mock_response = mock.MagicMock() mock_response.raw.read.return_value = 'something' mock_io.BytesIO.return_value = img_bytes mock_requests.get.side_effect = [ConnectTimeout(), ReadTimeout(), mock_response] output_img = atlasmaker_io.get_image(image_url, request_timeout=30, http_max_retries=3, http_retry_interval=0) self.assertEqual(mock_requests.get.call_count, 3) self.assertEqual(output_img.size, (200, 200)) @mock.patch.object(atlasmaker_io, 'requests') @mock.patch.object(atlasmaker_io, 'io') def testGetImageFromUrlNotWorthRetry(self, mock_io, mock_requests): # We should give up quickly for non-timeout errors. image_url = 'https://some.url.com/image.jpg' orig_img = Image.new('RGBA', (200, 200)) img_bytes = io.BytesIO() orig_img.save(img_bytes, format='png') mock_response = mock.MagicMock() mock_response.raw.read.return_value = 'something' mock_io.BytesIO.return_value = img_bytes mock_requests.get.side_effect = [InvalidURL(), ReadTimeout(), mock_response] with self.assertRaises(InvalidURL): atlasmaker_io.get_image(image_url, request_timeout=30, http_max_retries=3, http_retry_interval=0) self.assertEqual(mock_requests.get.call_count, 1) def testSaveImageJpegDontDelete(self): # Verify we can output to jpeg despite using RGBA. output_file = os.path.join(self.testdata_dir, 'testfile.jpg') img = Image.new('RGBA', (50, 50)) try: atlasmaker_io.save_image(img=img, outpath=output_file) output_img = Image.open(output_file) self.assertEqual(output_img.size, (50, 50)) except: raise finally: # Cleanup. if os.path.isfile(output_file): os.remove(output_file) def testSaveImageJpegAndDelete(self): # Verify we can output to jpeg despite using RGBA and it gets deleted. output_file = os.path.join(self.testdata_dir, 'testfile.jpg') img = Image.new('RGBA', (50, 50)) try: atlasmaker_io.save_image(img=img, outpath=output_file, delete_after_write=True) self.assertFalse(os.path.isfile(output_file)) except: raise finally: # Cleanup. if os.path.isfile(output_file): os.remove(output_file) if __name__ == '__main__': absltest.main() ================================================ FILE: facets_atlasmaker/convert.py ================================================ """Image conversion classes and methods. Methods for converting images as well as for creating a default image for use upon image retrieval/conversion failures. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl import logging from PIL import Image from PIL import ImageOps DEFAULT_OPACITY = 0 # Between (0, 255) # Conversion settings _REQUIRED_MIN_DIMENSION = 10 # required minimum size of converted image class ImageConvertSettings(object): """Image conversion settings.""" def __init__(self, img_format, width, height, position=(0.5, 0.5), bg_color_rgb=(0, 0, 0), opacity=DEFAULT_OPACITY, resize_if_larger=False, preserve_aspect_ratio=True): """Initialize settings object. Args: img_format: image output format, e.g., 'png', 'jpeg', etc. See supported PIL image types. width: desired output width in pixels. height: desired output height in pixels. position: position of resized image within new size frame. This should be specified as a tuple with each value between [0, 1]. Default is (0.5, 0.5) for center. bg_color_rgb: Background color in RGB values. E.g., (255, 255, 255). opacity: Opacity or alpha value, from 0 to 255. resize_if_larger: If True, will resize the image to fit the larger size if the desired output size is larger than the original image size. preserve_aspect_ratio: If True, will attempt to preserve the original image's aspect ration when resizing. Otherwise will attempt a best effort. See PIL ImageOps.fit documentation. """ self._format = img_format self._width = width self._height = height self._transparency = opacity self._opacity = opacity # Background color is specified as RGBA self._bg_mode = 'RGBA' self._bg_color = (bg_color_rgb[0], bg_color_rgb[1], bg_color_rgb[2], opacity) self._position = position self._preserve_aspect_ratio = preserve_aspect_ratio # Whether image should be upsized if desired size is larger than orig size. self._resize_if_larger = resize_if_larger self._validate_settings() def _validate_settings(self): error_messages = [] for val in self._position: if val < 0 or val > 1: error_messages.append('Position must be a percent of width/height with ' 'values ranging from 0 to 1.\n') for val in self._bg_color: if val < 0 or val > 255: error_messages.append('RGB and opacity values must be between (0, 255)' '.\n') if self.width < _REQUIRED_MIN_DIMENSION: error_messages.append('Desired width must be greater than %d pixels.\n' % _REQUIRED_MIN_DIMENSION) if self.height < _REQUIRED_MIN_DIMENSION: error_messages.append('Desired width must be greater than %d pixels.\n' % _REQUIRED_MIN_DIMENSION) if error_messages: raise ValueError('The following invalid conversion settings were found: ' '%s' % str(error_messages)) @property def bg_color(self): return self._bg_color @property def bg_mode(self): return self._bg_mode @property def format(self): return self._format @property def height(self): return self._height @property def width(self): return self._width @property def position(self): return self._position @property def resize_if_larger(self): return self._resize_if_larger @property def preserve_aspect_ratio(self): return self._preserve_aspect_ratio class ImageConverter(object): """Converter for images""" def __init__(self, image, image_convert_settings): """Initialize converter. Args: image: input image. image_convert_settings: ImageConvertSettings object. """ self._orig_img = image self._settings = image_convert_settings self._desired_size = (image_convert_settings.width, image_convert_settings.height) self._desired_format = str(image_convert_settings.format).lower() self._preserve_aspect_ratio = image_convert_settings.preserve_aspect_ratio self._resize_if_larger = image_convert_settings.resize_if_larger self._position = image_convert_settings.position # Tuple (x, y) as decimal. # Background settings self._bg_mode = image_convert_settings.bg_mode self._bg_color = image_convert_settings.bg_color def convert(self): """Returns a converted image as a PIL Image object.""" # TODO: Do we need an option to allow pre-cropping? if (self._orig_img.size == self._desired_size and self._orig_img.format == self._desired_format.upper()): logging.debug('Desired image is same format and size as original image' 'so no conversion needed.') return self._orig_img # Desired image is larger than original image. if (self._desired_size[0] > self._orig_img.size[0] and self._desired_size[1] > self._orig_img.size[1]): if self._preserve_aspect_ratio and self._resize_if_larger: return self._resize_larger_keep_aspect_ratio() elif self._resize_if_larger: return self._resize_larger_dont_keep_aspect_ratio() return self._pad_to_larger_size() if self._preserve_aspect_ratio: return self._resize_thumbnail_keep_aspect_ratio() return self._resize_thumbnail_and_crop() def _resize_thumbnail_keep_aspect_ratio(self): """Resize image to fit the new smaller size, retaining aspect ratio. We use the PIL thumbnail function, which guarantees the aspect ratio is maintained but does not ensure the output size matches your desired size (only that it will not exceed it). We then paste it onto a background image that matches the desired output size. If the new size is smaller than the original image size (in either dimension), the image is reduced so it's just large enough to fit the desired size, and padded with a background to ensure it's exactly the desired size. If the new size is larger than the original image size, the original size is retained and the image is padded with a background to make it exactly the desired size. Args: orig_img: PIL Image object. Returns: Void. """ # We use Image.ANTIALIAS for better quality. This is the recommended PIL # setting. self._orig_img.thumbnail(self._desired_size, Image.ANTIALIAS) offset = ( int( round( (self._desired_size[0] - self._orig_img.size[0]) * self._position[0])), int( round( (self._desired_size[1] - self._orig_img.size[1]) * self._position[1]))) bkgd_img = Image.new(self._bg_mode, self._desired_size, self._bg_color) bkgd_img.paste(self._orig_img, offset) return bkgd_img def _resize_thumbnail_and_crop(self): """Resize image smaller, not retaining aspect ratio. For image reduction, reduce size to fit smallest of the dimensions and crop the larger dimension as necessary. Crop centering is based on center of image.""" logging.debug('Reducing image size and cropping as necessary.') return ImageOps.fit(image=self._orig_img, size=self._desired_size, centering=self._position) def _pad_to_larger_size(self): """Just pad orig image to larger size if desired size is larger.""" offset = (int(round((self._desired_size[0]) / 2)), int(round((self._desired_size[1]) / 2))) bkgd_img = Image.new(self._bg_mode, self._desired_size, self._bg_color) bkgd_img.paste(self._orig_img, offset) return bkgd_img def _resize_larger_dont_keep_aspect_ratio(self): """Stretch image to fit new size, NOT retaining the aspect ratio.""" logging.info('Desired size is larger than source image size.') return ImageOps.fit(image=self._orig_img, size=self._desired_size) def _resize_larger_keep_aspect_ratio(self): """Stretch image to fit new size, keeping the aspect ratio.""" orig_w, orig_h = self._orig_img.size # Find min stretch ratio, create a scaled up image. resize_ratio = min(self._desired_size[0] / orig_w, self._desired_size[1] / orig_h) (new_width, new_height) = (int(round(orig_h * resize_ratio)), int(round(orig_w * resize_ratio))) new_img = ImageOps.fit(image=self._orig_img, size=(new_width, new_height)) # If after stretching, it's the desired size already, then return image. if self._desired_size == (new_width, new_height): return new_img # Otherwise we need to pad it to the correct size. offset = (int(round((self._desired_size[0] - new_width) / 2)), int(round((self._desired_size[1] - new_height) / 2))) bkgd_img = Image.new(self._bg_mode, self._desired_size, self._bg_color) bkgd_img.paste(new_img, offset) return bkgd_img def create_default_image(image_convert_settings): """Returns a default PIL image for use on image retrieval/conversion failures. Helper method for creating a default image (of the background) to be used in Atlas for images that fail retrieval/conversion. Returns: PIL Image. """ return Image.new( image_convert_settings.bg_mode, (image_convert_settings.width, image_convert_settings.height), image_convert_settings.bg_color) ================================================ FILE: facets_atlasmaker/convert_test.py ================================================ """Unit tests for image conversion.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os from absl.testing import absltest from PIL import Image from PIL import ImageColor import convert TESTDATA_DIR = 'testdata' class ImageConvertSettingsTests(absltest.TestCase): """Test settings class""" def test_initializeWithDefaultSettings(self): # Simply verifies we can create an instance and return some properties. settings = convert.ImageConvertSettings('png', 100, 100) self.assertEqual(settings.format, 'png') self.assertEqual(settings.height, 100) self.assertEqual(settings.width, 100) def test_initializeWithAllSettings(self): # Simply verifies we can create an instance and return some properties. settings = convert.ImageConvertSettings( 'png', 100, 100, position=(0.1, 0.1), bg_color_rgb=(100, 100, 100), opacity=100, resize_if_larger=True, preserve_aspect_ratio=True) self.assertEqual(settings.format, 'png') self.assertEqual(settings.height, 100) self.assertEqual(settings.width, 100) def test_invalidWidth(self): with self.assertRaises(ValueError): convert.ImageConvertSettings('png', 0, 10) def test_invalidHeight(self): with self.assertRaises(ValueError): convert.ImageConvertSettings('png', 10, 0) def test_invalidPosition(self): with self.assertRaises(ValueError): convert.ImageConvertSettings('png', 10, 10, position=(-1, 0)) def test_invalidBGColorRGB(self): with self.assertRaises(ValueError): convert.ImageConvertSettings('png', 10, 10, bg_color_rgb=(-1, 500, 255)) def test_invalidOpacity(self): with self.assertRaises(ValueError): convert.ImageConvertSettings('png', 10, 10, opacity=300) class TestImageConverterTests(absltest.TestCase): """Test image conversion outputs.""" def setUp(self): self.testdata_dir = os.path.join(os.getcwd(), TESTDATA_DIR) self.desired_width = 100 self.desired_height = 100 self.conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height) # Color settings used to verifying image output is correct self.red_rgb = ImageColor.getrgb('red') self.blue_rgb = ImageColor.getrgb('blue') self.orange_rgb = ImageColor.getrgb('orange') self.green_rgb = ImageColor.getrgb('green') self.yellow_rgb = ImageColor.getrgb('yellow') def testConvertSmallerKeepRatioHasCorrectSize(self): # Larger image is resized smaller, keeping aspect ratio. # This test simply verifies the size # TODO: delete and merge with following tests. conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, preserve_aspect_ratio=True) orig_img = Image.new('RGBA', (1000, 500)) image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() self.assertEqual(converted_img.size, (self.desired_width, self.desired_height)) def testConvertResizedSmallKeepRatio(self): # Verify that for an original red rectangular (vertically long image), # after conversion to a square size with blue background the output image # looks correct based on sampling color at several locations. orig_img = Image.new('RGBA', (50, 500), self.red_rgb) conversion_settings = convert.ImageConvertSettings( 'png', 100, 100, bg_color_rgb=self.blue_rgb) # Final image should be a 10px wide by 100px tall rectangle centered # within the square, i.e., with center at pixel (49, 49) and borders of # 45 pixels on each side. image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Just compare the RGB values, not opacity # Center should be red self.assertEqual(converted_img.getpixel((45, 45))[0:3], self.red_rgb) # Top Center should be red self.assertEqual(converted_img.getpixel((45, 0))[0:3], self.red_rgb) # Bottom Center should be red self.assertEqual(converted_img.getpixel((45, 99))[0:3], self.red_rgb) # Top left edge border should be blue background, then red rect. self.assertEqual(converted_img.getpixel((44, 0))[0:3], self.blue_rgb) self.assertEqual(converted_img.getpixel((45, 0))[0:3], self.red_rgb) # Bottom right edge border should be red, then blue background. self.assertEqual(converted_img.getpixel((54, 99))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((55, 99))[0:3], self.blue_rgb) def testConvertResizedSmallTopLeftKeepRatio(self): # Same as above but we specify that the position remains in the top left. # Verify that for an original red rectangular (vertically long image), # after conversion to a square size with blue background the output image # looks correct based on sampling color at several locations. orig_img = Image.new('RGBA', (50, 500), self.red_rgb) conversion_settings = convert.ImageConvertSettings( 'png', 100, 100, position=(0, 0), bg_color_rgb=self.blue_rgb) # Final image should be a 10px wide by 100px tall rectangle flush with # the left edge of the output size. I.e., rectangle with center at pixel # (4, 49) and the blue background staring at x position 9. image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Just compare the RGB values, not opacity # Center of image should be blue self.assertEqual(converted_img.getpixel((45, 45))[0:3], self.blue_rgb) # Top left corner should be red self.assertEqual(converted_img.getpixel((0, 0))[0:3], self.red_rgb) # Bottom right corner of rectangle along with adjacent blue background self.assertEqual(converted_img.getpixel((9, 99))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((10, 99))[0:3], self.blue_rgb) # Right edge of image should be background color self.assertEqual(converted_img.getpixel((45, 99))[0:3], self.blue_rgb) def testConvertSmallerRightPosKeepRatio(self): # Same as above but we specify that the position remains in the right. # Verify that for an original red rectangular (vertically long image), # after conversion to a square size with blue background the output image # looks correct based on sampling color at several locations. orig_img = Image.new('RGBA', (50, 500), self.red_rgb) conversion_settings = convert.ImageConvertSettings( 'png', 100, 100, position=(1, 0.5), bg_color_rgb=self.blue_rgb) # Final image should be a 10px wide by 100px tall rectangle flush with # the right edge of the output size. I.e., rectangle with center at pixel # (94, 49) and the blue background staring at x position 9. image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Just compare the RGB values, not opacity # Center of image should be blue self.assertEqual(converted_img.getpixel((45, 45))[0:3], self.blue_rgb) # Top left corner should be blue background. self.assertEqual(converted_img.getpixel((0, 0))[0:3], self.blue_rgb) # Center of rectangle should be red self.assertEqual(converted_img.getpixel((94, 49))[0:3], self.red_rgb) # Bottom left edge of rectangle and adjacent background self.assertEqual(converted_img.getpixel((90, 99))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((89, 99))[0:3], self.blue_rgb) # Top left edge of rectangle and adjacent background self.assertEqual(converted_img.getpixel((90, 0))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((89, 0))[0:3], self.blue_rgb) def testConvertSmallerIgnoreRatio(self): # Test resize image to smaller sprite without retaining aspect ratio. # Simply verifies correct size output. conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, preserve_aspect_ratio=False) orig_img = Image.new('RGBA', (1000, 500)) image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() self.assertEqual(converted_img.size, (self.desired_width, self.desired_height)) def testConvertResizedSmallDontKeepRatioCenterCrop(self): # Verify that we get the desired output image when cropping from center. # Orig image is a 4 quadrant (200, 200) pixel image, with colors # (orange, red, green, yellow) starting from top left and going clockwise. orig_img = Image.new('RGBA', (200, 200), self.yellow_rgb) orig_img.paste(Image.new('RGBA', (100, 100), self.orange_rgb), (0, 0)) orig_img.paste(Image.new('RGBA', (100, 100), self.red_rgb), (100, 0)) orig_img.paste(Image.new('RGBA', (100, 100), self.green_rgb), (100, 100)) conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, preserve_aspect_ratio=False) # Output image should be 100x100 cropped from the center. image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Top left of output image should be orange. self.assertEqual(converted_img.getpixel((0, 0))[0:3], self.orange_rgb) # Top right should be red self.assertEqual(converted_img.getpixel((99, 0))[0:3], self.red_rgb) # Bottom right should be green self.assertEqual(converted_img.getpixel((99, 99))[0:3], self.green_rgb) # Bottom left should be yellow self.assertEqual(converted_img.getpixel((0, 99))[0:3], self.yellow_rgb) # Center section should comprise the same set of colors clockwise. self.assertEqual(converted_img.getpixel((49, 49))[0:3], self.orange_rgb) self.assertEqual(converted_img.getpixel((50, 49))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((50, 50))[0:3], self.green_rgb) self.assertEqual(converted_img.getpixel((49, 50))[0:3], self.yellow_rgb) def testConvertSmallerIgnoreRatioCropTopLeftSameAspectRatio(self): # Verify that we get the desired output image when cropping from left. # In this case, since the output image's aspect ratio is same as the input # image's aspect ratio, we're able to crop it correctly and keep the same # pattern. # Orig image is a 4 quadrant (200, 200) pixel image, with colors # (orange, red, green, yellow) starting from top left and going clockwise. orig_img = Image.new('RGBA', (200, 200), self.yellow_rgb) orig_img.paste(Image.new('RGBA', (100, 100), self.orange_rgb), (0, 0)) orig_img.paste(Image.new('RGBA', (100, 100), self.red_rgb), (100, 0)) orig_img.paste(Image.new('RGBA', (100, 100), self.green_rgb), (100, 100)) conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, position=(0, 0), preserve_aspect_ratio=False) # Output image should be 100x100 cropped from the center. image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Top left of output image should be orange. self.assertEqual(converted_img.getpixel((0, 0))[0:3], self.orange_rgb) # Top right should be red self.assertEqual(converted_img.getpixel((99, 0))[0:3], self.red_rgb) # Bottom right should be green self.assertEqual(converted_img.getpixel((99, 99))[0:3], self.green_rgb) # Bottom left should be yellow self.assertEqual(converted_img.getpixel((0, 99))[0:3], self.yellow_rgb) # Center section should comprise the same set of colors clockwise. self.assertEqual(converted_img.getpixel((49, 49))[0:3], self.orange_rgb) self.assertEqual(converted_img.getpixel((50, 49))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((50, 50))[0:3], self.green_rgb) self.assertEqual(converted_img.getpixel((49, 50))[0:3], self.yellow_rgb) def testConvertSmallerIgnoreRatioCropTopLeftNoAspectRatio(self): # Verify that we get the desired output image when cropping from top left. # In this case, the aspect ratio of the output image is different than # that of the orig image so cropping occurs. # Orig image is a 4 quadrant (200, 200) pixel image, with colors # (orange, red, green, yellow) starting from top left and going clockwise. orig_img = Image.new('RGBA', (200, 200), self.yellow_rgb) orig_img.paste(Image.new('RGBA', (100, 100), self.orange_rgb), (0, 0)) orig_img.paste(Image.new('RGBA', (100, 100), self.red_rgb), (100, 0)) orig_img.paste(Image.new('RGBA', (100, 100), self.green_rgb), (100, 100)) # Note that in this case, the aspect ratio has changed. conversion_settings = convert.ImageConvertSettings( 'png', width=20, height=200, position=(0, 0), preserve_aspect_ratio=False) # Output image should be 20x200 that only contains the orange and yellow # sections since we crop from the left. image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Top left of output image should be orange. self.assertEqual(converted_img.getpixel((0, 0))[0:3], self.orange_rgb) # Top right should be orange self.assertEqual(converted_img.getpixel((19, 0))[0:3], self.orange_rgb) # Bottom right should be yellow self.assertEqual(converted_img.getpixel((19, 199))[0:3], self.yellow_rgb) # Bottom left should be yellow self.assertEqual(converted_img.getpixel((0, 199))[0:3], self.yellow_rgb) def testConvertResizedLargerKeepAspectRatio( self): conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, resize_if_larger=True, preserve_aspect_ratio=False) orig_img = Image.new('RGBA', (10, 10)) image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() self.assertEqual(converted_img.size, (self.desired_width, self.desired_height)) def testConvertImageResizedLargerIgnoreRatio(self): # Verifies that output size and image (based on pixel samples) are correct. conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, bg_color_rgb=self.blue_rgb, resize_if_larger=True, preserve_aspect_ratio=False) orig_img = Image.new('RGBA', (20, 10), self.red_rgb) image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() # Verify size. self.assertEqual(converted_img.size, (self.desired_width, self.desired_height)) # Image should be stretched to fit desired output size, so entire output # should be red. Verify center, and top left / bottom right edges. self.assertEqual(converted_img.getpixel((45, 45))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((0, 0))[0:3], self.red_rgb) self.assertEqual(converted_img.getpixel((99, 99))[0:3], self.red_rgb) def testConvertImagePaddedLargerCorrectSize(self): # Input image is smaller than desired, but we just pad it to fit new size. conversion_settings = convert.ImageConvertSettings( 'png', self.desired_width, self.desired_height, resize_if_larger=True) orig_img = Image.new('RGBA', (10, 10)) image_converter = convert.ImageConverter(orig_img, conversion_settings) converted_img = image_converter.convert() self.assertEqual(converted_img.size, (self.desired_width, self.desired_height)) def testConvertInputSizeSameAsOutput(self): # Verify that if input and output size are the same, things still work. orig_img = Image.new('RGBA', (100, 100)) image_converter = convert.ImageConverter(orig_img, self.conversion_settings) converted_img = image_converter.convert() self.assertEqual(converted_img.size, (self.desired_width, self.desired_height)) def testConvertTruncatedImage(self): # Should fail with a message that image is truncated. # To test image truncation, we actually need to write a file to disk. img_filepath = os.path.join(self.testdata_dir, 'test_img.png') try: orig_img = Image.new('RGBA', (500, 500), self.orange_rgb) orig_img.save(img_filepath) filesize = os.path.getsize(img_filepath) with open(img_filepath, 'r+') as img_on_disk: img_on_disk.truncate(filesize - 100) img_truncated = Image.open(img_filepath) image_converter = convert.ImageConverter(img_truncated, self.conversion_settings) with self.assertRaises(IOError) as e: image_converter.convert() self.assertTrue('truncated' in str(e.exception).lower()) except: raise finally: # Cleanup. if os.path.isfile(img_filepath): os.remove(img_filepath) class TestImageConverterHelpersTests(absltest.TestCase): """Test misc helper functions.""" def testCreateDefaultImage(self): conversion_settings = convert.ImageConvertSettings( 'png', 20, 20, resize_if_larger=True) output_img = convert.create_default_image(conversion_settings) self.assertEqual(output_img.size, (20, 20)) if __name__ == '__main__': absltest.main() ================================================ FILE: facets_atlasmaker/montage.py ================================================ """Utilities to combine converted images into atlas and to create manifest.""" import math import os from absl import logging from PIL import Image # Manifest Key names MANIFEST_IMAGE_NAME_KEY = 'image_name' MANIFEST_SOURCE_IMAGE_KEY = 'source_image' MANIFEST_OFFSET_X_KEY = 'offset_x' MANIFEST_OFFSET_Y_KEY = 'offset_y' MANIFEST_IMAGE_FAIL_KEY = 'errors' class SpriteAtlasSettings(object): """Sprite atlas settings.""" def __init__(self, img_format, height=None, width=None, filename='spriteatlas', manifest_filename='manifest'): """ Width and height are in units of number of images Args: img_format: output format (JPG, PNG, etc). height: Height of atlas in number of images. width: Width in number of images. filename: Desired filename of atlas (without file extension). manifest_filename: Desired filename of atlas manifest (without file extension). """ self._img_format = img_format self._width = width self._height = height self._filename = filename self._manifest_filename = manifest_filename @property def filename(self): return self._filename @property def img_format(self): return self._img_format @property def height(self): return self._height @property def width(self): return self._width @property def manifest_filename(self): return self._manifest_filename class SpriteAtlasGenerator(object): """Class that generates one or more sprite atlases.""" def __init__(self, images_with_statuses, img_src_paths, atlas_settings, default_img): """Initialize Atlas generator. Args: images_with_statuses: List of tuples, where each tuple contains (converted Image object or None, status/error message string). These will be montaged into the Atlas. img_src_paths: List of image source paths (strings). settings: SpritesheetSettings instance. default_img: PIL Image object representing the default image to be used when a desired image failed retrieval/conversion. """ self._images_with_statuses = images_with_statuses self._img_src_paths = img_src_paths self._atlas_settings = atlas_settings self._default_image = default_img self._atlas_manifests = [] # List of atlas manifests. # Store tuple representing individual image (width, height) in pixels. self._img_width_height_px = self._identify_image_size() self._num_input_images = len(self._images_with_statuses) self._validate_inputs() def _identify_image_size(self): """Returns size of the first no failed image. Helper method to identify image size for validation. Raises: ValueError if all images had failed retrieval and/or conversion. """ for img_with_status in self._images_with_statuses: img = img_with_status[0] if img is not None: return img.size raise ValueError('No images were successfully retrieved and converted.') def _validate_inputs(self): if len(self._img_src_paths) != self._num_input_images: raise ValueError('Number of elements in image list is different from ' 'number of elements in src paths list.') for img_with_status in self._images_with_statuses: img = img_with_status[0] if img is not None and img.size != self._img_width_height_px: raise ValueError('Input images are not all the same size.') def create_atlas(self): """Returns tuple of (list of sprite atlas images, list of manifests). If atlas size is not specified, it creates a single square atlas. Otherwise, it creates how ever many atlases are required to contain the images at the specific atlas size. Atlases are populated with images from left to right, top to bottom. Returns: List of Sprite Atlases. """ # If no atlas size specified or if a single atlas can fit all images, # create a single atlas. atlas_size_is_specified = (self._atlas_settings.height is not None and self._atlas_settings.width is not None) if not atlas_size_is_specified: spriteatlas1, manifest1 = self._create_single_atlas() return ([spriteatlas1], [manifest1]) else: # TODO: Support creating multiple sprite atlases. raise NotImplementedError() def _create_single_atlas(self): """Returns a single sprite atlas""" (atlas_width, atlas_height) = self._generate_default_atlas_size() # Initialize manifest manifest = [] # Ensure they're ints atlas_width = int(atlas_width) atlas_height = int(atlas_height) # Atlas size in pixels (width, height) atlas_size_pixels = (atlas_width * self._img_width_height_px[0], atlas_height * self._img_width_height_px[1]) logging.debug('generating atlas of size %d, %d' % (atlas_size_pixels[0], atlas_size_pixels[1])) # We create a background image of the atlas size that we will paste the # sprite images onto. montage = Image.new('RGBA', atlas_size_pixels, (255, 255, 255, 255)) # Generate row by row, from left to right, top to bottom. image_idx = 0 failed_images_count = 0 for row_idx in range(0, atlas_width): offset_height = row_idx * self._img_width_height_px[1] for col_idx in range(0, atlas_height): if image_idx >= self._num_input_images: # Finished montaging all images. break offset_width = col_idx * self._img_width_height_px[0] # Manifest entry for that image. img_manifest = { MANIFEST_IMAGE_NAME_KEY: os.path.basename(self._img_src_paths[image_idx]), MANIFEST_SOURCE_IMAGE_KEY: self._img_src_paths[image_idx], MANIFEST_OFFSET_X_KEY: offset_width, MANIFEST_OFFSET_Y_KEY: offset_height } img = self._images_with_statuses[image_idx][0] status = self._images_with_statuses[image_idx][1] if img is not None: montage.paste(img, (offset_width, offset_height)) else: montage.paste(self._default_image, (offset_width, offset_height)) failed_images_count += 1 # Add error message to manifest. img_manifest[MANIFEST_IMAGE_FAIL_KEY] = status manifest.append(img_manifest) image_idx += 1 logging.info('Montaged %d images onto sprite atlas of size %s ' 'pixels.' % (image_idx, str(atlas_size_pixels))) if failed_images_count > 0: logging.warning('%d images had failures and were replaced by the default ' 'image' % failed_images_count) return montage, manifest def _generate_default_atlas_size(self): """Generate a default square size for the atlas if input size is not specified. Note that square means that it attempts to have the same number of images in the height and the width, which is NOT the same as having a square size in terms of number of pixels. Returns: A tuple representing the number of images per side. """ side = math.ceil(math.sqrt(self._num_input_images)) return side, side ================================================ FILE: facets_atlasmaker/montage_test.py ================================================ """Unit tests for montaging images to create sprite atlases.""" from absl.testing import absltest from PIL import Image from PIL import ImageColor import montage class SpriteAtlasGeneratorTests(absltest.TestCase): def setUp(self): # Color settings used to verifying image output is correct self.red_rgb = ImageColor.getrgb('red') self.blue_rgb = ImageColor.getrgb('blue') self.orange_rgb = ImageColor.getrgb('orange') self.green_rgb = ImageColor.getrgb('green') self.yellow_rgb = ImageColor.getrgb('yellow') self.black_rgb = ImageColor.getrgb('black') def testAtlasGeneratorDifferentInputSizes(self): # Should raise error if image count does not match source path count. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_images_with_statuses = [(Image.new('RGBA', (50, 30)), '')] * 5 source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 4)] with self.assertRaises(ValueError): montage.SpriteAtlasGenerator( images_with_statuses=source_images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 30))) def testAtlasGeneratorDifferentImageSizes(self): # Should raise error if converted images are different sizes. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_images_with_statuses = [ (Image.new('RGBA', (50, 30)), ''), (Image.new('RGBA', (50, 30)), ''), (Image.new('RGBA', (50, 30)), ''), (Image.new('RGBA', (50, 10)), '') ] source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 4)] with self.assertRaises(ValueError): montage.SpriteAtlasGenerator( images_with_statuses=source_images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 30))) def testCreateAtlasIfNoSizeSpecified(self): # Verify that manifests and atlases contains single items. # and verify atlas size is correct. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_images_with_statuses = [(Image.new('RGBA', (50, 30)), '')] * 20 source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 20)] atlas_generator = montage.SpriteAtlasGenerator( images_with_statuses=source_images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 30))) atlases, manifests = atlas_generator.create_atlas() self.assertEqual(len(atlases), 1) self.assertEqual(atlases[0].size, (250, 150)) self.assertEqual(len(manifests), 1) self.assertEqual(len(manifests[0]), 20) def testCreateAtlas(self): # Verify that atlas is correct based on sampling pixels and output size. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 4)] images_with_statuses = [ (Image.new('RGBA', (50, 50), self.orange_rgb), ''), (Image.new('RGBA', (50, 50), self.red_rgb), ''), (Image.new('RGBA', (50, 50), self.green_rgb), ''), (Image.new('RGBA', (50, 50), self.yellow_rgb), '') ] atlas_generator = montage.SpriteAtlasGenerator( images_with_statuses=images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 50), self.black_rgb)) atlases, manifests = atlas_generator.create_atlas() atlas = atlases[0] # Only care about a single atlas del manifests self.assertEqual(atlas.size, (100, 100)) # Verify pixels in corners of atlas. self.assertEqual(atlas.getpixel((0, 0))[0:3], self.orange_rgb) self.assertEqual(atlas.getpixel((99, 0))[0:3], self.red_rgb) self.assertEqual(atlas.getpixel((0, 99))[0:3], self.green_rgb) self.assertEqual(atlas.getpixel((99, 99))[0:3], self.yellow_rgb) # Verify pixels in center of atlas self.assertEqual(atlas.getpixel((49, 49))[0:3], self.orange_rgb) self.assertEqual(atlas.getpixel((50, 49))[0:3], self.red_rgb) self.assertEqual(atlas.getpixel((49, 50))[0:3], self.green_rgb) self.assertEqual(atlas.getpixel((50, 50))[0:3], self.yellow_rgb) def testCreateAtlasWithFailures(self): # Verify that atlas is correct based on sampling pixels and output size. # when one of the images failed retrival/conversion. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 4)] images_with_statuses = [ (Image.new('RGBA', (50, 50), self.orange_rgb), ''), (Image.new('RGBA', (50, 50), self.red_rgb), ''), (None, 'some error message'), # Failed (Image.new('RGBA', (50, 50), self.yellow_rgb), '') ] atlas_generator = montage.SpriteAtlasGenerator( images_with_statuses=images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 50), self.black_rgb)) atlases, manifests = atlas_generator.create_atlas() atlas = atlases[0] # Only care about a single atlas del manifests self.assertEqual(atlas.size, (100, 100)) # Verify pixels in corners of atlas. self.assertEqual(atlas.getpixel((0, 0))[0:3], self.orange_rgb) self.assertEqual(atlas.getpixel((99, 0))[0:3], self.red_rgb) self.assertEqual(atlas.getpixel((0, 99))[0:3], self.black_rgb) self.assertEqual(atlas.getpixel((99, 99))[0:3], self.yellow_rgb) # Verify pixels in center of atlas self.assertEqual(atlas.getpixel((49, 49))[0:3], self.orange_rgb) self.assertEqual(atlas.getpixel((50, 49))[0:3], self.red_rgb) self.assertEqual(atlas.getpixel((49, 50))[0:3], self.black_rgb) self.assertEqual(atlas.getpixel((50, 50))[0:3], self.yellow_rgb) def testCreateAtlasManifest(self): # Verify manifest contains correct data. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_images_with_statuses = [(Image.new('RGBA', (50, 30)), '')] * 4 source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 4)] expected_manifest = [ {'source_image': '/some/path/file0.jpg', 'offset_x': 0, 'image_name': 'file0.jpg', 'offset_y': 0}, {'source_image': '/some/path/file1.jpg', 'offset_x': 50, 'image_name': 'file1.jpg', 'offset_y': 0}, {'source_image': '/some/path/file2.jpg', 'offset_x': 0, 'image_name': 'file2.jpg', 'offset_y': 30}, {'source_image': '/some/path/file3.jpg', 'offset_x': 50, 'image_name': 'file3.jpg', 'offset_y': 30}] atlas_generator = montage.SpriteAtlasGenerator( images_with_statuses=source_images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 50), self.black_rgb)) atlases, manifests = atlas_generator.create_atlas() del atlases # linter self.assertEqual(manifests[0], expected_manifest) def testCreateAtlasManifestWithImgFailures(self): # Verify manifest contains correct data when one image failed. atlas_settings = montage.SpriteAtlasSettings(img_format='png') source_images_with_statuses = [( Image.new('RGBA', (50, 30)), '')] * 3 + [(None, 'Failure msg')] source_paths = ['/some/path/file' + str(i) + '.jpg' for i in range(0, 4)] expected_manifest = [ {'source_image': '/some/path/file0.jpg', 'offset_x': 0, 'image_name': 'file0.jpg', 'offset_y': 0}, {'source_image': '/some/path/file1.jpg', 'offset_x': 50, 'image_name': 'file1.jpg', 'offset_y': 0}, {'source_image': '/some/path/file2.jpg', 'offset_x': 0, 'image_name': 'file2.jpg', 'offset_y': 30}, {'source_image': '/some/path/file3.jpg', 'offset_x': 50, 'image_name': 'file3.jpg', 'offset_y': 30, 'errors': 'Failure msg'}] atlas_generator = montage.SpriteAtlasGenerator( images_with_statuses=source_images_with_statuses, img_src_paths=source_paths, atlas_settings=atlas_settings, default_img=Image.new('RGBA', (50, 50), self.black_rgb)) atlases, manifests = atlas_generator.create_atlas() del atlases # linter self.assertEqual(manifests[0], expected_manifest) if __name__ == '__main__': absltest.main() ================================================ FILE: facets_atlasmaker/parallelize.py ================================================ """Parallelize file fetch and conversion utilities and wrappers.""" from absl import logging from joblib import Parallel, delayed from PIL import ImageFile import atlasmaker_io import convert def get_and_convert_image(image_location, image_convert_settings, allow_truncated_images=False, disk_cache=False, request_timeout=60, http_max_retries=2): """Wrapper method that retrieves and converts one image. If run all in-memory (i.e., no disk spill), then returns PIL Image object. Otherwise returns path of disk-cached image. Args: image_location: Image path from the input list of locations. image_convert_settings: ImageConvertSettings object. allow_truncated_images: If True, PIL will be tolerant of truncated image files and load/process them. Note that this isn't supported on old versions on PIL, just pillow. disk_cache: Store intermediary image objects to disk. Not supported yet. request_timeout: Max secs for http requests before timeout. http_max_retries: Max number of attempts we will try to retrive http images due to timeout errors. Returns: A tuple (Image object or None if fails, status message string). Status message string will be empty if success, or error message if failure. Exceptions handled: All exceptions for image retrieval are handled. Some notable ones are: - DecompressionBombError: Image is too large (>0.5G). See PIL documentation for instructions on setting a higher threshold. For image conversion, the following errors are handled: - IOError: error retrieving image file, or truncated image file. """ if disk_cache: raise NotImplementedError() if allow_truncated_images: try: ImageFile.LOAD_TRUNCATED_IMAGES = True except AttributeError as e: logging.warning('Are you using PILLOW and not a very old version of PIL? ' 'Unable to force load of truncated image files: %s', e) try: src_image = atlasmaker_io.get_image(image_location, request_timeout, http_max_retries=http_max_retries) except Exception as e: logging.error('Retrieval of file %s failed with error: %s', image_location, e) return None, str(e) try: image_converter = convert.ImageConverter(src_image, image_convert_settings) logging.debug('Successfully converted image: %s' % image_location) return image_converter.convert(), '' except IOError as e: logging.error('Conversion of file %s failed with error: %s', image_location, e) return None, str(e) def get_and_convert_images_parallel(image_src_locations, image_convert_settings, n_jobs=-1, disk_cache=False, threads=False, verbose=10, allow_truncated_images=False, request_timeout=60, http_max_retries=2): """Parallelize retrieving and converting image tasks. Args: images: List of source image paths (filepaths, URLs, etc). image_convert_settings: ImageConvertSettings object. disk_cache:: If True, will cache converted images to disk. threads: If true, use threads instead of processes. verbose: verbosity level for parallel. See joblib.Parallel documentation. allow_truncated_images: If True, PIL will be tolerant of truncated image files and load/process them. Note that this isn't supported on old versions on PIL, just pillow. request_timeout: Max secs for http requests before timeout. http_max_retries: Max number of attempts we will try to retrive http images due to timeout errors. Returns: List of tuples, where each tuple contains (converted Image object or None, status/error message string). """ logging.info('Parallelizing with setting %d jobs' % n_jobs) backend = None if threads: logging.debug('Parallelizing using threads.') backend = 'threading' outputs = Parallel(n_jobs=n_jobs, backend=backend, verbose=verbose)( delayed(get_and_convert_image)( location, image_convert_settings, allow_truncated_images=allow_truncated_images, disk_cache=disk_cache, request_timeout=request_timeout, http_max_retries=http_max_retries) for location in image_src_locations) return outputs def convert_default_image(image_location, image_convert_settings): """Return converted default image used for failures Args: image_location: Path or URL of image. image_convert_settings: ImageConvertSettings object. """ default_img, status = get_and_convert_image( image_location, image_convert_settings=image_convert_settings) del status # linter. if default_img is None: raise IOError('Unable to retrive and convert default image.') return default_img ================================================ FILE: facets_atlasmaker/parallelize_test.py ================================================ """Tests for parallize.""" import os from absl import logging from absl.testing import absltest from PIL import Image from PIL import ImageFile import parallelize import convert TESTDATA_DIR = 'testdata' class ParallelizeTests(absltest.TestCase): def setUp(self): self.testdata_dir = os.path.join(os.getcwd(), TESTDATA_DIR) # Ensure setting is set to default for each test. try: ImageFile.LOAD_TRUNCATED_IMAGES = False except AttributeError as e: logging.warning('Are you using PILLOW and not a very old version of PIL? ' 'Unable to force load of truncated image files: %s', e) def testGetAndConvertOneImageFromLocalFile(self): # Test parallelization of a single image succeeds. testfile_path = os.path.join(self.testdata_dir, 'Googleplex-Patio-Aug-2014.JPG') expected_output_image_size = (100, 100) image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_image, status = parallelize.get_and_convert_image( testfile_path, image_convert_settings) del status # linter self.assertSameElements(output_image.size, expected_output_image_size) def testGetAndConvertOneImageMissingFile(self): # Returns None if can't open file. testfile_path = os.path.join(self.testdata_dir, 'file_doesnt_exist.png') image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_image, status = parallelize.get_and_convert_image( testfile_path, image_convert_settings) del status # linter self.assertEqual(output_image, None) def testGetAndConvertOneImageNotImageFile(self): # Returns None if can't open file. testfile_path = os.path.join(self.testdata_dir, 'attributions.txt') image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_image, status = parallelize.get_and_convert_image( testfile_path, image_convert_settings) del status # linter self.assertEqual(output_image, None) def testGetAndConvertTruncatedImageFail(self): # Should note fail but return None when PIL fails on truncated image. # To test image truncation, we actually need to write a file to disk. img_filepath = os.path.join(self.testdata_dir, 'test_img.png') try: orig_img = Image.new('RGBA', (500, 500)) orig_img.save(img_filepath) filesize = os.path.getsize(img_filepath) with open(img_filepath, 'r+') as img_on_disk: img_on_disk.truncate(filesize - 100) image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_image_with_status = parallelize.get_and_convert_image( img_filepath, image_convert_settings) self.assertIsNone(output_image_with_status[0]) self.assertTrue(output_image_with_status[1]) # Has error message. except: raise finally: # Cleanup. if os.path.isfile(img_filepath): os.remove(img_filepath) def testGetAndConvertAllowTruncatedImage(self): # Should return a converted image if we tolerate truncated images. # To test image truncation, we actually need to write a file to disk. img_filepath = os.path.join(self.testdata_dir, 'test_img.png') try: orig_img = Image.new('RGBA', (500, 500)) orig_img.save(img_filepath) filesize = os.path.getsize(img_filepath) with open(img_filepath, 'r+') as img_on_disk: img_on_disk.truncate(filesize - 100) image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_image_with_status = parallelize.get_and_convert_image( img_filepath, image_convert_settings, allow_truncated_images=True) self.assertEqual(output_image_with_status[0].size, (100, 100)) except: raise finally: # Cleanup. if os.path.isfile(img_filepath): os.remove(img_filepath) def testGetAndConvertOneImageBadUrl(self): # Returns None if can't open file. testfile_path = 'http://www.google.com' image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_image, status = parallelize.get_and_convert_image( testfile_path, image_convert_settings) del status # linter self.assertEqual(output_image, None) def testParallelizeConvertImagesFromLocalfile(self): # Test parallelization with multiple reads of the same image file produces # expected output images with correct sizes. testfile_path = os.path.join(self.testdata_dir, 'Googleplex-Patio-Aug-2014.JPG') testfile_locations = [testfile_path] * 3 expected_output_image_sizes = [(100, 100)] * 3 image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_imgs_with_status = parallelize.get_and_convert_images_parallel( testfile_locations, image_convert_settings, verbose=1) resulting_image_sizes = [] for (image, status) in output_imgs_with_status: del status # linter resulting_image_sizes.append(image.size) self.assertSameElements(resulting_image_sizes, expected_output_image_sizes) def testParallelizeConvertWithFailures(self): # 3 images are attempted, the last one should fail. testfile_path = os.path.join(self.testdata_dir, 'Googleplex-Patio-Aug-2014.JPG') bad_testfile_path = os.path.join(self.testdata_dir, 'attributions.txt') testfile_locations = [testfile_path, testfile_path, bad_testfile_path] expected_image_size = (100, 100) image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) output_images = parallelize.get_and_convert_images_parallel( testfile_locations, image_convert_settings, verbose=1) first_converted_img = output_images[0][0] second_converted_img = output_images[1][0] third_converted_img = output_images[2][0] self.assertSameElements(first_converted_img.size, expected_image_size) self.assertSameElements(second_converted_img.size, expected_image_size) self.assertEqual(third_converted_img, None) # Failed conversion. def testConvertDefaultImageSucceeds(self): testfile_path = os.path.join(self.testdata_dir, 'Googleplex-Patio-Aug-2014.JPG') image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) img = parallelize.convert_default_image(testfile_path, image_convert_settings) self.assertEqual(img.size, (100, 100)) def testConvertDefaultImageFails(self): testfile_path = os.path.join(self.testdata_dir, 'attributions.txt') image_convert_settings = convert.ImageConvertSettings( img_format='png', width=100, height=100) with self.assertRaises(IOError): parallelize.convert_default_image(testfile_path, image_convert_settings) if __name__ == '__main__': absltest.main() ================================================ FILE: facets_atlasmaker/pylintrc ================================================ [MASTER] # Specify a configuration file. #rcfile= # Python code to execute, usually for sys.path manipulation such as # pygtk.require(). #init-hook= # Add files or directories to the blacklist. They should be base names, not # paths. ignore=CVS # Pickle collected data for later comparisons. persistent=yes # List of plugins (as comma separated values of python modules names) to load, # usually to register additional checkers. load-plugins= # Use multiple processes to speed up Pylint. jobs=1 # Allow loading of arbitrary C extensions. Extensions are imported into the # active Python interpreter and may run arbitrary code. unsafe-load-any-extension=no # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code extension-pkg-whitelist= # Allow optimization of some AST trees. This will activate a peephole AST # optimizer, which will apply various small optimizations. For instance, it can # be used to obtain the result of joining multiple strings with the addition # operator. Joining a lot of strings can lead to a maximum recursion error in # Pylint and this flag can prevent that. It has one side effect, the resulting # AST will be different than the one from reality. optimize-ast=no [MESSAGES CONTROL] # Only show warnings with the listed confidence levels. Leave empty to show # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED confidence= # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option # multiple time (only on the command line, not in the configuration file where # it should appear only once). See also the "--disable" option for examples. #enable= # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifiers separated by comma (,) or put this # option multiple times (only on the command line, not in the configuration # file where it should appear only once).You can also use "--disable=all" to # disable everything first and then reenable specific checks. For example, if # you want to run only the similarities checker, you can use "--disable=all # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" disable= # disabled by me, locally-disabled, missing-docstring, fixme, # disabled by default, import-star-module-level, old-octal-literal, oct-method, print-statement, unpacking-in-except, parameter-unpacking, backtick, old-raise-syntax, old-ne-operator, long-suffix, dict-view-method, dict-iter-method, metaclass-assignment, next-method-called, raising-string, indexing-exception, raw_input-builtin, long-builtin, file-builtin, execfile-builtin, coerce-builtin, cmp-builtin, buffer-builtin, basestring-builtin, apply-builtin, filter-builtin-not-iterating, using-cmp-argument, useless-suppression, range-builtin-not-iterating, suppressed-message, no-absolute-import, old-division, cmp-method, reload-builtin, zip-builtin-not-iterating, intern-builtin, unichr-builtin, reduce-builtin, standarderror-builtin, unicode-builtin, xrange-builtin, coerce-method, delslice-method, getslice-method, setslice-method, input-builtin, round-builtin, hex-method, nonzero-method, map-builtin-not-iterating, [REPORTS] # Set the output format. Available formats are text, parseable, colorized, msvs # (visual studio) and html. You can also give a reporter class, eg # mypackage.mymodule.MyReporterClass. output-format=text # Put messages in a separate file for each module / package specified on the # command line instead of printing them on stdout. Reports (if any) will be # written in a file name "pylint_global.[txt|html]". files-output=no # Tells whether to display a full report or only the messages reports=yes # Python expression which should return a note less than 10 (10 is the highest # note). You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (RP0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Template used to display messages. This is a python new-style format string # used to format the message information. See doc for all details #msg-template= [FORMAT] # Maximum number of characters on a single line. max-line-length=100 # Regexp for a line that is allowed to be longer than the limit. ignore-long-lines=^\s*(# )??$ # Allow the body of an if to be on the same line as the test if there is no # else. single-line-if-stmt=no # List of optional constructs for which whitespace checking is disabled. `dict- # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. # `trailing-comma` allows a space between comma and closing bracket: (a, ). # `empty-line` allows space-only lines. no-space-check=trailing-comma,dict-separator # Maximum number of lines in a module max-module-lines=1000 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string=' ' # Number of spaces of indent required inside a hanging or continued line. indent-after-paren=4 # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. expected-line-ending-format= [SPELLING] # Spelling dictionary name. Available dictionaries: none. To make it working # install python-enchant package. spelling-dict= # List of comma separated words that should not be checked. spelling-ignore-words= # A path to a file that contains private dictionary; one word per line. spelling-private-dict-file= # Tells whether to store unknown words to indicated private dictionary in # --spelling-private-dict-file option instead of raising a message. spelling-store-unknown-words=no [LOGGING] # Logging modules to check that the string format arguments are in logging # function parameter format logging-modules=logging [BASIC] # List of builtins function names that should not be used, separated by a comma bad-functions=map,filter,input # Good variable names which should always be accepted, separated by a comma good-names=i,e,s,_,fd,fp # Bad variable names which should always be refused, separated by a comma bad-names=foo,bar,baz,toto,tutu,tata # Colon-delimited sets of names that determine each other's naming style when # the name regexes allow several styles. name-group= # Include a hint for the correct naming format with invalid-name include-naming-hint=no # Regular expression matching correct function names # original: #function-rgx=[a-z_][a-z0-9_]{2,30}$ function-rgx=[a-zA-Z_][a-zA-Z0-9_]{2,40}$ # Naming hint for function names function-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct variable names variable-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for variable names variable-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct constant names # original: #const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ const-rgx=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$ # Naming hint for constant names const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ # Regular expression matching correct attribute names attr-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for attribute names attr-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct argument names argument-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for argument names argument-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct class attribute names # original: #class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,40}|(__.*__))$ # Naming hint for class attribute names class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ # Regular expression matching correct inline iteration names inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ # Naming hint for inline iteration names inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ # Regular expression matching correct class names # original: #class-rgx=[A-Z_][a-zA-Z0-9]+$ class-rgx=[a-zA-Z_][a-zA-Z0-9]+$ # Naming hint for class names class-name-hint=[A-Z_][a-zA-Z0-9]+$ # Regular expression matching correct module names module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Naming hint for module names module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression matching correct method names # original: #method-rgx=[a-z_][a-z0-9_]{2,30}$ method-rgx=[a-zA-Z_][a-zA-Z0-9_]{2,40}$ # Naming hint for method names method-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match function or class names that do # not require a docstring. no-docstring-rgx=^_ # Minimum line length for functions/classes that require docstrings, shorter # ones are exempt. docstring-min-length=-1 [ELIF] # Maximum number of nested blocks for function / method body max-nested-blocks=5 [SIMILARITIES] # Minimum lines number of a similarity. min-similarity-lines=4 # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes # Ignore imports when computing similarities. ignore-imports=no [TYPECHECK] # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # List of module names for which member attributes should not be checked # (useful for modules/projects where namespaces are manipulated during runtime # and thus existing member attributes cannot be deduced by static analysis. It # supports qualified module names, as well as Unix pattern matching. ignored-modules= # List of classes names for which member attributes should not be checked # (useful for classes with attributes dynamically set). This supports can work # with qualified names. ignored-classes= # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. generated-members= [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. notes=FIXME,XXX,TODO [VARIABLES] # Tells whether we should check for unused import in __init__ files. init-import=no # A regular expression matching the name of dummy variables (i.e. expectedly # not used). dummy-variables-rgx=_$|dummy # List of additional names supposed to be defined in builtins. Remember that # you should avoid to define new builtins when possible. additional-builtins= # List of strings which can identify a callback function by name. A callback # name must start or end with one of those strings. callbacks=cb_,_cb [CLASSES] # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__,__new__,setUp # List of valid names for the first argument in a class method. valid-classmethod-first-arg=cls # List of valid names for the first argument in a metaclass class method. valid-metaclass-classmethod-first-arg=mcs # List of member names, which should be excluded from the protected access # warning. exclude-protected=_asdict,_fields,_replace,_source,_make [DESIGN] # Maximum number of arguments for function / method max-args=5 # Argument names that match this expression will be ignored. Default to name # with leading underscore ignored-argument-names=_.* # Maximum number of locals for function / method body max-locals=15 # Maximum number of return / yield for function / method body max-returns=6 # Maximum number of branch for function / method body max-branches=12 # Maximum number of statements in function / method body max-statements=50 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of attributes for a class (see R0902). max-attributes=7 # Minimum number of public methods for a class (see R0903). min-public-methods=2 # Maximum number of public methods for a class (see R0904). max-public-methods=20 # Maximum number of boolean expressions in a if statement max-bool-expr=5 [IMPORTS] # Deprecated modules which should not be used, separated by a comma deprecated-modules=regsub,TERMIOS,Bastion,rexec # Create a graph of every (i.e. internal and external) dependencies in the # given file (report RP0402 must not be disabled) import-graph= # Create a graph of external dependencies in the given file (report RP0402 must # not be disabled) ext-import-graph= # Create a graph of internal dependencies in the given file (report RP0402 must # not be disabled) int-import-graph= [EXCEPTIONS] # Exceptions that will emit a warning when being caught. Defaults to # "Exception" overgeneral-exceptions=Exception ================================================ FILE: facets_atlasmaker/requirements.txt ================================================ absl-py joblib mock nose pillow pylint requests ================================================ FILE: facets_atlasmaker/testdata/attributions.txt ================================================ https://commons.wikimedia.org/wiki/File:Googleplex-Patio-Aug-2014.JPG ================================================ FILE: facets_atlasmaker/testdata/testfiles_smalllist.csv ================================================ https://www.wikipedia/image1.png http://www.wordpress/testimage1.png http://www.npr.org/myimageA.jpg ================================================ FILE: facets_atlasmaker/testdata/testfiles_smalllist_with_dups.csv ================================================ https://www.wikipedia/image1.png http://www.wordpress/testimage1.png http://www.wordpress/testimage1.png http://www.npr.org/myimageA.jpg https://www.wikipedia/image1.png ================================================ FILE: facets_atlasmaker/testdata/wikipedia_images_16.csv ================================================ https://upload.wikimedia.org/wikipedia/commons/thumb/3/33/ARD-Hauptstadtstudio%2C_Berlin-Mitte%2C_Fassade%2C_170117%2C_ako.jpg/300px-ARD-Hauptstadtstudio%2C_Berlin-Mitte%2C_Fassade%2C_170117%2C_ako.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/3/3b/Marmot-edit1.jpg/300px-Marmot-edit1.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/1/19/Moscow_Gorky_Park_Pushkinsky_Bridge_08-2016_img3.jpg/300px-Moscow_Gorky_Park_Pushkinsky_Bridge_08-2016_img3.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/5/56/Bubo_bubo_sibiricus_-_01.JPG/300px-Bubo_bubo_sibiricus_-_01.JPG https://upload.wikimedia.org/wikipedia/commons/thumb/d/d1/M%C3%BCnster%2C_LVM_--_2017_--_6351-7.jpg/300px-M%C3%BCnster%2C_LVM_--_2017_--_6351-7.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/c/c6/2013_Porsche_911_Carrera_4S_%28991%29_%289626546987%29.jpg/300px-2013_Porsche_911_Carrera_4S_%28991%29_%289626546987%29.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/a/aa/NSB_Di_4_Saltfjell.jpg/300px-NSB_Di_4_Saltfjell.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/1/1b/Thai_Airways_International_Boeing_747-4D7_HS-TGP_MUC_2015_03.jpg/300px-Thai_Airways_International_Boeing_747-4D7_HS-TGP_MUC_2015_03.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/6/60/James_Webb_Space_Telescope_Mirrors_Will_Piece_Together_Cosmic_Puzzles_%2830108124923%29.jpg/300px-James_Webb_Space_Telescope_Mirrors_Will_Piece_Together_Cosmic_Puzzles_%2830108124923%29.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/4/4f/Shakedown_2008_Figure_1a.jpg/300px-Shakedown_2008_Figure_1a.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Groomed_skirun_from_Seceda_Val_Gardena.jpg/300px-Groomed_skirun_from_Seceda_Val_Gardena.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/f/f9/H-sovetskaa-11-5249.jpg/300px-H-sovetskaa-11-5249.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/5/59/Small_red_damselflies_%28Ceriagrion_tenellum%29_mating_female_typica.jpg/300px-Small_red_damselflies_%28Ceriagrion_tenellum%29_mating_female_typica.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/7/76/Ron-Zacapa-XO.jpg/300px-Ron-Zacapa-XO.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/0/0a/2015_Chinese_New_Year_Fashion_Show%2C_Sudirman_Street%2C_Yogyakarta%2C_2015-02-15_02.jpg/300px-2015_Chinese_New_Year_Fashion_Show%2C_Sudirman_Street%2C_Yogyakarta%2C_2015-02-15_02.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/c/ce/Canis_latrans_%28Yosemite%2C_2009%29.jpg/300px-Canis_latrans_%28Yosemite%2C_2009%29.jpg ================================================ FILE: facets_atlasmaker/utils/BUILD ================================================ licenses(["notice"]) # Apache 2.0 py_binary( name = "wikipedia_sourcelist_generator", srcs = ["wikipedia_sourcelist_generator.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], ) ================================================ FILE: facets_atlasmaker/utils/README.md ================================================ # Atlasmaker Utilities This directory contains utilities that can be useful for testing or experimenting with Atlasmaker. ## Wikipedia Sourcelist Generator The script connects to the Wikipedia API to get a list of URLs of Featured Images that you can use as input to Atlasmaker. Example usage for getting a list of 1000 images with debug messages printed to stdout: ```sh bazel run :wikipedia_sourcelist_generator -- --num_images=1000 --outputdir=$PWD --verbosity=1 ``` ================================================ FILE: facets_atlasmaker/utils/wikipedia_sourcelist_generator.py ================================================ """Script that grabs URLs of the desired N featured images from wikipedia. This list can then be used as a test input to Atlasmaker. Alternately, you can also specify images from a different category. """ import csv import os from absl import app from absl import flags from absl import logging import requests BASE_API_URL = 'https://commons.wikimedia.org/w/api.php' FLAGS = flags.FLAGS flags.DEFINE_integer('num_images', None, 'Number of image URLs you want to retrieve from ' 'Wikipedia\'s featured images category.') flags.DEFINE_string('outputdir', None, 'Output directory where CSV will be written.') flags.DEFINE_string('category', 'Featured pictures on Wikimedia Commons', 'Wikipedia category name.') flags.DEFINE_string('filename', 'wikipedia_images_list.csv', 'Name of output csv file') flags.mark_flag_as_required('num_images') flags.mark_flag_as_required('outputdir') def _get_pageids_for_category(category, limit=500, cmcontinue=None): """Returns a list of pageids for a specified category. See API docs: https://www.mediawiki.org/wiki/API:Categorymembers Args: category: Is the wikipedia category name as a string. limit: max number of pageids returned in response. Wikipedia by default limits us to 500. cmcontinue: continue hash used to request the next set of pages. Returns: List of pageids. """ # Replace any whitespaces with underscores per wikipedia's URL patterns. category_url_param = 'Category:%s' % str(category).replace(' ', '_') # Set query params. payload = {'action': 'query', 'list': 'categorymembers', 'cmprop': 'ids', 'cmlimit': limit, 'cmtype': 'file', 'cmtitle': category_url_param, 'format': 'json'} if cmcontinue is not None: payload['cmcontinue'] = cmcontinue r = requests.get(BASE_API_URL, params=payload) logging.debug('Connecting to url: %s' % r.url) response_data = r.json() try: cmcontinue = response_data['continue']['cmcontinue'] except KeyError: # Reached the end! cmcontinue = None pages_list = response_data['query']['categorymembers'] pageids = [] for element in pages_list: pageids.append(element['pageid']) return pageids, cmcontinue def _get_image_urls_for_pageids(page_ids): """Return a list of image urls associated with the given page ids. Note that the limit per request by default via the API is 50. Args: pageids: List of wikipedia page ids. """ # Concat page ids into a single string page_ids_concat = '|'.join([str(x) for x in page_ids]) image_urls = [] # Query params. payload = {'action': 'query', 'iiprop': 'url', 'prop': 'imageinfo', 'format': 'json', 'pageids': page_ids_concat} r = requests.get(BASE_API_URL, params=payload) logging.debug('Connecting to url: %s' % r.url) response_data = r.json() try: pages_dict = response_data['query']['pages'] except KeyError: raise KeyError('Unable to find the required elements when retrieving ' 'image URLs from response. See message: %s' % response_data) for pageid_key in pages_dict: images = pages_dict[pageid_key]['imageinfo'] for imageinfo_obj in images: image_urls.append(imageinfo_obj['url']) return image_urls def _chunk_page_ids(page_ids, chunk_size=50): """ Returns a list of lists to limit page ids being queried to specified size. :param chunk_size: :param page_ids: List of pageids :return: """ if not page_ids: return [] page_id_chunks = [] while len(page_ids) > chunk_size: page_id_chunks.append(page_ids[0:chunk_size]) page_ids = page_ids[chunk_size:] page_id_chunks.append(page_ids) return page_id_chunks def get_images_list(category, num_images_desired=100, categories_pageids_request_limit=100, images_pageids_request_limit=50): """ :param category: :param num_images_desired: Max number of images desired for return. :param categories_pageids_request_limit: :param images_pageids_request_limit: :return: """ pages_searched = 0 image_urls = [] pageids, cmcontinue = _get_pageids_for_category( category, categories_pageids_request_limit) logging.debug('Retrieved %d pageids from initial request.' % len(pageids)) for chunk in _chunk_page_ids(pageids, chunk_size=images_pageids_request_limit): pages_searched += len(chunk) image_urls.extend(_get_image_urls_for_pageids(chunk)) logging.debug('Found a total of %d image urls.' % len(image_urls)) if len(image_urls) > num_images_desired: logging.info('Traversed %d pages and returned the desired count of ' '%d image urls.' % (pages_searched, len(image_urls))) return image_urls[0:num_images_desired] while cmcontinue and len(image_urls) < num_images_desired: pageids, cmcontinue = _get_pageids_for_category( category, categories_pageids_request_limit, cmcontinue) for chunk in _chunk_page_ids(pageids, chunk_size=images_pageids_request_limit): pages_searched += len(chunk) image_urls.extend(_get_image_urls_for_pageids(chunk)) if len(image_urls) >= num_images_desired: logging.info('Traversed %d pages and returned the desired count of ' '%d image urls.' % (pages_searched, len(image_urls))) return image_urls[0:num_images_desired] if len(image_urls) < num_images_desired: logging.info('Unable to get the desired number of urls. Traversed %d page ' 'ids but only able to return %d URLs' % (pages_searched, len(image_urls))) return image_urls def write_to_csv(urls, outputfile): """ :param urls: List of image urls :param outputfile: path to outputfile. :return: """ with open(outputfile, 'w') as csvfile: csvwriter = csv.writer(csvfile) for url in urls: csvwriter.writerow([url]) def main(argv): del argv # Unused. image_urls = get_images_list(FLAGS.category, FLAGS.num_images) write_to_csv(image_urls, os.path.join(FLAGS.outputdir, FLAGS.filename)) if __name__ == '__main__': app.run(main) ================================================ FILE: facets_atlasmaker/utils/wikipedia_sourcelist_generator_test.py ================================================ """Unit tests for Wikipedia Sourcelist Generator.""" from absl import flags from absl.testing import absltest from absl.testing import flagsaver import wikipedia_sourcelist_generator as scraper FLAGS = flags.FLAGS class WikipediaSourcelistGeneratorTests(absltest.TestCase): def testChunkPageIds(self): results = scraper._chunk_page_ids([1, 2, 3, 4, 5, 6, 7], chunk_size=3) self.assertSameElements(results, [[1, 2, 3], [4, 5, 6], [7]]) def testChunkPageIdsEmptyList(self): results = scraper._chunk_page_ids([], chunk_size=3) self.assertEqual(results, []) if __name__ == '__main__': # Pass flags to pass initial required flags check. FLAGS.num_images = 10 FLAGS.outputdir = 'somedir' absltest.main() ================================================ FILE: facets_dive/Dive_demo.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Load UCI census and convert to json for sending to the visualization\n", "import pandas as pd\n", "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", " \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", " \"Hours per week\", \"Country\", \"Target\"]\n", "\n", "# Load dataframe from external CSV and add header information\n", "df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", " names=features, # name features for header row\n", " sep=r'\\s*,\\s*', # separator used in this dataset\n", " engine='python',\n", " skiprows=[0], # skip first row without data \n", " na_values=\"?\") # add ? where data is missing\n", "\n", "# set the sprite_size based on the number of records in dataset,\n", "# larger datasets can crash the browser if the size is too large (>50000)\n", "sprite_size = 32 if len(df.index)>50000 else 64\n", "\n", "jsonstr = df.to_json(orient='records')\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "# Display the Dive visualization for this data\n", "from IPython.core.display import display, HTML\n", "\n", "# Create Facets template \n", "HTML_TEMPLATE = \"\"\"\n", " \n", " \n", " \n", " \"\"\"\n", "\n", "# Load the json dataset and the sprite_size into the template\n", "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n", "\n", "# Display the template\n", "display(HTML(html))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.16" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: facets_dive/README.md ================================================ Facets Dive is a data visualization for interactively exploring large numbers of records at once—many thousands at a time. Each record should be an object with key/value pairs representing the features of that record, and the values should be strings or numbers. ## Getting Started In this section, you'll learn how to use Dive embedded in your on page or app. The two things you need are your own data, and the Dive Polymer element. ### Providing Data to Dive The `` element has many attributes you can set to customize its behavior, but the only one you absolutely must set is `data`. This should be an array of JavaScript objects, where each object represents a single record. For example, say your data is a list of food items. Each food has a unique name, belongs to a category, and provides calories. As JSON, your data would look something like this: ```js [{ "name": "apple", "category": "fruit", "calories": 95 },{ "name": "broccoli", "category": "vegetable", "calories": 50 },{ ...Many more foods... }] ``` The objects don't all need to have exactly the same set of keys. If an object is missing keys that are present in another object, that record will still be shown in Dive. At this time, Dive only handles numeric and string values. If the values on your objects are complex (like arrays, or nested objects), these will be cast as strings prior to being visualized. ### Providing Sprites For Dive to Render By default, Dive will render text onto a circle to represent each data point. However, you can supply a sprite atlas that it can use instead. We have provided a utility called *facets atlasmaker* that can be used for creating your own sprite atlas, given a list of image locations. Please see its [documentation](../facets_atlasmaker/) for more details. A sprite atlas is one big image containing many tiny images at predictable coordinates. Starting from the top-left hand corner of the image, sprites proceed across and down, from left-to-right and from top-to-bottom. For example, consider a data set with 10,000 data points. Indexed from zero, they'd be arranged in a sprite atlas like so: ``` +---------+---------+---------+- - - - -+---------+ | | | | | | | 0 | 1 | 2 | ... | 99 | | | | | | | +---------+---------+---------+- - - - -+---------+ | | | | | | | 100 | 101 | 102 | ... | 199 | | | | | | | +---------+---------+---------+- - - - -+---------+ | | | | | | | 200 | 201 | 202 | ... | 299 | | | | | | | +---------+---------+---------+- - - - -+---------+ | | | | | | . . . . . | . | . | . | . | . | . . . . . | | | | | | +---------+---------+---------+- - - - -+---------+ | | | | | | | 9900 | 9901 | 9902 | ... | 9999 | | | | | | | +---------+---------+---------+- - - - -+---------+ ``` To specify the URL to an atlas to use, set the `atlasUrl` property of the Dive Polymer Element in JavaScript (or the `atlas-url` attribute in HTML). If the atlas image is served from a different domain than the visualization, it will have to use [CORS headers](https://developer.mozilla.org/en-US/docs/Web/API/WebGL_API/Tutorial/Using_textures_in_WebGL#Cross-domain_textures) to be useful. In that case, you'll also have to set the `crossOrigin` property (or `cross-origin` HTML attribute) to be either `anonymous` or `use-credentials` just like you would for an `` or `