Repository: deepmind/interval-bound-propagation Branch: master Commit: 217a14d12686 Files: 42 Total size: 391.2 KB Directory structure: gitextract_zmfehsxp/ ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── examples/ │ ├── eval.py │ ├── language/ │ │ ├── README.md │ │ ├── config.py │ │ ├── data/ │ │ │ ├── character_substitution_enkey_sub1.json │ │ │ ├── sst_binary_character_vocabulary_sorted.txt │ │ │ └── sst_binary_character_vocabulary_sorted_pad.txt │ │ ├── exhaustive_verification.py │ │ ├── interactive_example.py │ │ ├── models.py │ │ ├── robust_model.py │ │ ├── robust_train.py │ │ └── utils.py │ └── train.py ├── interval_bound_propagation/ │ ├── __init__.py │ ├── src/ │ │ ├── __init__.py │ │ ├── attacks.py │ │ ├── bounds.py │ │ ├── crown.py │ │ ├── fastlin.py │ │ ├── layer_utils.py │ │ ├── layers.py │ │ ├── loss.py │ │ ├── model.py │ │ ├── relative_bounds.py │ │ ├── simplex_bounds.py │ │ ├── specification.py │ │ ├── utils.py │ │ └── verifiable_wrapper.py │ └── tests/ │ ├── attacks_test.py │ ├── bounds_test.py │ ├── crown_test.py │ ├── fastlin_test.py │ ├── layers_test.py │ ├── loss_test.py │ ├── model_test.py │ ├── relative_bounds_test.py │ ├── simplex_bounds_test.py │ └── specification_test.py └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. Head over to <https://cla.developers.google.com/> to see your current agreements on file or to sign a new one. 
You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. ## Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Interval Bound Propagation for Training Verifiably Robust Models This repository contains a simple implementation of Interval Bound Propagation (IBP) using TensorFlow: [https://arxiv.org/abs/1810.12715](https://arxiv.org/abs/1810.12715). It also contains an implementation of CROWN-IBP: [https://arxiv.org/abs/1906.06316](https://arxiv.org/abs/1906.06316). It also contains a sentiment analysis example under [`examples/language`](https://github.com/deepmind/interval-bound-propagation/tree/master/examples/language) for [https://arxiv.org/abs/1909.01492](https://arxiv.org/abs/1909.01492). This is not an official Google product ## Installation IBP can be installed with the following command: ```bash pip install git+https://github.com/deepmind/interval-bound-propagation ``` IBP will work with both the CPU and GPU version of tensorflow and dm-sonnet, but to allow for that it does not list Tensorflow as a requirement, so you need to install Tensorflow and Sonnet separately if you haven't already done so. 
## Usage The following command trains a small model on MNIST with epsilon set to 0.3: ```bash cd examples python train.py --model=small --output_dir=/tmp/small_model ``` ## Pretrained Models Models trained using IBP and CROWN-IBP can be downloaded [here](https://drive.google.com/open?id=1lovI-fUabgs3swMgIe7MLRvHB9KtjzNT). ### IBP models: | Dataset | Test epsilon | Model path | Clean accuracy | Verified accuracy | Accuracy under attack | |----------|--------------|----------------------------|----------------|-------------------|-----------------------| | MNIST | 0.1 | ibp/mnist_0.2_medium | 98.94% | 97.08% | 97.99% | | MNIST | 0.2 | ibp/mnist_0.4_large_200 | 98.34% | 95.47% | 97.06% | | MNIST | 0.3 | ibp/mnist_0.4_large_200 | 98.34% | 91.79% | 96.03% | | MNIST | 0.4 | ibp/mnist_0.4_large_200 | 98.34% | 84.99% | 94.56% | | CIFAR-10 | 2/255 | ibp/cifar_2-255_large_200 | 70.21% | 44.12% | 56.53% | | CIFAR-10 | 8/255 | ibp/cifar_8-255_large | 49.49% | 31.56% | 39.53% | ### CROWN-IBP models: | Dataset | Test epsilon | Model path | Clean accuracy | Verified accuracy | Accuracy under attack | |----------|--------------|------------------------------|----------------|-------------------|-----------------------| | MNIST | 0.1 | crown-ibp/mnist_0.2_large | 99.03% | 97.75% | 98.34% | | MNIST | 0.2 | crown-ibp/mnist_0.4_large | 98.38% | 96.13% | 97.28% | | MNIST | 0.3 | crown-ibp/mnist_0.4_large | 98.38% | 93.32% | 96.38% | | MNIST | 0.4 | crown-ibp/mnist_0.4_large | 98.38% | 87.51% | 94.95% | | CIFAR-10 | 2/255 | crown-ibp/cifar_2-255_large | 71.52% | 53.97% | 59.72% | | CIFAR-10 | 8/255 | crown-ibp/cifar_8-255_large | 47.14% | 33.30% | 36.81% | | CIFAR-10 | 16/255 | crown-ibp/cifar_16-255_large | 34.19% | 23.08% | 26.55% | In these tables, we evaluated the verified accuracy using IBP only. We evaluated the accuracy under attack using a 20-step untargeted PGD attack. 
You can evaluate these models yourself using `eval.py`, for example: ```bash cd examples python eval.py --model_dir pretrained_models/ibp/mnist_0.4_large_200/ \ --epsilon 0.3 ``` Note that we evaluated the CIFAR-10 2/255 CROWN-IBP model using CROWN-IBP (instead of pure IBP). You can do so yourself by setting the flag `--bound_method=crown-ibp`: ```bash python eval.py --model_dir pretrained_models/crown-ibp/cifar_2-255_large/ \ --epsilon 0.00784313725490196 --bound_method=crown-ibp ``` ## Giving credit If you use this code in your work, we ask that you cite this paper: Sven Gowal, Krishnamurthy Dvijotham, Robert Stanforth, Rudy Bunel, Chongli Qin, Jonathan Uesato, Relja Arandjelovic, Timothy Mann, and Pushmeet Kohli. "On the Effectiveness of Interval Bound Propagation for Training Verifiably Robust Models." _arXiv preprint arXiv:1810.12715 (2018)_. If you use CROWN-IBP, we also ask that you cite: Huan Zhang, Hongge Chen, Chaowei Xiao, Sven Gowal, Robert Stanforth, Bo Li, Duane Boning, Cho-Jui Hsieh. "Towards Stable and Efficient Training of Verifiably Robust Neural Networks." _arXiv preprint arXiv:1906.06316 (2019)_. If you use the sentiment analysis example, please cite: Po-Sen Huang, Robert Stanforth, Johannes Welbl, Chris Dyer, Dani Yogatama, Sven Gowal, Krishnamurthy Dvijotham, Pushmeet Kohli. "Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation." _EMNLP 2019_. ## Acknowledgements In addition to the people involved in the original IBP publication, we would like to thank Huan Zhang, Sumanth Dathathri and Johannes Welbl for their contributions. ================================================ FILE: examples/eval.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Evaluates a verifiable model on Mnist or CIFAR-10."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags
from absl import logging
import interval_bound_propagation as ibp
import tensorflow.compat.v1 as tf


FLAGS = flags.FLAGS
flags.DEFINE_enum('dataset', 'auto', ['auto', 'mnist', 'cifar10'], 'Dataset '
                  '("auto", "mnist" or "cifar10"). When set to "auto", '
                  'the dataset is inferred from the model directory path.')
flags.DEFINE_enum('model', 'auto', ['auto', 'tiny', 'small', 'medium',
                                    'large_200', 'large'], 'Model size. '
                  'When set to "auto", the model name is inferred from the '
                  'model directory path.')
flags.DEFINE_string('model_dir', None, 'Model checkpoint directory.')
flags.DEFINE_enum('bound_method', 'ibp', ['ibp', 'crown-ibp'],
                  'Bound propagation method. For models trained with CROWN-IBP '
                  'and beta_final=1 (e.g., CIFAR 2/255), use "crown-ibp". '
                  'Otherwise use "ibp".')
flags.DEFINE_integer('batch_size', 200, 'Batch size.')
flags.DEFINE_float('epsilon', .3, 'Target epsilon.')


def layers(model_size):
  """Returns the layer specification for a given model name.

  Args:
    model_size: One of 'tiny', 'small', 'medium', 'large_200' or 'large'.

  Returns:
    A tuple of layer descriptors: ('linear', units),
    ('conv2d', kernel_shape, channels, padding, stride) or
    ('activation', name), in the order they are applied.

  Raises:
    ValueError: If `model_size` is not one of the known names.
  """
  if model_size == 'tiny':
    return (
        ('linear', 100),
        ('activation', 'relu'))
  elif model_size == 'small':
    return (
        ('conv2d', (4, 4), 16, 'VALID', 2),
        ('activation', 'relu'),
        ('conv2d', (4, 4), 32, 'VALID', 1),
        ('activation', 'relu'),
        ('linear', 100),
        ('activation', 'relu'))
  elif model_size == 'medium':
    return (
        ('conv2d', (3, 3), 32, 'VALID', 1),
        ('activation', 'relu'),
        ('conv2d', (4, 4), 32, 'VALID', 2),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 64, 'VALID', 1),
        ('activation', 'relu'),
        ('conv2d', (4, 4), 64, 'VALID', 2),
        ('activation', 'relu'),
        ('linear', 512),
        ('activation', 'relu'),
        ('linear', 512),
        ('activation', 'relu'))
  elif model_size == 'large_200':
    # Some old large checkpoints have 200 hidden neurons in the last linear
    # layer.
    return (
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 2),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('linear', 200),
        ('activation', 'relu'))
  elif model_size == 'large':
    return (
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 2),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('linear', 512),
        ('activation', 'relu'))
  else:
    raise ValueError('Unknown model: "{}"'.format(model_size))


def show_metrics(metric_values, bound_method='ibp'):
  """Prints nominal, verified and PGD-attack accuracy as percentages.

  Args:
    metric_values: Object exposing `nominal_accuracy`, `attack_accuracy` and
      `verified_accuracy`; when `bound_method` is 'crown-ibp' its
      `crown_ibp_verified_accuracy` attribute is read instead of
      `verified_accuracy`.
    bound_method: 'ibp' or 'crown-ibp'; selects which verified accuracy is
      reported.
  """
  if bound_method == 'crown-ibp':
    verified_accuracy = metric_values.crown_ibp_verified_accuracy
  else:
    verified_accuracy = metric_values.verified_accuracy
  print('nominal accuracy = {:.2f}%, '
        'verified accuracy = {:.2f}%, '
        'accuracy under PGD attack = {:.2f}%'.format(
            metric_values.nominal_accuracy * 100.,
            verified_accuracy * 100.,
            metric_values.attack_accuracy * 100.))


def main(unused_args):
  """Restores the latest checkpoint in --model_dir and evaluates it.

  The dataset and model size are taken from the flags, or guessed from the
  model directory path when the corresponding flag is 'auto'.

  Args:
    unused_args: Positional command-line arguments passed by `app.run`.

  Raises:
    ValueError: If the dataset or model name cannot be guessed, or if the test
      set size is not a multiple of --batch_size.
    OSError: If no checkpoint is found in --model_dir.
  """
  dataset = FLAGS.dataset
  if FLAGS.dataset == 'auto':
    if 'mnist' in FLAGS.model_dir:
      dataset = 'mnist'
    elif 'cifar' in FLAGS.model_dir:
      dataset = 'cifar10'
    else:
      raise ValueError('Cannot guess the dataset name. Please specify '
                       '--dataset manually.')

  model_name = FLAGS.model
  if FLAGS.model == 'auto':
    # 'large_200' must be tried before 'large', which is a substring of it.
    model_names = ['large_200', 'large', 'medium', 'small', 'tiny']
    for name in model_names:
      if name in FLAGS.model_dir:
        model_name = name
        logging.info('Using guessed model name "%s".', model_name)
        break
    if model_name == 'auto':
      raise ValueError('Cannot guess the model name. Please specify --model '
                       'manually.')

  checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)
  if checkpoint_path is None:
    raise OSError('Cannot find a valid checkpoint in {}.'.format(
        FLAGS.model_dir))

  # Dataset.
  input_bounds = (0., 1.)
  num_classes = 10
  if dataset == 'mnist':
    data_train, data_test = tf.keras.datasets.mnist.load_data()
  else:
    assert dataset == 'cifar10', (
        'Unknown dataset "{}"'.format(dataset))
    data_train, data_test = tf.keras.datasets.cifar10.load_data()
    data_train = (data_train[0], data_train[1].flatten())
    data_test = (data_test[0], data_test[1].flatten())

  # Base predictor network.
  original_predictor = ibp.DNN(num_classes, layers(model_name))
  predictor = original_predictor
  if dataset == 'cifar10':
    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)
    predictor = ibp.add_image_normalization(original_predictor, mean, std)
  if FLAGS.bound_method == 'crown-ibp':
    predictor = ibp.crown.VerifiableModelWrapper(predictor)
  else:
    predictor = ibp.VerifiableModelWrapper(predictor)

  # Test using while loop.
  def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack):
    """Returns the test metrics averaged over all test batches.

    Args:
      batch_size: Number of test examples per batch; must divide the test set
        size exactly.
      attack_builder: Callable used to build the attack for each batch.

    Returns:
      A scalar-metrics tuple whose fields are the per-batch metrics summed
      over the loop and divided by the number of batches.

    Raises:
      ValueError: If the test set size is not a multiple of `batch_size`.
    """
    num_test_batches = len(data_test[0]) // batch_size
    # Raise instead of assert: batch_size comes from a user flag and an
    # assert would be stripped under `python -O`, silently dropping the
    # remainder of the test set.
    if len(data_test[0]) % batch_size != 0:
      raise ValueError('Test data is not a multiple of batch size.')

    def cond(i, *unused_args):
      return i < num_test_batches

    def body(i, metrics):
      """Compute the sum of all metrics."""
      test_data = ibp.build_dataset(data_test, batch_size=batch_size,
                                    sequential=True)
      predictor(test_data.image, override=True, is_training=False)
      input_interval_bounds = ibp.IntervalBounds(
          tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]),
          tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1]))
      predictor.propagate_bounds(input_interval_bounds)
      test_specification = ibp.ClassificationSpecification(
          test_data.label, num_classes)
      test_attack = attack_builder(predictor, test_specification, FLAGS.epsilon,
                                   input_bounds=input_bounds,
                                   optimizer_builder=ibp.UnrolledAdam)

      # Use CROWN-IBP bound or IBP bound.
      if FLAGS.bound_method == 'crown-ibp':
        test_losses = ibp.crown.Losses(predictor, test_specification,
                                       test_attack, use_crown_ibp=True,
                                       crown_bound_schedule=tf.constant(1.))
      else:
        test_losses = ibp.Losses(predictor, test_specification, test_attack)

      test_losses(test_data.label)
      new_metrics = [m + n
                     for m, n in zip(metrics, test_losses.scalar_metrics)]
      return i + 1, new_metrics

    if FLAGS.bound_method == 'crown-ibp':
      metrics = ibp.crown.ScalarMetrics
    else:
      metrics = ibp.ScalarMetrics
    total_count = tf.constant(0, dtype=tf.int32)
    total_metrics = [tf.constant(0, dtype=tf.float32)
                     for _ in range(len(metrics._fields))]
    total_count, total_metrics = tf.while_loop(
        cond,
        body,
        loop_vars=[total_count, total_metrics],
        back_prop=False,
        parallel_iterations=1)
    total_count = tf.cast(total_count, tf.float32)
    test_metrics = [m / total_count for m in total_metrics]
    return metrics(*test_metrics)

  test_metrics = get_test_metrics(
      FLAGS.batch_size, ibp.UntargetedPGDAttack)

  # Prepare to
load the pretrained-model. saver = tf.compat.v1.train.Saver(original_predictor.get_variables()) # Run everything. tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True with tf.train.SingularMonitoredSession(config=tf_config) as sess: logging.info('Restoring from checkpoint "%s".', checkpoint_path) saver.restore(sess, checkpoint_path) logging.info('Evaluating at epsilon = %f.', FLAGS.epsilon) metric_values = sess.run(test_metrics) show_metrics(metric_values, FLAGS.bound_method) if __name__ == '__main__': flags.mark_flag_as_required('model_dir') app.run(main) ================================================ FILE: examples/language/README.md ================================================ # Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation This directory contains an implementation of [Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation](https://arxiv.org/abs/1909.01492). ## Installation The installation can be done with the following commands: ```bash pip3 install "tensorflow-gpu<2" "dm-sonnet<2" "tensorflow-probability==0.7.0" "tensorflow-datasets" "absl-py" pip3 install git+https://github.com/deepmind/interval-bound-propagation ``` ## Usage The following command reproduces the [SST](https://nlp.stanford.edu/sentiment/) character-level experiments using a perturbation radius of 3: ```bash cd examples/language python3 robust_train.py ``` You should expect to see the following at the end of training (note that we use only the SST dev set for evaluation here). 
```bash step: 149900, train loss: 0.392112, verifiable train loss: 0.826042, train accuracy: 0.850000, dev accuracy: 0.747619, test accuracy: 0.747619, Train Bound = -0.42432, train verified: 0.800, dev verified: 0.695, test verified: 0.695 best dev acc 0.780952 best test acc 0.780952 best verified dev acc 0.716667 best verified test acc 0.716667 ``` We can verify the model in `config['model_location']='/tmp/robust_model/checkpoint/final'` using IBP. For example, after changing `config['delta']=1.`, we can evaluate the IBP verified accuracy with a perturbation radius of 1: ```bash python3 robust_train.py --analysis --batch_size=1 ``` We expect to see results like the following: ```bash test final correct: 0.748, verified: 0.722 {'datasplit': 'test', 'nominal': 0.7477064220183486, 'verify': 0.7224770642201835, 'delta': 1.0, 'num_perturbations': 268, 'model_location': '/tmp/robust_model/checkpoint/final', 'final': True} ``` We can also exhaustively search all valid perturbations to exhaustively verify the models: ```bash python3 exhaustive_verification.py --num_examples=0 ``` We should expect the following results: ```bash verified_proportion: 0.7350917431192661 {'delta': 1, 'character_level': True, 'mode': 'validation', 'checkpoint_path': '/tmp/robust_model/checkpoint/final', 'verified_proportion': 0.7350917431192661} ``` The IBP verified accuracy, `0.7224770642201835`, is a lower bound on the exhaustive verification result, `0.7350917431192661`. Furthermore, we can also align the predictions between the IBP verification and exhaustive verification. There should not be cases where IBP can verify (no attack can change the predictions) and exhaustive verification cannot verify (there exists an attack that can change the predictions), since IBP provides a lower bound on the true robust accuracy (via exhaustive search). 
## Reference If you use this code in your work, please cite the accompanying paper: ``` @inproceedings{huang-2019-achieving, title = "Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation", author = "Po-Sen Huang and Robert Stanforth and Johannes Welbl and Chris Dyer and Dani Yogatama and Sven Gowal and Krishnamurthy Dvijotham and Pushmeet Kohli", booktitle = "Empirical Methods in Natural Language Processing (EMNLP)", year = "2019", pages = "4081--4091", } ``` ## Disclaimer This is not an official Google product. ================================================ FILE: examples/language/config.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Configuration parameters for sentence representation models.""" def get_config(): """Returns the default configuration as a dict.""" config = {} config['dataset'] = 'sst' # Convolutional architecture. # Format: Tuple/List for a Conv layer (filters, kernel_size, pooling_size) # Otherwise, nonlinearity. config['conv_architecture'] = ((100, 5, 1), 'relu') # Fully connected layer 1 hidden sizes (0 means no layer). config['conv_fc1'] = 0 # Fully connected layer 2 hidden sizes (0 means no layer). config['conv_fc2'] = 0 # Number of allowable perturbations. # (delta specifies the budget, i.e., how many may be used at once.) 
config['delta'] = 3.0 # Allow each character to be changed to another character. config['synonym_filepath'] = 'data/character_substitution_enkey_sub1.json' config['max_padded_length'] = 268 # (~1*268) Max num_perturbations. # seqlen * max_number_synonyms (total number of elementary perturbations) config['num_perturbations'] = 268 config['vocab_filename'] = 'data/sst_binary_character_vocabulary_sorted.txt' # Need to add pad for analysis (which is what is used after # utils.get_merged_vocabulary_file). config['vocab_filename_pad'] = ( 'data/sst_binary_character_vocabulary_sorted_pad.txt') config['embedding_dim'] = 150 config['delta_schedule'] = True config['verifiable_loss_schedule'] = True # Ratio between the task loss and verifiable loss. config['verifiable_loss_ratio'] = 0.75 # Aggregated loss of the verifiable training objective # (among softmax, mean, max). config['verifiable_training_aggregation'] = 'softmax' config['data_id'] = 1 config['model_location'] = '/tmp/robust_model/checkpoint/final' return config ================================================ FILE: examples/language/data/character_substitution_enkey_sub1.json ================================================ {"z": ["x"], "y": ["t"], "x": ["s"], "w": ["d"], "v": ["c"], "u": ["8"], "t": ["f"], "s": ["e"], "r": ["g"], "q": ["s"], "p": [";"], "o": ["k"], "n": ["m"], "m": ["j"], "l": ["p"], "k": ["."], "j": ["i"], "i": ["u"], "h": ["n"], "g": ["v"], "f": ["c"], "e": ["r"], "d": ["f"], "c": ["d"], "b": ["g"], "a": ["x"]} ================================================ FILE: examples/language/data/sst_binary_character_vocabulary_sorted.txt ================================================ ! # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; = ? ` a b c d e f g h i j k l m n o p q r s t u v w x y z ================================================ FILE: examples/language/data/sst_binary_character_vocabulary_sorted_pad.txt ================================================ ! # $ % & ' ( ) * + , - . 
/ 0 1 2 3 4 5 6 7 8 9 : ; = ? ` a b c d e f g h i j k l m n o p q r s t u v w x y z ================================================ FILE: examples/language/exhaustive_verification.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Functionality for exhaustive adversarial attacks on synonym perturbations. Models restored from checkpoint can be tested w.r.t their robustness to exhaustive-search adversaries, which have a fixed perturbation budget with which they can flip words to synonyms. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import copy import imp import json import pprint from absl import app from absl import flags from absl import logging import numpy as np import tensorflow.compat.v1 as tf import tensorflow_datasets as tfds import tqdm import interactive_example flags.DEFINE_boolean('character_level', True, 'Character level model.') flags.DEFINE_boolean('debug_mode', False, 'Debug mode.') flags.DEFINE_string('checkpoint_path', '/tmp/robust_model/checkpoint/final', 'Checkpoint path.') flags.DEFINE_string('dataset', 'sst', 'Dataset name. train, dev, or test.') flags.DEFINE_string('mode', 'validation', 'Dataset part. 
def load_synonyms(synonym_filepath=None):
  """Reads the JSON synonym table stored at `synonym_filepath`.

  Args:
    synonym_filepath: path of a JSON file holding a mapping from a token to
      the list of its allowed substitutions. Opened through `tf.gfile`.

  Returns:
    A `collections.defaultdict(list)` pre-populated with the file contents, so
    that looking up a token without an entry yields an empty list.
  """
  with tf.gfile.Open(synonym_filepath) as synonym_file:
    loaded = json.load(synonym_file)
  # Seed the defaultdict directly with the parsed mapping.
  return collections.defaultdict(list, loaded)
def expand_by_one_perturbation(original_tokenized_sentence,
                               tokenized_sentence, synonym_dict):
  """Returns all sentences obtained by substituting exactly one position.

  For every position, each synonym of the token found at that position in
  `original_tokenized_sentence` yields one new sentence: a copy of
  `tokenized_sentence` in which only that single position is overwritten.
  Other mentions of the same token elsewhere in the sentence are left
  untouched. Positions beyond the shorter of the two sentences are ignored.

  Args:
    original_tokenized_sentence: List[str]. The unperturbed tokens; synonyms
      are always looked up from these.
    tokenized_sentence: List[str]. The (possibly already perturbed) tokens
      that the substitution is applied to. Not modified.
    synonym_dict: dict, mapping tokens (str) to lists of synonyms (list of
      str). Tokens without an entry have no synonyms. Not modified.

  Returns:
    new_sentences_list: List[List[str]]. Outer list is across the different
      (position, synonym) replacements, ordered by position and then by
      synonym order. Inner list is over (str) tokens.
  """
  num_positions = min(len(original_tokenized_sentence),
                      len(tokenized_sentence))
  new_sentences_list = []
  for position in range(num_positions):
    # `.get` rather than indexing: indexing a defaultdict would insert an
    # empty entry for every token seen, and a plain dict would raise KeyError
    # for tokens that simply have no synonyms.
    synonyms = synonym_dict.get(original_tokenized_sentence[position], ())
    for synonym in synonyms:
      # Replace only this particular mention.
      new_sentence = list(tokenized_sentence)
      new_sentence[position] = synonym
      new_sentences_list.append(new_sentence)
  return new_sentences_list
def remove_duplicates(list_of_list_of_tokens):
  """Drops repeated token sequences, keeping the first occurrence of each.

  Sentences are compared as tuples of tokens, so tokens may contain any
  character (including '|') and empty sentences are preserved as empty lists.

  Args:
    list_of_list_of_tokens: List[List[str]]. Tokenised sentences, possibly
      containing duplicates. Not modified.

  Returns:
    List[List[str]] with one fresh list per distinct sentence, in order of
    first appearance.
  """
  seen = set()
  unique_sentences = []
  for tokens in list_of_list_of_tokens:
    key = tuple(tokens)  # Lists are not hashable; tuples are.
    if key not in seen:
      seen.add(key)
      unique_sentences.append(list(tokens))
  return unique_sentences
altered_sentences = find_up_to_depth_k_perturbations(x, x, synonym_dict, delta) altered_sentences = altered_sentences + [x] # Form batches of these altered sentences. batch = [] num_forward_passes = len(altered_sentences) for sentence in altered_sentences: any_prediction_wrong = False batch.append(sentence) # When batch_size is reached, make predictions, break if any label flip if len(batch) == sst_model.batch_size: # np array of size [batch_size] predictions, _ = sst_model.batch_predict_sentiment( batch, is_tokenised=True) # Check any prediction that is different from the true label. any_prediction_wrong = np.any(predictions != y) if any_prediction_wrong: wrong_index = np.where(predictions != y)[0].tolist()[0] counter_example = ' '.join([str(c) for c in batch[wrong_index]]) if FLAGS.debug_mode: logging.info('\nOriginal example: %s, prediction: %d', ' '.join([str(c) for c in sentence]), y) logging.info('\ncounter example: %s, prediction: %s', counter_example, predictions[wrong_index].tolist()) counter_prediction = predictions[wrong_index] # Break. No need to evaluate further. return False, counter_example, counter_prediction, num_forward_passes # Start filling up the next batch. batch = [] if not batch: # No remainder, not previously broken the loop. return True, None, None, num_forward_passes else: # Remainder -- what didn't fit into a full batch of size batch_size. # We use the first altered_sentence to pad. 
def verify_dataset(dataset, config_dict, model_location, synonym_dict, delta):
  """Tries to verify against perturbation attacks up to delta.

  Restores the model, selects `FLAGS.num_examples` examples starting at offset
  `FLAGS.skip_batches` (all remaining examples if `FLAGS.num_examples` is 0),
  and runs `verify_exhaustively` on each of them.

  Args:
    dataset: list of (tokens, label) samples, as consumed by
      `verify_exhaustively`.
    config_dict: keyword arguments forwarded to the model constructor.
    model_location: checkpoint path the model is restored from.
    synonym_dict: mapping from a token to its list of allowed substitutions.
    delta: int. Maximum number of simultaneous perturbations.

  Returns:
    A tuple (verified_proportion, verified_list, samples, counter_examples,
    counter_predictions, total_num_forward_passes). `verified_list` holds one
    bool per analysed example and `verified_proportion` is its mean (NaN when
    no example was selected). The remaining four lists are only filled when
    `FLAGS.debug_mode` is set and are empty otherwise.
  """
  sst_model = interactive_example.InteractiveSentimentPredictor(
      config_dict, model_location,
      max_padded_length=FLAGS.max_padded_length,
      num_perturbations=FLAGS.num_perturbations)

  # Slice once; the same selection is logged, iterated and sanity-checked.
  num_examples = FLAGS.num_examples if FLAGS.num_examples else len(dataset)
  first = FLAGS.skip_batches
  selected = dataset[first:first + num_examples]

  logging.info('dataset size: %d', len(dataset))
  logging.info('skip_batches: %d', FLAGS.skip_batches)
  logging.info('num_examples: %d', num_examples)
  logging.info('new dataset size: %d', len(selected))

  verified_list = []  # Holds boolean entries, across dataset.
  samples = []
  labels = []
  counter_examples = []
  counter_predictions = []
  total_num_forward_passes = []

  # tqdm wraps the sized list (not the enumerate object) so that it knows the
  # total and can display progress/ETA.
  for i, sample in enumerate(tqdm.tqdm(selected)):
    if FLAGS.debug_mode:
      logging.info('index: %d', i)
    (verified_bool, counter_example, counter_prediction,
     num_forward_passes) = verify_exhaustively(
         sample, synonym_dict, sst_model, delta, FLAGS.truncated_len)
    if FLAGS.debug_mode:
      # Per-example diagnostics are only collected in debug mode.
      samples.append(''.join(sample[0]))
      labels.append(sample[1])
      counter_examples.append(counter_example)
      counter_predictions.append(counter_prediction)
      total_num_forward_passes.append(num_forward_passes)
    verified_list.append(verified_bool)

  # np.mean([]) is NaN as well, but emits a RuntimeWarning; be explicit.
  verified_proportion = np.mean(verified_list) if verified_list else float(
      'nan')
  assert len(verified_list) == len(selected)
  return (verified_proportion, verified_list, samples, counter_examples,
          counter_predictions, total_num_forward_passes)
def main(args):
  """Runs exhaustive verification as configured by the command-line flags.

  Loads the training configuration from `FLAGS.config_path`, the synonym table
  named in that configuration and the dataset split `FLAGS.mode`, then logs
  the proportion of examples that could be verified.

  Args:
    args: positional command-line arguments from `app.run`; unused.
  """
  del args

  # Execute the config file inside a fresh ad-hoc module and query it.
  # NOTE(review): exec runs arbitrary code from config_path; only point this
  # at trusted configuration files.
  config_module = imp.new_module('config')
  with open(FLAGS.config_path, 'r') as config_file:
    config_source = config_file.read()
  exec(config_source, config_module.__dict__)  # pylint: disable=exec-used
  config = config_module.get_config()

  # Keyword arguments for the model constructor.
  config_dict = dict(
      task=FLAGS.task,
      batch_size=FLAGS.batch_size,
      pooling=FLAGS.pooling,
      learning_rate=0.,
      config=config,
      embedding_dim=config['embedding_dim'],
      fine_tune_embeddings=FLAGS.fine_tune_embeddings,
      num_oov_buckets=FLAGS.num_oov_buckets,
      max_grad_norm=0.)

  synonym_dict = load_synonyms(config['synonym_filepath'])
  dataset = load_dataset(FLAGS.mode, FLAGS.character_level)

  # Compute verifiable accuracy on dataset; FLAGS.delta is the maximum
  # verification range. Only the first element of the result is needed.
  results = verify_dataset(dataset, config_dict, FLAGS.checkpoint_path,
                           synonym_dict, FLAGS.delta)
  verified_proportion = results[0]

  logging.info('verified_proportion:')
  logging.info(str(verified_proportion))
  summary = {
      'delta': FLAGS.delta,
      'character_level': FLAGS.character_level,
      'mode': FLAGS.mode,
      'checkpoint_path': FLAGS.checkpoint_path,
      'verified_proportion': verified_proportion
  }
  logging.info(summary)
class InteractiveSentimentPredictor(object):
  """Can be used to interact with a trained sentiment analysis model.

  Builds the `robust_model.RobustModel` graph, restores its variables from a
  checkpoint and keeps a session open so that predictions can be requested
  repeatedly. The model consumes fixed-size batches of `self.batch_size`.
  """

  def __init__(self, config_dict, model_location, max_padded_length=0,
               num_perturbations=0):
    """Builds the graph and restores the checkpoint.

    Args:
      config_dict: keyword arguments for `robust_model.RobustModel`.
      model_location: checkpoint path to restore the variables from.
      max_padded_length: int. If non-zero, overrides the model config entry
        'max_padded_length' before the graph is built.
      num_perturbations: int. If non-zero, overrides the model config entry
        'num_perturbations' before the graph is built.
    """
    self.graph_tensor_producer = robust_model.RobustModel(**config_dict)
    self.batch_size = self.graph_tensor_producer.batch_size
    # The model config is read with item access (config['...']) and is a plain
    # dict when produced by get_config(), so overrides must use item
    # assignment; attribute assignment raises AttributeError on a dict.
    if max_padded_length:
      self.graph_tensor_producer.config[
          'max_padded_length'] = max_padded_length
    if num_perturbations:
      self.graph_tensor_producer.config[
          'num_perturbations'] = num_perturbations
    self.graph_tensors = self.graph_tensor_producer()

    network_saver = tf.train.Saver(self.graph_tensor_producer.variables)
    self.open_session = tf.Session()
    self.open_session.run(tf.tables_initializer())
    network_saver.restore(self.open_session, model_location)

  @staticmethod
  def _dense_indices(batch_size, length):
    """Returns all (row, column) index pairs of a dense grid, row-major.

    Args:
      batch_size: int. Number of rows.
      length: int. Number of columns.

    Returns:
      int64 np.array of shape [batch_size * length, 2]; entry
      `row * length + column` holds `(row, column)`.
    """
    rows = np.repeat(np.arange(batch_size, dtype=np.int64), length)
    columns = np.tile(np.arange(length, dtype=np.int64), batch_size)
    return np.stack([rows, columns], axis=1)

  def _run_dev_predictions(self, values, length):
    """Feeds a full batch of equal-length sentences through the 'dev' graph.

    Args:
      values: np.array holding the `self.batch_size * length` tokens of the
        batch, sentence after sentence.
      length: int. Number of tokens in every sentence.

    Returns:
      (predicted_labels, logits): the per-row argmax of the logits, and the
      logits themselves as returned by the session.
    """
    mb_tokens = SparseTensorValue(
        indices=self._dense_indices(self.batch_size, length),
        values=values,
        dense_shape=[self.batch_size, length])
    mb_num_tokens = np.array([length]*self.batch_size)

    dev_tensors = self.graph_tensors['dev']
    feed_dict = {
        dev_tensors['tokens']: mb_tokens,
        dev_tensors['num_tokens']: mb_num_tokens,
    }
    logits = self.open_session.run(dev_tensors['predictions'], feed_dict)
    return np.argmax(logits, axis=1), logits

  def batch_predict_sentiment(self, list_of_sentences, is_tokenised=True):
    """Computes sentiment predictions for a batch of sentences.

    Note: the model batch size is usually hard-coded in the model (e.g. at 64).
    We require that len(list_of_sentences)==self.batch_size.
    If padding is necessary to reach as many sentences, this should happen
    outside of this function.

    Important: we assume that each sentence has the same number of tokens.

    Args:
      list_of_sentences: List[str] in case is_tokenised is False, or
        List[List[str]] in case is_tokenised is True. Holds inputs whose
        sentiment is to be classified.
      is_tokenised: bool. Whether sentences are already tokenised. If not,
        naive whitespace splitting tokenisation is applied.

    Returns:
      batch_label_predictions: np.array of shape [self.batch_size]
        holding integers, representing model predictions for each input.
      logits: the model outputs the predictions were derived from
        ([batch_size x n_labels]).
    """
    # Prepare inputs.
    tokenised_sentence_list = [
        sentence if is_tokenised else sentence.lower().split(' ')
        for sentence in list_of_sentences]
    length = len(tokenised_sentence_list[0])
    assert all([len(x) == length for x in tokenised_sentence_list])
    assert len(tokenised_sentence_list) == self.batch_size

    # Flatten sentence after sentence, matching the row-major index layout.
    values = np.array([word
                       for sentence in tokenised_sentence_list  # pylint:disable=g-complex-comprehension
                       for word in sentence])
    batch_label_predictions, logits = self._run_dev_predictions(values, length)
    return batch_label_predictions, logits

  def predict_sentiment(self, sentence, tokenised=False):
    """Computes sentiment of a single sentence.

    The sentence is replicated `self.batch_size` times to fill a batch.

    Args:
      sentence: str if `tokenised` is False (it is then lower-cased and split
        on single spaces), otherwise a list of tokens.
      tokenised: bool. Whether `sentence` is already a list of tokens.

    Returns:
      final_prediction: the predicted label for the sentence.
      logits: the model outputs for the whole (replicated) batch.
    """
    if tokenised:
      inputstring_tokenised = sentence
    else:
      assert isinstance(sentence, str)
      # Simple tokenisation.
      inputstring_tokenised = sentence.lower().split(' ')
    length = len(inputstring_tokenised)

    values = np.array(inputstring_tokenised*self.batch_size)
    predicted_label, logits = self._run_dev_predictions(values, length)
    final_prediction = predicted_label[0]
    # Check that prediction same everywhere (had batch of identical inputs).
    assert np.all(predicted_label == final_prediction)
    return final_prediction, logits
class SentenceRepresenterConv(snt.AbstractModule):
  """Use stacks of 1D Convolutions to build a sentence representation.

  The layer stack is described by config['conv_architecture']; the result is
  pooled over axis 1 and optionally passed through up to two fully connected
  ReLU layers sized by config['conv_fc1'] and config['conv_fc2'].
  """

  def __init__(self, config, keep_prob=1., pooling='max',
               name='sentence_rep_conv'):
    """Stores the configuration; no graph ops are created here.

    Args:
      config: mapping read for the keys 'conv_architecture', 'conv_fc1' and
        'conv_fc2'.
      keep_prob: dropout keep probability; dropout is applied where
        `keep_prob < 1` evaluates as true.
      pooling: 'max' or 'average'; selects the pooling over axis 1.
      name: module name passed to the Sonnet base class.
    """
    super(SentenceRepresenterConv, self).__init__(name=name)
    self._config = config
    self._pooling = pooling
    self._keep_prob = keep_prob

  def _build(self, padded_word_embeddings, length):
    """Builds the representation for a batch of embedded sentences.

    Args:
      padded_word_embeddings: input tensor fed to the first layer.
        NOTE(review): presumably [batch x seq_length x embedding_dim] -
        confirm against the caller.
      length: per-example lengths; only used as the divisor for 'average'
        pooling. NOTE(review): presumably shape [batch] - confirm.

    Returns:
      The tensor produced by the last applied layer.

    Raises:
      RuntimeError: if an entry of config['conv_architecture'] is neither a
        tuple/list nor the string 'relu'.
    """
    x = padded_word_embeddings
    for layer in self._config['conv_architecture']:
      if isinstance(layer, tuple) or isinstance(layer, list):
        # A conv layer spec: (filters, kernel_size, pooling_size).
        filters, kernel_size, pooling_size = layer
        conv = snt.Conv1D(
            output_channels=filters,
            kernel_shape=kernel_size)
        x = conv(x)
        # A pooling size of 0/None/1 means no max-pooling after this conv.
        if pooling_size and pooling_size > 1:
          x = _max_pool_1d(x, pooling_size)
      elif layer == 'relu':
        x = tf.nn.relu(x)
        # NOTE(review): if keep_prob is a tensor rather than a Python number,
        # `< 1` does not produce a Python bool - confirm this branch behaves
        # as intended for such callers.
        if self._keep_prob < 1:
          x = tf.nn.dropout(x, keep_prob=self._keep_prob)
      else:
        raise RuntimeError('Bad layer type {} in conv'.format(layer))
    # Final layer pools over the remaining sequence length to get a
    # fixed sized vector.
    # Any pooling value other than 'max' or 'average' leaves x unpooled.
    if self._pooling == 'max':
      x = tf.reduce_max(x, axis=1)
    elif self._pooling == 'average':
      # Average = sum over axis 1 divided by the supplied lengths (not by the
      # size of axis 1).
      x = tf.reduce_sum(x, axis=1)
      lengths = tf.expand_dims(tf.cast(length, tf.float32), axis=1)
      x = x / lengths

    # Optional fully connected layers; a size of 0 disables the layer.
    if self._config['conv_fc1']:
      fc1_layer = snt.Linear(output_size=self._config['conv_fc1'])
      x = tf.nn.relu(fc1_layer(x))
      if self._keep_prob < 1:
        x = tf.nn.dropout(x, keep_prob=self._keep_prob)
    if self._config['conv_fc2']:
      fc2_layer = snt.Linear(output_size=self._config['conv_fc2'])
      x = tf.nn.relu(fc2_layer(x))
      if self._keep_prob < 1:
        x = tf.nn.dropout(x, keep_prob=self._keep_prob)
    return x
def _pad_fixed(x, axis, padded_length):
  """Zero-pads or crops `x` so that `axis` has exactly `padded_length` entries.

  Unlike batch-specific padding, the resulting size is fixed and recorded in
  the static shape of the returned tensor.

  Args:
    x: tensor to pad.
    axis: axis to pad along; only `axis == 1` is supported (asserted).
    padded_length: int. Target size of `axis`.

  Returns:
    Tensor equal to `x` followed by zeros along `axis`, cropped to
    `padded_length` along axis 1.
  """
  # Number of entries missing along `axis` (never negative).
  missing = tf.maximum(padded_length - tf.shape(x)[axis], 0)
  filler_shape = x.shape.as_list()
  filler_shape[axis] = missing
  # Pad zero as in utils.get_padded_indexes.
  filler = tf.zeros(dtype=x.dtype, shape=filler_shape)
  padded = tf.concat([x, filler], axis=axis)
  # The crop below is hard-wired to the second axis.
  assert axis == 1
  padded = padded[:, :padded_length]
  # Record the now-known size in the static shape.
  static_shape = padded.shape.as_list()
  static_shape[axis] = padded_length
  padded.set_shape(static_shape)
  return padded
def parse(data_dict):
  """Converts one dataset record into the format used for sst_binary.

  Args:
    data_dict: dict with entries 'label' and 'sentence'; the sentence is
      decoded byte by byte.

  Returns:
    dict with 'sentiment' (the label, passed through unchanged) and 'sentence'
    (a tf.SparseTensor of single-character strings).
  """
  sentiment = data_dict['label']
  raw_sentence = data_dict['sentence']

  # Raw string -> vector of byte values -> sparse vector.
  byte_values = tf.decode_raw(raw_sentence, tf.uint8)
  byte_values.set_shape((None,))
  sparse_bytes = tfp.math.dense_to_sparse(byte_values)

  if six.PY3:
    def safe_chr(code):
      # Byte values of 128 and above are replaced by '?'.
      if code >= 128:
        return '?'
      return chr(code)
  else:
    safe_chr = chr
  to_char = np.vectorize(safe_chr)

  char_values = tf.py_func(to_char, [sparse_bytes.values], tf.string)
  sentence = tf.SparseTensor(indices=sparse_bytes.indices,
                             values=char_values,
                             dense_shape=sparse_bytes.dense_shape)
  return {'sentiment': sentiment, 'sentence': sentence}
Returns: train_data, dev_data, test_data, num_classes """ if self.config.get('dataset', '') == 'sst': train_data = GeneratedDataset(None, self.batch_size, mode='train', num_examples=67349) dev_data = GeneratedDataset(None, self.batch_size, mode='validation', num_examples=872) test_data = GeneratedDataset(None, self.batch_size, mode='validation', num_examples=872) num_classes = 2 return train_data, dev_data, test_data, num_classes else: raise ValueError('Not supported dataset') def get_representation(self, tokens, num_tokens): if tokens.dtype == tf.float32: return self.sentence_representer(tokens, num_tokens) else: # dtype == tf.string return self.sentence_representer(self.embed_pad(tokens), num_tokens) def add_representation(self, minibatch): """Compute sentence representations. Args: minibatch: a minibatch of sequences of embeddings. Returns: joint_rep: representation of sentences or concatenation of sentence vectors. """ joint_rep = self.get_representation(minibatch.tokens, minibatch.num_tokens) result = {'representation1': joint_rep} return joint_rep, result def add_train_ops(self, num_classes, joint_rep, minibatch): """Add ops for training in the computation graph. Args: num_classes: number of classes to predict in the task. joint_rep: the joint sentence representation if the input is sentence pairs or the representation for the sentence if the input is a single sentence. minibatch: a minibatch of sequences of embeddings. Returns: train_accuracy: the accuracy on the training dataset loss: training loss. opt_step: training op. """ if self.linear_classifier is None: classifier_layers = [] classifier_layers.append(snt.Linear(num_classes)) self.linear_classifier = snt.Sequential(classifier_layers) logits = self.linear_classifier(joint_rep) # Losses and optimizer. 
def create_perturbation_ops(self, minibatch, synonym_values, vocab_table):
  """Perturb data_batch using synonym_values.

  For every example this builds a fixed-size list of
  (sequence position, replacement token) pairs. Pairs come first from the
  non-zero entries of the synonym table rows of the example's tokens; the list
  is then filled up with the example's own tokens at their own positions
  (perturbations that have no effect) and cropped to
  `self.config['num_perturbations']` entries.

  Args:
    minibatch: batch providing `tokens` and `num_tokens`.
    synonym_values: [vocab_size x max_num_synonyms] table of synonym token
      ids. Entries equal to 0 are not returned by `tf.where` below and so act
      as empty slots.
    vocab_table: lookup table passed to `utils.get_padded_indexes`.

  Returns:
    A `Perturbation` whose `positions` and `tokens` are both
    [batch_size x num_perturbations].
  """
  data_batch = _pad_fixed(
      utils.get_padded_indexes(vocab_table, minibatch.tokens,
                               self.batch_size),
      axis=1, padded_length=self.config['max_padded_length'])

  # synonym_values: [vocab_size x max_num_synonyms]
  # data_batch: [batch_size x seq_length]
  # [batch_size x seq_length x max_num_synonyms] - synonyms for each token.
  # Defaults to same word in case of no other synonyms.
  synonym_ids = tf.gather(synonym_values, data_batch, axis=0)

  # Split along batchsize. Elements shape: [seq_length x max_num_synonyms].
  synonym_ids_per_example = tf.unstack(synonym_ids, axis=0)

  # Loop across batch.
  # synonym_ids_this_example shape: [seq_length x max_num_synonyms]
  sequence_positions_across_batch, values_across_batch = [], []
  for i_sample, synonym_ids_this_example in enumerate(
      synonym_ids_per_example):
    # [num_nonzero, 2]. The rows are pairs of (t,s), where t is an index for
    # a time step, and s is an index into the max_num_synonyms dimension.
    nonzero_indices = tf.where(synonym_ids_this_example)

    # shape [num_nonzero]. Corresponding to the entries at nonzero_indices
    synonym_tokens = tf.gather_nd(params=synonym_ids_this_example,
                                  indices=nonzero_indices)

    # [num_nonzero] - Of the (t,s) pairs in nonzero_indices, pick only the
    # time dimension (t), corresponding to perturbation positions in the
    # sequence.
    perturbation_positions_this_example = nonzero_indices[:, 0]

    # The main logic is done. Now follows padding to a fixed length of
    # num_perturbations. However, this cannot be done with 0-padding, as it
    # would introduce a new (zero) vertex. Instead, we duplicate existing
    # tokens as perturbations (which have no effect), until we have reached a
    # total of num_perturbations perturbations. In this case, the padded
    # tokens are the original tokens from the data_batch. The padded positions
    # are all the positions (using range) corresponding to the padded tokens.

    # How often seq-length fits into maximum num perturbations
    # (+ 1 so the tiled padding is always at least num_perturbations long).
    padding_multiplier = tf.floordiv(self.config['num_perturbations'],
                                     tf.cast(minibatch.num_tokens[i_sample],
                                             tf.int32)) + 1

    # original tokens
    # [seq_length]
    original_tokens = data_batch[i_sample, :minibatch.num_tokens[i_sample]]
    # [padding_multiplier * seq_length]. Repeat several times, use as padding.
    padding_tokens = tf.tile(original_tokens, multiples=[padding_multiplier])
    synonym_tokens_padded = tf.concat([synonym_tokens, tf.cast(padding_tokens,
                                                               dtype=tf.int64)
                                      ], axis=0)
    # Crop at exact num_perturbations size.
    synonym_tokens_padded = synonym_tokens_padded[
        :self.config['num_perturbations']]

    # [seq_length] padding sequence positions with tiles of range()
    pad_positions = tf.range(minibatch.num_tokens[i_sample], delta=1)
    # [padding_multiplier*seq_length]
    padding_positions = tf.tile(pad_positions, multiples=[padding_multiplier])
    perturbation_positions_this_example_padded = tf.concat(
        [perturbation_positions_this_example,
         tf.cast(padding_positions, dtype=tf.int64)],
        axis=0)
    # Crop at exact size num_perturbations.
    sequence_positions_padded = perturbation_positions_this_example_padded[
        :self.config['num_perturbations']]

    # Collect across the batch for tf.stack later.
    sequence_positions_across_batch.append(sequence_positions_padded)
    values_across_batch.append(synonym_tokens_padded)

  # Both [batch_size x max_n_perturbations]
  perturbation_positions = tf.stack(sequence_positions_across_batch, axis=0)
  perturbation_tokens = tf.stack(values_across_batch, axis=0)

  # Explicitly setting the shape to self.config['num_perturbations']
  # (the cropping above uses dynamic slicing, so the static shape is restored
  # here).
  perturbation_positions_shape = perturbation_positions.shape.as_list()
  perturbation_positions_shape[1] = self.config['num_perturbations']
  perturbation_positions.set_shape(perturbation_positions_shape)
  perturbation_tokens_shape = perturbation_tokens.shape.as_list()
  perturbation_tokens_shape[1] = self.config['num_perturbations']
  perturbation_tokens.set_shape(perturbation_tokens_shape)

  return Perturbation(
      positions=perturbation_positions,
      tokens=perturbation_tokens)
vertices: The resulting embeddings of the perturbed inputs. """ # The following are all shaped (after broadcasting) as: # (batch_size, num_perturbations, seq_length, embedding_size). embedding = self.embed_pad._embeddings # pylint: disable=protected-access # (batch_size, 1, seq_length, emb_dim) original_vertices = tf.expand_dims(data_batch.embedded_inputs, axis=1) # (batch_size, num_perturbation, 1, emb_dim]) perturbation_vertices = tf.gather( embedding, tf.expand_dims(perturbation.tokens, axis=2)) # (batch_size, num_perturbations, seq_length, 1) mask = tf.expand_dims( tf.one_hot(perturbation.positions, depth=self.config['max_padded_length']), axis=3) # (batch_size, num_perturbations, seq_length, embedding_size) vertices = (1 - mask) * original_vertices + mask * perturbation_vertices return mask, vertices def preprocess_databatch(self, minibatch, vocab_table, perturbation): data_batch = self.embed_dataset(minibatch, vocab_table) mask, vertices = self.compute_mask_vertices(data_batch, perturbation) return data_batch, mask, vertices def add_verifiable_objective(self, minibatch, vocab_table, perturbation, stop_gradient=False): # pylint: disable=g-missing-docstring data_batch = self.embed_dataset(minibatch, vocab_table) _, vertices = self.compute_mask_vertices(data_batch, perturbation) def classifier(embedded_inputs): representation = self.sentence_representer(embedded_inputs, data_batch.length) return self.linear_classifier(representation) # Verification graph. network = ibp.VerifiableModelWrapper(classifier) network(data_batch.embedded_inputs) input_bounds = ibp.SimplexBounds( vertices=vertices, nominal=data_batch.embedded_inputs, r=(self.delta if not stop_gradient else self.config['delta'])) network.propagate_bounds(input_bounds) # Calculate the verifiable objective. verifiable_obj = verifiable_objective( network, data_batch.sentiment, margin=1.) 
def compute_verifiable_loss(self, verifiable_obj, labels):
  """Aggregate the verifiable objective into a scalar training loss.

  The aggregation is chosen by
  `self.config['verifiable_training_aggregation']`:
    'mean':    mean over every entry (all target labels, all examples).
    'max':     worst target label per example, then mean over the batch.
    'softmax': cross-entropy of the transposed objective against `labels`,
               then mean over the batch. This assumes the true-class entries
               of `verifiable_obj` were set to a (large) negative value so
               they barely affect the softmax.

  Args:
    verifiable_obj: Verifiable training objective, [num_targets x batchsize].
    labels: Ground truth labels.

  Returns:
    verifiable_loss: Aggregated loss of the verifiable training objective.

  Raises:
    ValueError: if the configured aggregation is none of the three above.
  """
  aggregation = self.config['verifiable_training_aggregation']

  if aggregation == 'mean':
    return tf.reduce_mean(verifiable_obj)

  if aggregation == 'max':
    worst_target = tf.reduce_max(verifiable_obj, axis=0)
    return tf.reduce_mean(worst_target)

  if aggregation == 'softmax':
    # [batch_size]. Cross-entropy against the one-hot true-label distribution.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.transpose(verifiable_obj), labels=labels)
    return tf.reduce_mean(per_example)

  logging.info(aggregation)
  raise ValueError(
      'Bad input argument for verifiable_training_aggregation used.')
def run_prediction(self, inputs, length):
  """Return classifier outputs for already-embedded `inputs`.

  Args:
    inputs: embedded inputs handed to `self.sentence_representer`.
    length: sequence lengths handed to `self.sentence_representer`.

  Returns:
    The result of applying `self.linear_classifier` to the sentence
    representation.
  """
  return self.linear_classifier(self.sentence_representer(inputs, length))
# Using literal 1 (not placeholder) skips dropout op. self.sentence_representer._keep_prob = 1. # pylint:disable=protected-access # Build the graph as per the base class. (train_joint_rep, _) = self.add_representation(train_minibatch) (train_accuracy, loss, opt_step) = self.add_train_ops(num_classes, train_joint_rep, train_minibatch) dev_num_correct = self.add_dev_eval_ops(dev_minibatch) test_num_correct = self.add_dev_eval_ops(test_minibatch) graph_tensors = { 'loss': loss, 'train_op': opt_step, 'train_accuracy': train_accuracy, 'dev_num_correct': dev_num_correct, 'test_num_correct': test_num_correct, 'keep_prob': self.keep_prob } vocab_table = self.embed_pad.vocab_table vocab_size = self.embed_pad.vocab_size verifiable_loss_ratio = tf.constant( self.config['verifiable_loss_ratio'], dtype=tf.float32, name='verifiable_loss_ratio') self.delta = tf.constant(self.config['delta'], dtype=tf.float32, name='delta') lookup_token = tf.placeholder(tf.string, shape=None, name='lookup_token') indices = vocab_table.lookup(lookup_token) self.vocab_list = contrib_lookup.index_to_string_table_from_file( self.config['vocab_filename_pad']) lookup_token_index = tf.placeholder(tf.int64, shape=None, name='lookup_token_index') lookup_token_string = self.vocab_list.lookup(lookup_token_index) synonym_values = tf.placeholder(tf.int64, shape=[None, None], name='synonym_values') synonym_counts = tf.placeholder(tf.int64, shape=[None], name='synonym_counts') train_perturbation = self.create_perturbation_ops( train_minibatch, synonym_values, vocab_table) train_data_batch, _, _ = self.preprocess_databatch( train_minibatch, vocab_table, train_perturbation) train_words = self.vocab_list.lookup(train_data_batch.input_tokens) # [num_targets x batchsize] verifiable_obj = self.add_verifiable_objective( train_minibatch, vocab_table, train_perturbation, stop_gradient=False) train_nominal = self.run_classification(train_data_batch.embedded_inputs, train_data_batch.sentiment, train_data_batch.length) 
train_bound, train_verified = self.compute_verifiable_verified( verifiable_obj) verifiable_loss = self.compute_verifiable_loss(verifiable_obj, train_minibatch.sentiment) if (self.config['verifiable_loss_ratio']) > 1.0: raise ValueError('Loss ratios sum up to more than 1.0') total_loss = (1 - verifiable_loss_ratio) * graph_tensors['loss'] if self.config['verifiable_loss_ratio'] != 0: total_loss += verifiable_loss_ratio * verifiable_loss # Attack on dev/test set. dev_perturbation = self.create_perturbation_ops( dev_minibatch, synonym_values, vocab_table) # [num_targets x batchsize] dev_verifiable_obj = self.add_verifiable_objective( dev_minibatch, vocab_table, dev_perturbation, stop_gradient=True) dev_bound, dev_verified = self.compute_verifiable_verified( dev_verifiable_obj) dev_data_batch, _, _ = self.preprocess_databatch( dev_minibatch, vocab_table, dev_perturbation) test_perturbation = self.create_perturbation_ops( test_minibatch, synonym_values, vocab_table) # [num_targets x batchsize] test_verifiable_obj = self.add_verifiable_objective( test_minibatch, vocab_table, test_perturbation, stop_gradient=True) test_bound, test_verified = self.compute_verifiable_verified( test_verifiable_obj) test_data_batch, _, _ = self.preprocess_databatch( test_minibatch, vocab_table, test_perturbation) dev_words = self.vocab_list.lookup(dev_data_batch.input_tokens) test_words = self.vocab_list.lookup(test_data_batch.input_tokens) dev_nominal = self.run_classification(dev_data_batch.embedded_inputs, dev_data_batch.sentiment, dev_data_batch.length) test_nominal = self.run_classification(test_data_batch.embedded_inputs, test_data_batch.sentiment, test_data_batch.length) dev_predictions = self.run_prediction(dev_data_batch.embedded_inputs, dev_data_batch.length) test_predictions = self.run_prediction(test_data_batch.embedded_inputs, test_data_batch.length) with tf.control_dependencies([train_verified, test_verified, dev_verified]): opt_step = self._add_optimize_op(total_loss) 
def verifiable_objective(network, labels, margin=0.):
  """Computes the verifiable objective.

  Args:
    network: `ibp.VerifiableModelWrapper` for the network to verify.
    labels: 1D integer tensor of shape (batch_size) of labels for each
      input example.
    margin: Verifiable objective values for correct class will be forced to
      `-margin`, thus disregarding large negative bounds when maximising. By
      default this is set to 0.

  Returns:
    2D tensor of shape (num_classes, batch_size) containing verifiable
    objective for each target class, for each example.
  """
  final_layer = network.output_module

  # Fold the per-target objective into the final linear layer.
  obj_w, obj_b = targeted_objective(
      final_layer.module.w, final_layer.module.b, labels)
  in_bounds = final_layer.input_bounds

  # Worst case over the relative (offset) bounds, summed over the neurons.
  worst_case = tf.maximum(obj_w * in_bounds.lower_offset,
                          obj_w * in_bounds.upper_offset)
  neuron_axes = list(range(2, worst_case.shape.ndims))
  objective = tf.reduce_sum(worst_case, axis=neuron_axes)

  # Constant terms: contribution of the nominal input, then the bias.
  nominal_axes = list(range(2, obj_w.shape.ndims))
  objective = objective + tf.reduce_sum(obj_w * in_bounds.nominal,
                                        axis=nominal_axes)
  objective = objective + obj_b

  # Entries where target == correct class are overwritten with `-margin`, so
  # a later reduce_max over targets ignores them.
  num_classes = final_layer.output_bounds.shape[-1]
  return filter_correct_class(objective, num_classes, labels, margin=margin)
def filter_correct_class(verifiable_obj, num_classes, labels, margin):
  """Filters out the objective when the target class contains the true label.

  Args:
    verifiable_obj: 2D tensor of shape (num_classes, batch_size) containing
      verifiable objectives.
    num_classes: number of target classes.
    labels: 1D tensor of shape (batch_size) containing the labels for each
      example in the batch.
    margin: Verifiable objective values for correct class will be forced to
      `-margin`, thus disregarding large negative bounds when maximising.

  Returns:
    2D tensor of shape (num_classes, batch_size) containing the corrected
    verifiable objective values for each (class, example).
  """
  # Column vector of class ids; comparing it with `labels` broadcasts to
  # (num_classes, batch_size).
  class_ids = tf.expand_dims(
      tf.range(num_classes, dtype=labels.dtype), axis=1)
  is_other_class = tf.not_equal(class_ids, labels)
  correct_class_value = -margin * tf.ones_like(verifiable_obj)
  return tf.where(is_other_class, verifiable_obj, correct_class_value)
"""Train verifiably robust models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import imp import json import os from absl import app from absl import flags from absl import logging import numpy as np from six.moves import range import tensorflow.compat.v1 as tf import robust_model flags.DEFINE_string('config_path', 'config.py', 'Path to training configuration file.') flags.DEFINE_integer('batch_size', 40, 'Batch size.') flags.DEFINE_integer('num_train_steps', 150000, 'Number of training steps.') flags.DEFINE_integer('num_oov_buckets', 1, 'Number of out of vocabulary buckets.') flags.DEFINE_integer('report_every', 100, 'Report test loss every N batches.') flags.DEFINE_float('schedule_ratio', 0.8, 'The final delta and verifiable_loss_ratio are reached when ' 'the number of steps equals schedule_ratio * ' 'num_train_steps.') flags.DEFINE_float('learning_rate', 0.001, 'Learning rate.') flags.DEFINE_float('max_grad_norm', 5.0, 'Maximum norm of gradients.') flags.DEFINE_boolean('fine_tune_embeddings', True, 'Finetune embeddings.') flags.DEFINE_string('task', 'sst', 'One of snli, mnli, sick, sst.') flags.DEFINE_string('pooling', 'average', 'One of averge, sum, max, last.') flags.DEFINE_boolean('analysis', False, 'Analysis mode.') flags.DEFINE_string('analysis_split', 'test', 'Analysis dataset split.') flags.DEFINE_string('experiment_root', '/tmp/robust_model/', 'Path to save trained models.') flags.DEFINE_string( 'tensorboard_dir', None, 'Tensorboard folder. 
def linear_schedule(step, init_step, final_step, init_value, final_value):
  """Linearly interpolate a value between two steps, clamped at both ends.

  Args:
    step: current step.
    init_step: step at which the schedule starts (value is `init_value`).
    final_step: step at which the schedule ends (value is `final_value`);
      must not be smaller than `init_step`.
    init_value: value at `init_step`.
    final_value: value at `final_step`.

  Returns:
    `final_value` if `init_step == final_step`; otherwise the interpolated
    value, clipped to the closed interval spanned by `init_value` and
    `final_value`.
  """
  assert final_step >= init_step
  if init_step == final_step:
    # Degenerate schedule: avoid dividing by zero below.
    return final_value

  progress = np.float32(step - init_step) / float(final_step - init_step)
  interpolated = progress * (final_value - init_value) + init_value
  lowest = min(init_value, final_value)
  highest = max(init_value, final_value)
  return np.clip(interpolated, lowest, highest)
def train(config_dict, synonym_filepath, batch_size, num_train_steps,
          schedule_ratio, report_every, checkpoint_path, tensorboard_dir):
  """Train the robust model, periodically evaluating and checkpointing.

  Args:
    config_dict: keyword arguments for `robust_model.RobustModel`; its 'task'
      and 'config' entries are also read here.
    synonym_filepath: path handed to `construct_synonyms`.
    batch_size: batch size, used to derive the number of dev/test batches.
    num_train_steps: number of training steps to run.
    schedule_ratio: fraction of `num_train_steps` at which the scheduled delta
      and verifiable loss ratio reach their final values.
    report_every: evaluate on dev/test every this many steps, and on the last
      step.
    checkpoint_path: directory that receives the checkpoints ('best',
      'best_verified', 'model' and 'final').
    tensorboard_dir: directory for the summary writer.
  """
  graph_tensor_producer = robust_model.RobustModel(**config_dict)
  graph_tensors = graph_tensor_producer()

  synonym_keys, max_synonym_counts, synonym_value_lens_cum, \
      synonym_values_list = construct_synonyms(synonym_filepath)
  train_summary = config_train_summary(config_dict['task'],
                                       graph_tensors['train_accuracy'],
                                       graph_tensors['loss'])

  tf.gfile.MakeDirs(checkpoint_path)

  best_dev_accuracy = 0.0
  best_test_accuracy = 0.0
  best_verified_dev_accuracy = 0.0
  best_verified_test_accuracy = 0.0

  network_saver = tf.train.Saver(graph_tensor_producer.variables)
  with tf.train.SingularMonitoredSession() as session:
    logging.info('Initialize parameters...')
    writer = tf.summary.FileWriter(tensorboard_dir, session.graph)
    input_feed = {}

    # Tokenize synonyms.
    tokenize_synonyms = [[] for _ in range(graph_tensors['vocab_size'])]
    lookup_indices_keys = session.run(graph_tensors['indices'],
                                      feed_dict={graph_tensors['lookup_token']:
                                                 synonym_keys})
    lookup_indices_values = session.run(graph_tensors['indices'],
                                        feed_dict={
                                            graph_tensors['lookup_token']:
                                            synonym_values_list})
    for i, key_index in enumerate(lookup_indices_keys):
      tokenize_synonyms[key_index] = lookup_indices_values[
          synonym_value_lens_cum[i]:synonym_value_lens_cum[i+1]].tolist()
    synonym_values_np = np.zeros([graph_tensors['vocab_size'],
                                  max_synonym_counts])
    for i in range(graph_tensors['vocab_size']):
      # False-safe case. No perturbations. Set it as itself.
      synonym_values_np[i][0] = i
      for j in range(len(tokenize_synonyms[i])):
        synonym_values_np[i][j] = tokenize_synonyms[i][j]
    synonym_counts_np = [len(s) for s in tokenize_synonyms]
    input_feed[graph_tensors['synonym_values']] = synonym_values_np
    input_feed[graph_tensors['synonym_counts']] = synonym_counts_np

    config = config_dict['config']
    # When delta is not scheduled the graph keeps its constant
    # config['delta']; mirror that here so the verifiable-loss schedule below
    # never reads an unbound `delta`.
    delta = config['delta']
    warmup_steps = 0
    for step in range(num_train_steps):
      if config['delta'] > 0.0 and config['delta_schedule']:
        delta = linear_schedule(
            step, 0., schedule_ratio * num_train_steps,
            0., config['delta'])
        input_feed[graph_tensors['delta']] = delta

      if (config['verifiable_loss_ratio'] > 0.0 and
          config['verifiable_loss_schedule']):
        # The ratio starts ramping up at the first step with a positive delta.
        if delta > 0.0 and warmup_steps == 0:
          warmup_steps = step
        if delta > 0.0:
          verifiable_loss_ratio = linear_schedule(
              step, warmup_steps, schedule_ratio * num_train_steps,
              0., config['verifiable_loss_ratio'])
        else:
          verifiable_loss_ratio = 0.0
        input_feed[
            graph_tensors['verifiable_loss_ratio']] = verifiable_loss_ratio

      total_loss_np, loss_np, verifiable_loss_np, train_accuracy_np, \
          train_bound, train_verified, \
          verifiable_loss_ratio_val, delta_val, \
          train_summary_py, _ = session.run(
              [graph_tensors['total_loss'],
               graph_tensors['loss'],
               graph_tensors['verifiable_loss'],
               graph_tensors['train_accuracy'],
               graph_tensors['train']['bound'],
               graph_tensors['train']['verified'],
               graph_tensors['verifiable_loss_ratio'],
               graph_tensors['delta'],
               train_summary,
               graph_tensors['train_op']], input_feed)

      writer.add_summary(train_summary_py, step)
      # `step` runs over range(num_train_steps), so the last step is
      # num_train_steps - 1; always report on it.
      if step % report_every == 0 or step == num_train_steps - 1:
        dev_total_num_correct = 0.0
        test_total_num_correct = 0.0
        dev_verified_count = 0.0
        test_verified_count = 0.0
        # Only whole batches are evaluated; a trailing partial batch is
        # dropped from both the counts and the denominators.
        dev_num_batches = graph_tensors['dev_num_examples'] // batch_size
        test_num_batches = graph_tensors['test_num_examples'] // batch_size
        dev_total_num_examples = dev_num_batches * batch_size
        test_total_num_examples = test_num_batches * batch_size
        for _ in range(dev_num_batches):
          correct, verified = session.run(
              [graph_tensors['dev_num_correct'],
               graph_tensors['dev']['verified']], input_feed)
          dev_total_num_correct += correct
          dev_verified_count += np.sum(verified)
        for _ in range(test_num_batches):
          correct, verified = session.run(
              [graph_tensors['test_num_correct'],
               graph_tensors['test']['verified']], input_feed)
          test_total_num_correct += correct
          test_verified_count += np.sum(verified)
        dev_accuracy = dev_total_num_correct / dev_total_num_examples
        test_accuracy = test_total_num_correct / test_total_num_examples
        dev_verified_accuracy = dev_verified_count / dev_total_num_examples
        test_verified_accuracy = test_verified_count / test_total_num_examples
        write_tf_summary(writer, step, tag='dev_accuracy', value=dev_accuracy)
        write_tf_summary(writer, step, tag='test_accuracy',
                         value=test_accuracy)
        write_tf_summary(writer, step, tag='train_bound_summary',
                         value=np.mean(train_bound))
        write_tf_summary(writer, step, tag='train_verified_summary',
                         value=np.mean(train_verified))
        write_tf_summary(writer, step, tag='dev_verified_summary',
                         value=np.mean(dev_verified_accuracy))
        write_tf_summary(writer, step, tag='test_verified_summary',
                         value=np.mean(test_verified_accuracy))
        write_tf_summary(writer, step, tag='total_loss_summary',
                         value=total_loss_np)
        write_tf_summary(writer, step, tag='verifiable_train_loss_summary',
                         value=verifiable_loss_np)

        logging.info('verifiable_loss_ratio: %f, delta: %f',
                     verifiable_loss_ratio_val, delta_val)
        logging.info('step: %d, '
                     'train loss: %f, '
                     'verifiable train loss: %f, '
                     'train accuracy: %f, '
                     'dev accuracy: %f, '
                     'test accuracy: %f, ', step, loss_np,
                     verifiable_loss_np, train_accuracy_np,
                     dev_accuracy, test_accuracy)
        dev_verified_accuracy_mean = np.mean(dev_verified_accuracy)
        test_verified_accuracy_mean = np.mean(test_verified_accuracy)
        logging.info('Train Bound = %.05f, train verified: %.03f, '
                     'dev verified: %.03f, test verified: %.03f',
                     np.mean(train_bound),
                     np.mean(train_verified), dev_verified_accuracy_mean,
                     test_verified_accuracy_mean)

        if dev_accuracy > best_dev_accuracy:
          # Store most accurate model so far.
          network_saver.save(session.raw_session(),
                             os.path.join(checkpoint_path, 'best'))
          best_dev_accuracy = dev_accuracy
          best_test_accuracy = test_accuracy
        logging.info('best dev acc\t%f\tbest test acc\t%f',
                     best_dev_accuracy, best_test_accuracy)

        if dev_verified_accuracy_mean > best_verified_dev_accuracy:
          # Store model with best verified accuracy so far.
          network_saver.save(session.raw_session(),
                             os.path.join(checkpoint_path, 'best_verified'))
          best_verified_dev_accuracy = dev_verified_accuracy_mean
          best_verified_test_accuracy = test_verified_accuracy_mean
        logging.info('best verified dev acc\t%f\tbest verified test acc\t%f',
                     best_verified_dev_accuracy, best_verified_test_accuracy)

        network_saver.save(session.raw_session(),
                           os.path.join(checkpoint_path, 'model'))
        writer.flush()

    # Store model at end of training.
    network_saver.save(session.raw_session(),
                       os.path.join(checkpoint_path, 'final'))
logging.info('old delta %f, old num_perturbations: %d', graph_tensor_producer.config['delta'], graph_tensor_producer.config['num_perturbations']) graph_tensor_producer.config['delta'] = delta graph_tensor_producer.config['num_perturbations'] = num_perturbations if max_padded_length > 0: graph_tensor_producer.config['max_padded_length'] = max_padded_length logging.info('new delta %f, num_perturbations: %d, max_padded_length: %d', graph_tensor_producer.config['delta'], graph_tensor_producer.config['num_perturbations'], graph_tensor_producer.config['max_padded_length']) logging.info('graph_tensors.config: %s', graph_tensor_producer.config) graph_tensors = graph_tensor_producer() network_saver = tf.train.Saver(graph_tensor_producer.variables) with tf.train.SingularMonitoredSession() as session: network_saver.restore(session.raw_session(), model_location) for _ in range(batch_offset): # Seek to the correct batch. session.run(graph_tensors[datasplit]['sentiment']) input_feed = {} # Tokenize synonyms. tokenize_synonyms = [[] for _ in range(graph_tensors['vocab_size'])] lookup_indices_keys = session.run(graph_tensors['indices'], feed_dict={graph_tensors['lookup_token']: synonym_keys}) lookup_indices_values = session.run(graph_tensors['indices'], feed_dict={ graph_tensors['lookup_token']: synonym_values_list}) for i, key_index in enumerate(lookup_indices_keys): tokenize_synonyms[key_index] = lookup_indices_values[ synonym_value_lens_cum[i]:synonym_value_lens_cum[i+1]].tolist() synonym_values_np = np.zeros([graph_tensors['vocab_size'], max_synoynm_counts]) for i in range(graph_tensors['vocab_size']): # False-safe case. No perturbations. Set it as itself. 
def main(_):
  """Loads the experiment config, then either analyses or trains a model.

  The file at `FLAGS.config_path` is executed as Python code inside a fresh
  module, which must define `get_config()`. The returned config and the
  command-line flags are combined into the keyword arguments used to build the
  model. When `FLAGS.analysis` is set, `analysis` is run on the checkpoint at
  `config['model_location']`; otherwise `train` is run and checkpoints are
  written under `FLAGS.experiment_root`.

  Args:
    _: Unused positional arguments supplied by `app.run`.
  """
  # Function-local import: `types.ModuleType` is the documented replacement for
  # the deprecated `imp.new_module`.
  import types  # pylint: disable=g-import-not-at-top

  # Read the config file into a new ad-hoc module.
  # NOTE(review): the config file is executed as code; only use trusted files.
  with open(FLAGS.config_path, 'r') as config_file:
    config_code = config_file.read()
  config_module = types.ModuleType('config')
  exec(config_code, config_module.__dict__)  # pylint: disable=exec-used
  config = config_module.get_config()

  config_dict = {'task': FLAGS.task,
                 'batch_size': FLAGS.batch_size,
                 'pooling': FLAGS.pooling,
                 'learning_rate': FLAGS.learning_rate,
                 'config': config,
                 'embedding_dim': config['embedding_dim'],
                 'fine_tune_embeddings': FLAGS.fine_tune_embeddings,
                 'num_oov_buckets': FLAGS.num_oov_buckets,
                 'max_grad_norm': FLAGS.max_grad_norm}

  if FLAGS.analysis:
    logging.info('Analyze model location: %s', config['model_location'])
    # Start from the first batch and let `analysis` derive the batch count
    # (a total of 0 means "use the whole split").
    base_batch_offset = 0
    analysis(config_dict, config['synonym_filepath'], config['model_location'],
             FLAGS.batch_size, base_batch_offset,
             0, datasplit=FLAGS.analysis_split,
             delta=config['delta'],
             num_perturbations=config['num_perturbations'],
             max_padded_length=config['max_padded_length'])
  else:
    checkpoint_path = os.path.join(FLAGS.experiment_root, 'checkpoint')
    # Fall back to a directory under the experiment root when no explicit
    # tensorboard directory was given.
    if FLAGS.tensorboard_dir is None:
      tensorboard_dir = os.path.join(FLAGS.experiment_root, 'tensorboard')
    else:
      tensorboard_dir = FLAGS.tensorboard_dir

    train(config_dict, config['synonym_filepath'],
          FLAGS.batch_size,
          num_train_steps=FLAGS.num_train_steps,
          schedule_ratio=FLAGS.schedule_ratio,
          report_every=FLAGS.report_every,
          checkpoint_path=checkpoint_path,
          tensorboard_dir=tensorboard_dir)
def get_padded_embeddings(embeddings,
                          vocabulary_table,
                          tokens, batch_size,
                          token_indexes=None):
  """Reshapes and pads 'raw' word embeddings.

  Say we have a batch of B tokenized sentences, of variable length, with a
  total of W tokens. For example, B = 2 and W = 3 + 4 = 7:
  [['The', 'cat', 'eats'],
   [  'A', 'black', 'cat', 'jumps']]

  Since rows have variable length, this cannot be represented as a tf.Tensor.
  It is represented as a tf.SparseTensor, with 7 values & indexes:
  indices: [[0,0], [0,1], [0,2], [1,0], [1,1], [1,2], [1,3]]
  values: ['The', 'cat', 'eats', 'A', 'black', 'cat', 'jumps']

  We have also built a vocabulary table:
  vocabulary table: ['cat', 'The', 'A', 'black', 'eats', 'jumps']

  We also have the embeddings, a matrix of floats with one D-dimensional row
  per entry of the vocabulary table, stored as a normal tf.Tensor.

  For example, with D=3, embeddings could be:
  [[0.4, 0.5, -0.6],  # This is the embedding for word 0 = 'cat'
   [0.1, -0.3, 0.6],  # This is the embedding for word 1 = 'The'
   [0.7, 0.8, -0.9],  # This is the embedding for word 2 = 'A'
   [-0.1, 0.9, 0.7],  # This is the embedding for word 3 = 'black'
   [-0.2, 0.4, 0.7],  # This is the embedding for word 4 = 'eats'
   [0.3, -0.5, 0.2]]  # This is the embedding for word 5 = 'jumps'

  This function builds a normal tf.Tensor containing the embeddings for the
  tokens provided, in the correct order, with appropriate 0 padding.

  In our example, the returned tensor would be:
  [[[0.1, -0.3, 0.6], [0.4, 0.5, -0.6], [-0.2, 0.4, 0.7], [0.0, 0.0, 0.0]],
   [[0.7, 0.8, -0.9], [-0.1, 0.9, 0.7], [0.4, 0.5, -0.6], [0.3, -0.5, 0.2]]]

  Note that since the first sentence has only 3 words, the 4th embedding gets
  replaced by a D-dimensional vector of 0.

  Args:
    embeddings: 2-D Tensor of floats whose second dimension D is statically
      known; row i is the embedding of vocabulary index i. It must be
      initialized with the same vocabulary file as vocabulary_table.
    vocabulary_table: a tf.contrib.lookup.LookupInterface, containing the
      vocabulary, initialized with the same vocabulary file as embeddings.
      Only used when token_indexes is None.
    tokens: [B, ?] SparseTensor of strings, the tokens.
    batch_size: Python integer.
    token_indexes: Optional Tensor of vocabulary indices. When not None it is
      used directly instead of looking up `tokens.values` in
      `vocabulary_table` (presumably one index per entry of `tokens.values`,
      in the same order; confirm against callers).

  Returns:
    [B, L, D] Tensor of floats: the embeddings in the correct order,
    appropriately padded with 0.0, where L = max(num_tokens) and B = batch_size
  """
  embedding_dim = embeddings.get_shape()[1].value  # D in docstring above.
  num_tokens_in_batch = tf.shape(tokens.indices)[0]  # W in the docstring above.
  max_length = tokens.dense_shape[1]  # This is L in the docstring above.

  # Get indices of tokens in vocabulary_table (or use the ones supplied).
  if token_indexes is not None:
    indexes = token_indexes
  else:
    indexes = vocabulary_table.lookup(tokens.values)

  # Get word embeddings: one D-dimensional row per token.
  tokens_embeddings = tf.gather(embeddings, indexes)

  # Shape of the return tensor: [B, L, D].
  new_shape = tf.cast(
      tf.stack([batch_size, max_length, embedding_dim], axis=0), tf.int32)

  # Build the vector of indices for the return Tensor: one [row, column, dim]
  # triple per scalar of `tokens_embeddings`, i.e. a [W * D, 3] tensor.
  # In the example above, indices_final would contain (grouped per token here
  # only for readability):
  # [[[0,0,0], [0,0,1], [0,0,2]],
  #  [[0,1,0], [0,1,1], [0,1,2]],
  #  [[0,2,0], [0,2,1], [0,2,2]],
  #  [[1,0,0], [1,0,1], [1,0,2]],
  #  [[1,1,0], [1,1,1], [1,1,2]],
  #  [[1,2,0], [1,2,1], [1,2,2]],
  #  [[1,3,0], [1,3,1], [1,3,2]]]
  # Repeat each [row, column] pair D times ...
  tiled = tf.tile(tokens.indices, [1, embedding_dim])
  indices_tiled = tf.cast(
      tf.reshape(tiled, [num_tokens_in_batch * embedding_dim, 2]), tf.int32)
  # ... and pair every repetition with an embedding dimension 0..D-1.
  indices_linear = tf.expand_dims(
      tf.tile(tf.range(0, embedding_dim), [num_tokens_in_batch]), axis=1)
  indices_final = tf.concat([indices_tiled, indices_linear], axis=1)

  # Build the dense Tensor; positions without a token are left at the default
  # value of tf.sparse_to_dense.
  embeddings_padded = tf.sparse_to_dense(
      sparse_indices=indices_final,
      output_shape=new_shape,
      sparse_values=tf.reshape(tokens_embeddings,
                               [num_tokens_in_batch * embedding_dim]))
  # Restore the static shape information that is known at graph-build time.
  embeddings_padded.set_shape((batch_size, None, embedding_dim))

  return embeddings_padded
indexes_padded = tf.sparse_to_dense( sparse_indices=tokens.indices, output_shape=[batch_size, max_length], sparse_values=tf.reshape(indexes, [num_tokens_in_batch])) indexes_padded.set_shape((batch_size, None)) return indexes_padded class EmbedAndPad(snt.AbstractModule): """Embed and pad tokenized words. This class primary functionality is similar to get_padded_embeddings. It stores references to the embeddings and vocabulary table for convenience, so that the user does not have to keep and pass them around. """ def __init__(self, batch_size, vocabularies, embedding_dim, num_oov_buckets=1000, fine_tune_embeddings=False, padded_token=None, name='embed_and_pad'): super(EmbedAndPad, self).__init__(name=name) self._batch_size = batch_size vocab_file, vocab_size = get_merged_vocabulary_file(vocabularies, padded_token) self._vocab_size = vocab_size self._num_oov_buckets = num_oov_buckets # Load vocabulary table for index lookup. self._vocabulary_table = contrib_lookup.index_table_from_file( vocabulary_file=vocab_file, num_oov_buckets=num_oov_buckets, vocab_size=self._vocab_size) def create_initializer(initializer_range=0.02): """Creates a `truncated_normal_initializer` with the given range.""" # The default value is chosen from language/bert/modeling.py. 
def get_merged_vocabulary_file(vocabularies, padded_token=None):
  """Merges several vocabularies into one temporary vocabulary file.

  The TF object that loads the embedding expects a vocabulary file, to know
  which embeddings it should load.
  See tf.contrib.embedding.load_embedding_initializer.

  When we want to train/test on several datasets simultaneously we need to
  merge their vocabularies into a single file.

  Args:
    vocabularies: Iterable of vocabularies. Each vocabulary should be a list of
      tokens (text or bytes; text tokens are written UTF-8 encoded).
    padded_token: If not None, add the padded_token to the first index.

  Returns:
    outfilename: Name of the merged file. Contains the sorted union of all
      tokens in `vocabularies`, one token per line, preceded by `padded_token`
      when it is given. The file is not deleted automatically.
    vocabulary_size: Count of tokens in the merged file.
  """
  uniques = [set(vocabulary) for vocabulary in vocabularies]
  unique_merged = frozenset().union(*uniques)
  unique_merged_sorted = sorted(unique_merged)
  if padded_token is not None:
    # Add padded token as 0 index.
    unique_merged_sorted = [padded_token] + unique_merged_sorted
  vocabulary_size = len(unique_merged_sorted)

  # The temporary file is opened in binary mode, so text tokens must be encoded
  # first; joining them with a bytes separator would otherwise raise TypeError
  # on Python 3.
  encoded_tokens = [
      token if isinstance(token, bytes) else token.encode('utf-8')
      for token in unique_merged_sorted]

  # delete=False: the file must outlive this function so that the lookup table
  # can read it later.
  with tempfile.NamedTemporaryFile(delete=False) as outfile:
    outfile.write(b'\n'.join(encoded_tokens))
    outfilename = outfile.name
  logging.info('Merged vocabulary file with %d tokens: %s', vocabulary_size,
               outfilename)
  return outfilename, vocabulary_size
def layers(model_size):
  """Looks up the layer specification registered for a model size.

  Args:
    model_size: One of 'tiny', 'small', 'medium' or 'large'.

  Returns:
    A tuple of layer descriptors. Each descriptor is a tuple whose first entry
    names the layer kind ('linear', 'conv2d' or 'activation') and whose
    remaining entries are that layer's parameters.

  Raises:
    ValueError: If `model_size` is not a known model name.
  """
  relu = ('activation', 'relu')

  def conv(kernel, channels, padding, stride):
    # Square kernel: (kernel, kernel).
    return ('conv2d', (kernel, kernel), channels, padding, stride)

  def linear(units):
    return ('linear', units)

  architectures = (
      ('tiny', (
          linear(100), relu,
      )),
      ('small', (
          conv(4, 16, 'VALID', 2), relu,
          conv(4, 32, 'VALID', 1), relu,
          linear(100), relu,
      )),
      ('medium', (
          conv(3, 32, 'VALID', 1), relu,
          conv(4, 32, 'VALID', 2), relu,
          conv(3, 64, 'VALID', 1), relu,
          conv(4, 64, 'VALID', 2), relu,
          linear(512), relu,
          linear(512), relu,
      )),
      ('large', (
          conv(3, 64, 'SAME', 1), relu,
          conv(3, 64, 'SAME', 1), relu,
          conv(3, 128, 'SAME', 2), relu,
          conv(3, 128, 'SAME', 1), relu,
          conv(3, 128, 'SAME', 1), relu,
          linear(512), relu,
      )),
  )

  # Linear scan with `==` so that any argument type is handled the same way.
  for known_size, specification in architectures:
    if model_size == known_size:
      return specification
  raise ValueError('Unknown model: "{}"'.format(model_size))
original_predictor = ibp.DNN(num_classes, layers(FLAGS.model)) predictor = original_predictor if FLAGS.dataset == 'cifar10': mean = (0.4914, 0.4822, 0.4465) std = (0.2023, 0.1994, 0.2010) predictor = ibp.add_image_normalization(original_predictor, mean, std) if FLAGS.crown_bound_init > 0 or FLAGS.crown_bound_final > 0: logging.info('Using CROWN-IBP loss.') model_wrapper = ibp.crown.VerifiableModelWrapper loss_helper = ibp.crown.create_classification_losses else: model_wrapper = ibp.VerifiableModelWrapper loss_helper = ibp.create_classification_losses predictor = model_wrapper(predictor) # Training. train_losses, train_loss, _ = loss_helper( step, data.image, data.label, predictor, FLAGS.epsilon_train, loss_weights={ 'nominal': { 'init': FLAGS.nominal_xent_init, 'final': FLAGS.nominal_xent_final, 'warmup': FLAGS.verified_xent_init + FLAGS.nominal_xent_init }, 'attack': { 'init': FLAGS.attack_xent_init, 'final': FLAGS.attack_xent_final }, 'verified': { 'init': FLAGS.verified_xent_init, 'final': FLAGS.verified_xent_final, 'warmup': 0. }, 'crown_bound': { 'init': FLAGS.crown_bound_init, 'final': FLAGS.crown_bound_final, 'warmup': 0. }, }, warmup_steps=FLAGS.warmup_steps, rampup_steps=FLAGS.rampup_steps, input_bounds=input_bounds) saver = tf.train.Saver(original_predictor.get_variables()) optimizer = tf.train.AdamOptimizer(learning_rate) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(train_loss, step) # Test using while loop. 
def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack): """Returns the test metrics.""" num_test_batches = len(data_test[0]) // batch_size assert len(data_test[0]) % batch_size == 0, ( 'Test data is not a multiple of batch size.') def cond(i, *unused_args): return i < num_test_batches def body(i, metrics): """Compute the sum of all metrics.""" test_data = ibp.build_dataset(data_test, batch_size=batch_size, sequential=True) predictor(test_data.image, override=True, is_training=False) input_interval_bounds = ibp.IntervalBounds( tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]), tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1])) predictor.propagate_bounds(input_interval_bounds) test_specification = ibp.ClassificationSpecification( test_data.label, num_classes) test_attack = attack_builder(predictor, test_specification, FLAGS.epsilon, input_bounds=input_bounds, optimizer_builder=ibp.UnrolledAdam) test_losses = ibp.Losses(predictor, test_specification, test_attack) test_losses(test_data.label) new_metrics = [] for m, n in zip(metrics, test_losses.scalar_metrics): new_metrics.append(m + n) return i + 1, new_metrics total_count = tf.constant(0, dtype=tf.int32) total_metrics = [tf.constant(0, dtype=tf.float32) for _ in range(len(ibp.ScalarMetrics._fields))] total_count, total_metrics = tf.while_loop( cond, body, loop_vars=[total_count, total_metrics], back_prop=False, parallel_iterations=1) total_count = tf.cast(total_count, tf.float32) test_metrics = [] for m in total_metrics: test_metrics.append(m / total_count) return ibp.ScalarMetrics(*test_metrics) test_metrics = get_test_metrics( FLAGS.batch_size, ibp.UntargetedPGDAttack) summaries = [] for f in test_metrics._fields: summaries.append( tf.summary.scalar(f, getattr(test_metrics, f))) test_summaries = tf.summary.merge(summaries) test_writer = tf.summary.FileWriter(os.path.join(FLAGS.output_dir, 'test')) # Run everything. 
tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True with tf.train.SingularMonitoredSession(config=tf_config) as sess: for _ in range(FLAGS.steps): iteration, loss_value, _ = sess.run( [step, train_losses.scalar_losses.nominal_cross_entropy, train_op]) if iteration % FLAGS.test_every_n == 0: metric_values, summary = sess.run([test_metrics, test_summaries]) test_writer.add_summary(summary, iteration) show_metrics(iteration, metric_values, loss_value=loss_value) saver.save(sess._tf_sess(), # pylint: disable=protected-access os.path.join(FLAGS.output_dir, 'model'), global_step=FLAGS.steps - 1) if __name__ == '__main__': app.run(main) ================================================ FILE: interval_bound_propagation/__init__.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Library to train verifiably robust neural networks. For more details see paper: On the Effectiveness of Interval Bound Propagation for Training Verifiably Robust Models. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from interval_bound_propagation.src.attacks import MemoryEfficientMultiTargetedPGDAttack from interval_bound_propagation.src.attacks import MultiTargetedPGDAttack from interval_bound_propagation.src.attacks import pgd_attack from interval_bound_propagation.src.attacks import RestartedAttack from interval_bound_propagation.src.attacks import UnrolledAdam from interval_bound_propagation.src.attacks import UnrolledFGSMDescent from interval_bound_propagation.src.attacks import UnrolledGradientDescent from interval_bound_propagation.src.attacks import UnrolledSPSAAdam from interval_bound_propagation.src.attacks import UnrolledSPSAFGSMDescent from interval_bound_propagation.src.attacks import UnrolledSPSAGradientDescent from interval_bound_propagation.src.attacks import UntargetedAdaptivePGDAttack from interval_bound_propagation.src.attacks import UntargetedPGDAttack from interval_bound_propagation.src.attacks import UntargetedTop5PGDAttack from interval_bound_propagation.src.bounds import AbstractBounds from interval_bound_propagation.src.bounds import IntervalBounds import interval_bound_propagation.src.crown as crown from interval_bound_propagation.src.fastlin import RelativeSymbolicBounds from interval_bound_propagation.src.fastlin import SymbolicBounds import interval_bound_propagation.src.layer_utils as layer_utils from interval_bound_propagation.src.layers import BatchNorm from interval_bound_propagation.src.layers import ImageNorm from interval_bound_propagation.src.loss import Losses from interval_bound_propagation.src.loss import ScalarLosses from interval_bound_propagation.src.loss import ScalarMetrics from interval_bound_propagation.src.model import DNN from interval_bound_propagation.src.model import StandardModelWrapper from interval_bound_propagation.src.model import VerifiableModelWrapper from interval_bound_propagation.src.relative_bounds 
import RelativeIntervalBounds from interval_bound_propagation.src.simplex_bounds import SimplexBounds from interval_bound_propagation.src.specification import ClassificationSpecification from interval_bound_propagation.src.specification import LeastLikelyClassificationSpecification from interval_bound_propagation.src.specification import LinearSpecification from interval_bound_propagation.src.specification import RandomClassificationSpecification from interval_bound_propagation.src.specification import Specification from interval_bound_propagation.src.specification import TargetedClassificationSpecification from interval_bound_propagation.src.utils import add_image_normalization from interval_bound_propagation.src.utils import build_dataset from interval_bound_propagation.src.utils import create_attack from interval_bound_propagation.src.utils import create_classification_losses from interval_bound_propagation.src.utils import create_specification from interval_bound_propagation.src.utils import get_attack_builder from interval_bound_propagation.src.utils import linear_schedule from interval_bound_propagation.src.utils import parse_learning_rate from interval_bound_propagation.src.utils import randomize from interval_bound_propagation.src.utils import smooth_schedule from interval_bound_propagation.src.verifiable_wrapper import BatchFlattenWrapper from interval_bound_propagation.src.verifiable_wrapper import BatchNormWrapper from interval_bound_propagation.src.verifiable_wrapper import BatchReshapeWrapper from interval_bound_propagation.src.verifiable_wrapper import ConstWrapper from interval_bound_propagation.src.verifiable_wrapper import ImageNormWrapper from interval_bound_propagation.src.verifiable_wrapper import IncreasingMonotonicWrapper from interval_bound_propagation.src.verifiable_wrapper import LinearConv1dWrapper from interval_bound_propagation.src.verifiable_wrapper import LinearConv2dWrapper from interval_bound_propagation.src.verifiable_wrapper import 
LinearConvWrapper from interval_bound_propagation.src.verifiable_wrapper import LinearFCWrapper from interval_bound_propagation.src.verifiable_wrapper import ModelInputWrapper from interval_bound_propagation.src.verifiable_wrapper import PiecewiseMonotonicWrapper from interval_bound_propagation.src.verifiable_wrapper import VerifiableWrapper __version__ = '1.10' ================================================ FILE: interval_bound_propagation/src/__init__.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Library to train verifiably robust neural networks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function ================================================ FILE: interval_bound_propagation/src/attacks.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@six.add_metaclass(abc.ABCMeta)
class UnrolledOptimizer(object):
  """In graph optimizer to be used in tf.while_loop.

  Abstract interface: concrete optimizers implement `init_state`, which
  creates the optimizer state for a list of tensors, and `minimize`, which
  maps the current values and state to updated values and an updated state.
  """

  def __init__(self, colocate_gradients_with_ops=False):
    """Stores the gradient colocation option.

    Args:
      colocate_gradients_with_ops: Stored on the instance for subclasses to
        use when they compute gradients.
    """
    self._colocate_gradients_with_ops = colocate_gradients_with_ops

  @abc.abstractmethod
  def minimize(self, loss, x, optim_state):
    """Compute a new value of `x` to minimize `loss`.

    Args:
      loss: A scalar Tensor, the value to be minimized. `loss` should be a
        continuous function of `x` which supports gradients, `loss = f(x)`.
      x: A list of Tensors, the values to be updated. This is analogous to the
        `var_list` argument in standard TF Optimizer.
      optim_state: A (possibly nested) dict, containing any state info needed
        for the optimizer.

    Returns:
      new_x: A list of Tensors, the same length as `x`, holding the updated
        values.
      new_optim_state: A new state, with the same structure as `optim_state`,
        holding the updated state.
    """

  @abc.abstractmethod
  def init_state(self, x):
    """Returns the initial state of the optimizer.

    Args:
      x: A list of Tensors, which will be optimized.

    Returns:
      Any structured output; it is what `minimize` later receives as
      `optim_state`.
    """
class UnrolledFGSMDescent(UnrolledGradientDescent):
  """Identical to UnrolledGradientDescent but forces FGSM steps."""

  def __init__(self, lr=.1, lr_fn=None,
               colocate_gradients_with_ops=False):
    # `True` is passed positionally as the parent's `fgsm` argument.
    super(UnrolledFGSMDescent, self).__init__(
        lr, lr_fn, True, colocate_gradients_with_ops)


class UnrolledAdam(UnrolledOptimizer):
  """The Adam optimizer defined in https://arxiv.org/abs/1412.6980."""

  # t: step counter; m, u: per-variable lists of first and second moment
  # estimates (see `init_state` and `_apply_gradients`).
  _State = collections.namedtuple('State', ['t', 'm', 'u'])  # pylint: disable=invalid-name

  def __init__(self, lr=0.1, lr_fn=None, beta1=0.9, beta2=0.999, epsilon=1e-9,
               colocate_gradients_with_ops=False):
    super(UnrolledAdam, self).__init__(
        colocate_gradients_with_ops=colocate_gradients_with_ops)
    # Without an explicit schedule, `lr` is used at every step.
    self._lr_fn = (lambda i: lr) if lr_fn is None else lr_fn
    self._beta1 = beta1
    self._beta2 = beta2
    self._epsilon = epsilon

  def init_state(self, x):
    """Returns a state with a zero step counter and zero-filled moments."""
    return self._State(
        t=tf.constant(0, dtype=tf.int64),
        m=[tf.zeros_like(v) for v in x],
        u=[tf.zeros_like(v) for v in x])

  def _apply_gradients(self, grads, x, optim_state):
    """Applies one Adam update.

    Args:
      grads: List of gradients, one per entry of `x`.
      x: List of Tensors to update.
      optim_state: `_State` holding the step counter and moment estimates.

    Returns:
      new_x: List of updated Tensors, same length as `x`.
      new_optim_state: `_State` with the incremented counter and the updated
        moment estimates.
    """
    lr = self._lr_fn(optim_state.t)
    # The namedtuple is immutable but its list fields are not: the `m` and `u`
    # placeholders are filled in element by element in the loop below.
    new_optim_state = self._State(
        t=optim_state.t + 1,
        m=[None] * len(x),
        u=[None] * len(x))
    t = tf.cast(new_optim_state.t, tf.float32)
    new_x = [None] * len(x)
    for i in range(len(x)):
      g = grads[i]
      m_old = optim_state.m[i]
      u_old = optim_state.u[i]
      # Exponential moving averages of the gradient and the squared gradient.
      new_optim_state.m[i] = self._beta1 * m_old + (1. - self._beta1) * g
      new_optim_state.u[i] = self._beta2 * u_old + (1. - self._beta2) * g * g
      # Bias correction, using the already incremented step count.
      m_hat = new_optim_state.m[i] / (1. - tf.pow(self._beta1, t))
      u_hat = new_optim_state.u[i] / (1. - tf.pow(self._beta2, t))
      new_x[i] = x[i] - lr * m_hat / (tf.sqrt(u_hat) + self._epsilon)
    return new_x, new_optim_state

  def minimize(self, loss, x, optim_state):
    """Refer to parent class documentation."""
    grads = self.gradients(loss, x)
    return self._apply_gradients(grads, x, optim_state)

  def gradients(self, loss, x):
    """Returns `tf.gradients` of `loss` with respect to every entry of `x`."""
    return tf.gradients(
        loss, x, colocate_gradients_with_ops=self._colocate_gradients_with_ops)


def _spsa_gradients(loss_fn, x, delta=0.01, num_samples=16, num_iterations=4):
  """Compute gradient estimates using SPSA.

  Args:
    loss_fn: Callable that takes a single tensor argument and returns per-sample
      losses. It is called with a tensor of shape
      [2 * num_samples, batch_size, ...] (the `+delta` samples followed by the
      `-delta` samples) and its output is reshaped to
      [2, num_samples, batch_size].
    x: List of tensors with a single element. We only support computation of
      the gradient of the loss with respect to x[0]. We take a list as input to
      keep the same API call as tf.gradients.
    delta: The gradients are computed by computing the loss within x - delta and
      x + delta.
    num_samples: The total number of random samples used to compute the gradient
      is `num_samples` times `num_iterations`. `num_samples` contributes to the
      gradient by tiling `x` `num_samples` times.
    num_iterations: The total number of random samples used to compute the
      gradient is `num_samples` times `num_iterations`. `num_iterations`
      contributes to the gradient by iterating using a `tf.while_loop`.

  Returns:
    List of tensors with a single element corresponding to the gradient of
    loss_fn(x[0]) with respect to x[0].

  Raises:
    NotImplementedError: If `x` does not contain exactly one tensor.
  """

  if len(x) != 1:
    raise NotImplementedError('SPSA gradients with respect to multiple '
                              'variables is not supported.')
  # loss_fn takes a single argument.
  tensor = x[0]

  def _get_delta(x):
    # Random sign pattern scaled by the scalar `delta` argument of
    # `_spsa_gradients` (not the local `delta` defined inside `loop_body`).
    return delta * tf.sign(
        tf.random_uniform(tf.shape(x), minval=-1., maxval=1., dtype=x.dtype))

  # Process batch_size samples at a time.
  def cond(i, *_):
    return tf.less(i, num_iterations)

  def loop_body(i, total_grad):
    """Compute gradient estimate."""
    batch_size = tf.shape(tensor)[0]
    # The tiled tensor has shape [num_samples, batch_size, ...]
    tiled_tensor = tf.expand_dims(tensor, axis=0)
    tiled_tensor = tf.tile(tiled_tensor,
                           [num_samples] + [1] * len(tensor.shape))
    # The tiled tensor has now shape [2, num_samples, batch_size, ...].
    # Note: this local `delta` is a tensor of per-element perturbations and
    # shadows the scalar argument within `loop_body`.
    delta = _get_delta(tiled_tensor)
    tiled_tensor = tf.stack(
        [tiled_tensor + delta, tiled_tensor - delta], axis=0)
    # Compute loss with shape [2, num_samples, batch_size].
    losses = loss_fn(
        tf.reshape(tiled_tensor,
                   [2 * num_samples, batch_size] + tensor.shape.as_list()[1:]))
    losses = tf.reshape(losses, [2, num_samples, batch_size])

    # Compute approximate gradient using broadcasting.
    shape = losses.shape.as_list() + [1] * (len(tensor.shape) - 1)
    shape = [(s or -1) for s in shape]  # Remove None.
    losses = tf.reshape(losses, shape)
    # Finite difference along each random direction, averaged over samples.
    g = tf.reduce_mean((losses[0] - losses[1]) / (2. * delta), axis=0)
    # Accumulate the mean over iterations.
    return [i + 1, g / num_iterations + total_grad]

  _, g = tf.while_loop(
      cond,
      loop_body,
      loop_vars=[tf.constant(0.), tf.zeros_like(tensor)],
      parallel_iterations=1,
      back_prop=False)
  return [g]


@six.add_metaclass(abc.ABCMeta)
class UnrolledSPSA(object):
  """Abstract class that represents an optimizer based on SPSA."""


class UnrolledSPSAGradientDescent(UnrolledGradientDescent, UnrolledSPSA):
  """Optimizer for gradient-free attacks in https://arxiv.org/abs/1802.05666.

  Gradients estimates are computed using Simultaneous Perturbation Stochastic
  Approximation (SPSA).
  """

  def __init__(self, lr=0.1, lr_fn=None, fgsm=False,
               colocate_gradients_with_ops=False, delta=0.01, num_samples=32,
               num_iterations=4, loss_fn=None):
    super(UnrolledSPSAGradientDescent, self).__init__(
        lr, lr_fn, fgsm, colocate_gradients_with_ops)
    assert num_samples % 2 == 0, 'Number of samples must be even'
    self._delta = delta
    self._num_samples = num_samples // 2  # Since we mirror +/- delta later.
    self._num_iterations = num_iterations
    assert loss_fn is not None, 'loss_fn must be specified.'
    self._loss_fn = loss_fn

  def gradients(self, loss, x):
    """Returns SPSA gradient estimates; the `loss` argument is not used."""
    return _spsa_gradients(self._loss_fn, x, self._delta, self._num_samples,
                           self._num_iterations)


# Syntactic sugar.
class UnrolledSPSAFGSMDescent(UnrolledSPSAGradientDescent):
  """Identical to UnrolledSPSAGradientDescent but forces FGSM steps."""

  def __init__(self, lr=.1, lr_fn=None,
               colocate_gradients_with_ops=False, delta=0.01, num_samples=32,
               num_iterations=4, loss_fn=None):
    # `True` is passed positionally as the parent's `fgsm` argument.
    super(UnrolledSPSAFGSMDescent, self).__init__(
        lr, lr_fn, True, colocate_gradients_with_ops, delta, num_samples,
        num_iterations, loss_fn)


class UnrolledSPSAAdam(UnrolledAdam, UnrolledSPSA):
  """Optimizer for gradient-free attacks in https://arxiv.org/abs/1802.05666.

  Gradients estimates are computed using Simultaneous Perturbation Stochastic
  Approximation (SPSA), combined with the ADAM update rule.
  """

  def __init__(self, lr=0.1, lr_fn=None, beta1=0.9, beta2=0.999, epsilon=1e-9,
               colocate_gradients_with_ops=False, delta=0.01, num_samples=32,
               num_iterations=4, loss_fn=None):
    super(UnrolledSPSAAdam, self).__init__(lr, lr_fn, beta1, beta2, epsilon,
                                           colocate_gradients_with_ops)
    assert num_samples % 2 == 0, 'Number of samples must be even'
    self._delta = delta
    self._num_samples = num_samples // 2  # Since we mirror +/- delta later.
    self._num_iterations = num_iterations
    assert loss_fn is not None, 'loss_fn must be specified.'
    self._loss_fn = loss_fn

  def gradients(self, loss, x):
    """Returns SPSA gradient estimates; the `loss` argument is not used."""
    return _spsa_gradients(self._loss_fn, x, self._delta, self._num_samples,
                           self._num_iterations)


def _is_spsa_optimizer(cls):
  """Returns whether the class `cls` derives from `UnrolledSPSA`."""
  return issubclass(cls, UnrolledSPSA)


def wrap_optimizer(cls, **default_kwargs):
  """Wraps an optimizer such that __init__ uses the specified kwargs.

  Args:
    cls: Optimizer class to wrap.
    **default_kwargs: Keyword arguments to pass to `cls.__init__` by default.

  Returns:
    A subclass of `cls` whose constructor merges `default_kwargs` with the
    keyword arguments given at construction time; the latter take precedence.
  """

  class WrapperUnrolledOptimizer(cls):

    def __init__(self, *args, **kwargs):
      # Copy so that the shared defaults are never mutated.
      new_kwargs = default_kwargs.copy()
      new_kwargs.update(kwargs)
      super(WrapperUnrolledOptimizer, self).__init__(*args, **new_kwargs)
  return WrapperUnrolledOptimizer


def _project_perturbation(perturbation, epsilon, input_image, image_bounds):
  """Project `perturbation` onto L-infinity ball of radius `epsilon`.

  The perturbation is first clipped to [-epsilon, epsilon]; the perturbed image
  is then clipped to `image_bounds`, and the difference with `input_image` is
  returned.

  Args:
    perturbation: Tensor, the perturbation to project.
    epsilon: Radius of the L-infinity ball.
    input_image: Tensor, the unperturbed input.
    image_bounds: Pair (minimum, maximum) of allowed values for the perturbed
      image.

  Returns:
    The projected perturbation.
  """
  clipped_perturbation = tf.clip_by_value(perturbation, -epsilon, epsilon)
  new_image = tf.clip_by_value(input_image + clipped_perturbation,
                               image_bounds[0], image_bounds[1])
  return new_image - input_image


def pgd_attack(loss_fn, input_image, epsilon, num_steps,
               optimizer=UnrolledGradientDescent(),
               project_perturbation=_project_perturbation,
               image_bounds=None, random_init=1.):
  """Projected gradient descent for generating adversarial images.

  Args:
    loss_fn: A callable which takes the perturbed image
      (`input_image + perturbation`) as its only argument, and returns the
      loss, a scalar Tensor, which will be minimized.
    input_image: Tensor, a batch of images
    epsilon: float, the L-infinity norm of the maximum allowable perturbation
    num_steps: int, the number of steps of gradient descent
    optimizer: An `UnrolledOptimizer` object
    project_perturbation: A function, which will be used to enforce some
      constraint. It should have the same signature as `_project_perturbation`.
      Note that if you use a custom projection function, you should double-check
      your implementation, since an incorrect implementation will not error,
      and will appear to work fine.
    image_bounds: A pair of floats: minimum and maximum pixel value. If None
      (default), the bounds are assumed to be 0 and 1.
    random_init: Probability of starting from random location rather than
      nominal input image.

  Returns:
    adversarial version of `input_image`, with L-infinity difference less than
      epsilon, which tries to minimize loss_fn.
  """
  image_bounds = image_bounds or (0., 1.)
  # One random draw per batch element, broadcast over the remaining dimensions.
  random_shape = [tf.shape(input_image)[0]] + [1] * (len(input_image.shape) - 1)
  use_random_init = tf.cast(
      tf.random_uniform(random_shape) < float(random_init), tf.float32)
  # Elements not selected for random initialization start from a zero
  # perturbation.
  init_perturbation = use_random_init * tf.random_uniform(
      tf.shape(input_image), minval=-epsilon, maxval=epsilon)
  init_perturbation = project_perturbation(init_perturbation,
                                           epsilon, input_image, image_bounds)
  init_optim_state = optimizer.init_state([init_perturbation])

  def loop_body(i, perturbation, flat_optim_state):
    """Update perturbation to input image."""
    # The optimizer state travels through the loop as a flat list and is
    # restored to its original structure here.
    optim_state = nest.pack_sequence_as(structure=init_optim_state,
                                        flat_sequence=flat_optim_state)
    loss = loss_fn(input_image + perturbation)
    new_perturbation_list, new_optim_state = optimizer.minimize(
        loss, [perturbation], optim_state)
    projected_perturbation = project_perturbation(
        new_perturbation_list[0], epsilon, input_image, image_bounds)
    return i + 1, projected_perturbation, nest.flatten(new_optim_state)

  def cond(i, *_):
    return tf.less(i, num_steps)

  flat_init_optim_state = nest.flatten(init_optim_state)
  _, final_perturbation, _ = tf.while_loop(
      cond,
      loop_body,
      loop_vars=[tf.constant(0.), init_perturbation, flat_init_optim_state],
      parallel_iterations=1,
      back_prop=False)

  adversarial_image = input_image + final_perturbation
  return tf.stop_gradient(adversarial_image)


@six.add_metaclass(abc.ABCMeta)
class Attack(snt.AbstractModule):
  """Defines an attack as a Sonnet module."""

  def __init__(self, predictor, specification, name, predictor_kwargs=None):
    """Initializes the attack.

    Args:
      predictor: Callable invoked as `predictor(x, **kwargs)` by `_eval_fn`.
      specification: Specification object; stored for use by subclasses.
      name: Name of the Sonnet module.
      predictor_kwargs: Optional dict with keys 'intermediate' and 'final',
        each mapping to the keyword arguments passed to `predictor` in that
        mode. Defaults to empty keyword arguments for both modes.
    """
    super(Attack, self).__init__(name=name)
    self._predictor = predictor
    self._specification = specification
    if predictor_kwargs is None:
      self._kwargs = {'intermediate': {}, 'final': {}}
    else:
      self._kwargs = predictor_kwargs
    self._forced_mode = None
    self._target_class = None

  def _eval_fn(self, x, mode='intermediate'):
    """Runs the logits corresponding to `x`.

    Args:
      x: input to the predictor network.
      mode: Either "intermediate" or "final". Selects the desired predictor
        arguments. Ignored when a mode has been forced with `force_mode`.

    Returns:
      Tensor of logits.
    """
    if self._forced_mode is not None:
      mode = self._forced_mode
    return self._predictor(x, **self._kwargs[mode])

  @abc.abstractmethod
  def _build(self, inputs, labels):
    """Returns the adversarial attack around inputs."""

  @abc.abstractproperty
  def logits(self):
    """Returns the logits corresponding to the best attack."""

  @abc.abstractproperty
  def attack(self):
    """Returns the best attack."""

  @abc.abstractproperty
  def success(self):
    """Returns whether the attack was successful."""

  def force_mode(self, mode):
    """Only used by RestartedAttack to force the evaluation mode."""
    self._forced_mode = mode

  @property
  def target_class(self):
    """Returns the target class if this attack is a targeted attack."""
    return self._target_class

  @target_class.setter
  def target_class(self, t):
    self._target_class = t


@six.add_metaclass(abc.ABCMeta)
class PGDAttack(Attack):
  """Defines a PGD attack."""

  def __init__(self, predictor, specification, epsilon, lr=.1, lr_fn=None,
               num_steps=20, num_restarts=1, input_bounds=(0., 1.),
               random_init=1., optimizer_builder=UnrolledGradientDescent,
               project_perturbation=_project_perturbation,
               predictor_kwargs=None):
    """Stores the attack configuration; the module is always named 'pgd'."""
    super(PGDAttack, self).__init__(predictor, specification, name='pgd',
                                    predictor_kwargs=predictor_kwargs)
    self._num_steps = num_steps
    self._num_restarts = num_restarts
    self._epsilon = epsilon
    self._lr = lr
    self._lr_fn = lr_fn
    self._input_bounds = input_bounds
    self._random_init = random_init
    self._optimizer_builder = optimizer_builder
    self._project_perturbation = project_perturbation

  # Helper functions.
  def prepare_inputs(self, inputs):
    """Tiles inputs according to number of restarts.

    Args:
      inputs: Tensor of shape [batch_size, ...].

    Returns:
      batch_size: Scalar tensor, the dynamic size of the first dimension.
      input_shape: List with the static shape of `inputs` without the first
        dimension.
      duplicated_inputs: Tensor of shape [num_restarts * batch_size, ...].
    """
    batch_size = tf.shape(inputs)[0]
    input_shape = list(inputs.shape.as_list()[1:])
    duplicated_inputs = tf.expand_dims(inputs, axis=0)
    # Shape is [num_restarts, batch_size, ...]
    duplicated_inputs = tf.tile(
        duplicated_inputs,
        [self._num_restarts, 1] + [1] * len(input_shape))
    # Shape is [num_restarts * batch_size, ...]
    duplicated_inputs = tf.reshape(
        duplicated_inputs, [self._num_restarts * batch_size] + input_shape)
    return batch_size, input_shape, duplicated_inputs

  def prepare_labels(self, labels):
    """Tiles labels according to number of restarts."""
    # NOTE(review): the single-element multiples list implies rank-1 labels.
    return tf.tile(labels, [self._num_restarts])

  def find_worst_attack(self, objective_fn, adversarial_input, batch_size,
                        input_shape):
    """Returns the attack that maximizes objective_fn.

    Args:
      objective_fn: Callable mapping `adversarial_input` to an objective that
        can be reshaped to [-1, batch_size].
      adversarial_input: Tensor that can be reshaped to
        [-1, batch_size] + input_shape.
      batch_size: Size of the batch dimension.
      input_shape: List, shape of a single input.

    Returns:
      Tensor with one input per batch element: the candidate with the largest
      objective along the leading dimension.
    """
    adversarial_objective = objective_fn(adversarial_input)
    adversarial_objective = tf.reshape(adversarial_objective, [-1, batch_size])
    adversarial_input = tf.reshape(adversarial_input,
                                   [-1, batch_size] + input_shape)
    # For each batch element j, i[j] is the index of the best candidate.
    i = tf.argmax(adversarial_objective, axis=0)
    j = tf.cast(tf.range(tf.shape(adversarial_objective)[1]), i.dtype)
    ij = tf.stack([i, j], axis=1)
    return tf.gather_nd(adversarial_input, ij)


def _maximize_margin(bounds):
  """Returns the maximum of `bounds` over the last (specification) axis."""
  # Bounds has shape [num_restarts, batch_size, num_specs].
  return tf.reduce_max(bounds, axis=-1)


def _any_greater(bounds):
  """Returns whether any entry along the last axis of `bounds` is positive."""
  # Bounds has shape [batch_size, num_specs].
  bounds = tf.reduce_max(bounds, axis=-1)
  return bounds > 0.


def _maximize_topk_hinge_margin(bounds, k=5, margin=.1):
  """Returns the sum of the `k` largest bounds, each capped at `margin`."""
  # Bounds has shape [num_restarts, batch_size, num_specs].
  b = tf.nn.top_k(bounds, k=k, sorted=False).values
  return tf.reduce_sum(tf.minimum(b, margin), axis=-1)


def _topk_greater(bounds, k=5):
  """Returns whether the `k` largest bounds are all positive."""
  # Bounds has shape [batch_size, num_specs].
  b = tf.nn.top_k(bounds, k=k, sorted=False).values
  return tf.reduce_min(b, axis=-1) > 0.
class UntargetedPGDAttack(PGDAttack):
  """Defines an untargeted PGD attack."""

  def __init__(self, predictor, specification, epsilon, lr=.1, lr_fn=None,
               num_steps=20, num_restarts=1, input_bounds=(0., 1.),
               random_init=1., optimizer_builder=UnrolledGradientDescent,
               project_perturbation=_project_perturbation,
               objective_fn=_maximize_margin, success_fn=_any_greater,
               predictor_kwargs=None):
    """Initializes the attack.

    Beyond the arguments of `PGDAttack`:

    Args:
      objective_fn: Callable applied to the output of
        `specification.evaluate`; the attack maximizes its value.
      success_fn: Callable applied to the output of `specification.evaluate`
        on the final logits; its result is exposed as `success`.
    """
    super(UntargetedPGDAttack, self).__init__(
        predictor, specification, epsilon, lr, lr_fn, num_steps, num_restarts,
        input_bounds, random_init, optimizer_builder, project_perturbation,
        predictor_kwargs)
    self._objective_fn = objective_fn
    self._success_fn = success_fn

  def _build(self, inputs, labels):
    """Runs PGD from every restart and returns the best attack per input."""
    batch_size, input_shape, duplicated_inputs = self.prepare_inputs(inputs)
    duplicated_labels = self.prepare_labels(labels)

    # Define objectives.
    def objective_fn(x):
      model_logits = self._eval_fn(x)  # [restarts * batch_size, output].
      model_logits = tf.reshape(
          model_logits, [self._num_restarts, batch_size, -1])
      bounds = self._specification.evaluate(model_logits)
      # Output has dimension [num_restarts, batch_size].
      return self._objective_fn(bounds)

    # Only used for SPSA.
    # The input to this loss is the perturbation (not the image).
    # The first dimension corresponds to the number of SPSA samples.
    # Shape of perturbations is [num_samples, restarts * batch_size, ...]
    def spsa_loss_fn(perturbation):
      """Computes the loss per SPSA sample."""
      x = tf.reshape(
          perturbation + tf.expand_dims(duplicated_inputs, axis=0),
          [-1] + duplicated_inputs.shape.as_list()[1:])
      model_logits = self._eval_fn(x)
      num_outputs = tf.shape(model_logits)[1]
      model_logits = tf.reshape(
          model_logits, [-1, batch_size, num_outputs])
      bounds = self._specification.evaluate(model_logits)
      # Negate as the optimizer minimizes.
      losses = -self._objective_fn(bounds)
      return tf.reshape(losses, [-1])

    def reduced_loss_fn(x):
      # Pick worse attack, output has shape [num_restarts, batch_size].
      return -tf.reduce_sum(objective_fn(x))

    # Use targeted attacks as specified by the specification.
    if _is_spsa_optimizer(self._optimizer_builder):
      # SPSA optimizers estimate gradients from `loss_fn` directly.
      optimizer = self._optimizer_builder(lr=self._lr, lr_fn=self._lr_fn,
                                          loss_fn=spsa_loss_fn)
    else:
      optimizer = self._optimizer_builder(lr=self._lr, lr_fn=self._lr_fn)

    adversarial_input = pgd_attack(
        reduced_loss_fn, duplicated_inputs, epsilon=self._epsilon,
        num_steps=self._num_steps, image_bounds=self._input_bounds,
        random_init=self._random_init, optimizer=optimizer,
        project_perturbation=self._project_perturbation)
    adversarial_input = self.adapt(duplicated_inputs, adversarial_input,
                                   duplicated_labels)

    self._attack = self.find_worst_attack(objective_fn, adversarial_input,
                                          batch_size, input_shape)
    self._logits = self._eval_fn(self._attack, mode='final')
    self._success = self._success_fn(self._specification.evaluate(self._logits))
    return self._attack

  @property
  def logits(self):
    """Logits of the best attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._logits

  @property
  def attack(self):
    """The best attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._attack

  @property
  def success(self):
    """Result of `success_fn`; requires the module to be connected."""
    self._ensure_is_connected()
    return self._success

  def adapt(self, original_inputs, adversarial_inputs, labels):
    """Function called after PGD to adapt adversarial examples."""
    # Default: leave the adversarial examples untouched.
    return adversarial_inputs


class UntargetedTop5PGDAttack(UntargetedPGDAttack):
  """Defines an untargeted PGD attack on top-5."""

  def __init__(self, predictor, specification, epsilon, lr=.1, lr_fn=None,
               num_steps=20, num_restarts=1, input_bounds=(0., 1.),
               random_init=1., optimizer_builder=UnrolledGradientDescent,
               project_perturbation=_project_perturbation,
               objective_fn=_maximize_topk_hinge_margin, predictor_kwargs=None):
    # The success criterion is fixed to `_topk_greater`.
    super(UntargetedTop5PGDAttack, self).__init__(
        predictor, specification, epsilon, lr=lr, lr_fn=lr_fn,
        num_steps=num_steps, num_restarts=num_restarts,
        input_bounds=input_bounds, random_init=random_init,
        optimizer_builder=optimizer_builder,
        project_perturbation=project_perturbation,
        objective_fn=objective_fn, success_fn=_topk_greater,
        predictor_kwargs=predictor_kwargs)


class UntargetedAdaptivePGDAttack(UntargetedPGDAttack):
  """Uses an adaptive scheme to pick attacks that are just strong enough."""

  def adapt(self, original_inputs, adversarial_inputs, labels):
    """Runs binary search to find the first misclassified input.

    The search is over a per-example factor `m` in [0, 1] that interpolates
    between `original_inputs` (m = 0) and `adversarial_inputs` (m = 1).

    Args:
      original_inputs: Tensor of unperturbed inputs.
      adversarial_inputs: Tensor of adversarial inputs, same shape.
      labels: Unused.

    Returns:
      Tensor of interpolated inputs, same shape as `original_inputs`.
    """
    batch_size = tf.shape(original_inputs)[0]
    binary_search_iterations = 10

    def cond(i, *_):
      return tf.less(i, binary_search_iterations)

    def get(m):
      # Interpolates between the original and the adversarial inputs.
      m = tf.reshape(m, [batch_size] + [1] * (len(original_inputs.shape) - 1))
      return (adversarial_inputs - original_inputs) * m + original_inputs

    def is_attack_successful(m):
      logits = self._eval_fn(get(m))
      return self._success_fn(self._specification.evaluate(logits))

    def loop_body(i, lower, upper):
      m = (lower + upper) * .5
      success = is_attack_successful(m)
      # On success the midpoint becomes the new upper end of the interval,
      # otherwise the new lower end.
      new_lower = tf.where(success, lower, m)
      new_upper = tf.where(success, m, upper)
      return i + 1, new_lower, new_upper

    lower = tf.zeros(shape=[batch_size])
    upper = tf.ones(shape=[batch_size])
    _, lower, upper = tf.while_loop(
        cond,
        loop_body,
        loop_vars=[tf.constant(0.), lower, upper],
        parallel_iterations=1,
        back_prop=False)
    # If lower is incorrectly classified, pick lower; otherwise pick upper.
    success = is_attack_successful(lower)
    return get(tf.where(success, lower, upper))


class MultiTargetedPGDAttack(PGDAttack):
  """Runs targeted attacks for each specification."""

  def __init__(self, predictor, specification, epsilon, lr=.1, lr_fn=None,
               num_steps=20, num_restarts=1, input_bounds=(0., 1.),
               random_init=1., optimizer_builder=UnrolledGradientDescent,
               project_perturbation=_project_perturbation,
               max_specifications=0, random_specifications=False,
               predictor_kwargs=None):
    """Initializes the attack.

    Beyond the arguments of `PGDAttack`:

    Args:
      max_specifications: If positive and smaller than the number of
        specifications, only that many specifications per example contribute
        to the loss.
      random_specifications: If True, the contributing specifications are
        drawn at random rather than taken from the top-k bounds on the
        unperturbed inputs.
    """
    super(MultiTargetedPGDAttack, self).__init__(
        predictor, specification, epsilon, lr=lr, lr_fn=lr_fn,
        num_steps=num_steps, num_restarts=num_restarts,
        input_bounds=input_bounds, random_init=random_init,
        optimizer_builder=optimizer_builder,
        project_perturbation=project_perturbation,
        predictor_kwargs=predictor_kwargs)
    self._max_specifications = max_specifications
    self._random_specifications = random_specifications

  def _build(self, inputs, labels):
    """Attacks all specifications at once and returns the best attack.

    Raises:
      ValueError: If the optimizer builder is an SPSA optimizer.
    """
    batch_size = tf.shape(inputs)[0]
    num_specs = self._specification.num_specifications
    if self._max_specifications > 0 and self._max_specifications < num_specs:
      # Restrict the loss to a subset of specifications per example.
      model_logits = self._eval_fn(inputs)
      bounds = self._specification.evaluate(model_logits)
      _, idx = tf.math.top_k(bounds, k=self._max_specifications, sorted=False)
      if self._random_specifications:
        idx = tf.random.uniform(shape=tf.shape(idx),
                                maxval=self._specification.num_specifications,
                                dtype=idx.dtype)
      idx = tf.tile(tf.expand_dims(idx, 0), [self._num_restarts, 1, 1])
      select_fn = lambda x: tf.gather(x, idx, batch_dims=len(idx.shape) - 1)
    else:
      select_fn = lambda x: x

    input_shape = list(inputs.shape.as_list()[1:])
    duplicated_inputs = tf.expand_dims(inputs, axis=0)
    # Shape is [num_restarts * num_specifications, batch_size, ...]
    duplicated_inputs = tf.tile(
        duplicated_inputs,
        [self._num_restarts * num_specs, 1] + [1] * len(input_shape))
    # Shape is [num_restarts * num_specifications * batch_size, ...]
    duplicated_inputs = tf.reshape(duplicated_inputs, [-1] + input_shape)

    def objective_fn(x):
      # Output has shape [restarts * num_specs * batch_size, output].
      model_logits = self._eval_fn(x)
      model_logits = tf.reshape(
          model_logits, [self._num_restarts, num_specs, batch_size, -1])
      # Output has shape [num_restarts, batch_size, num_specs].
      return self._specification.evaluate(model_logits)

    def reduced_loss_fn(x):
      # Negate as we minimize.
      return -tf.reduce_sum(select_fn(objective_fn(x)))

    # Use targeted attacks as specified by the specification.
    if _is_spsa_optimizer(self._optimizer_builder):
      raise ValueError('"UnrolledSPSA*" unsupported in '
                       'MultiTargetedPGDAttack')
    optimizer = self._optimizer_builder(lr=self._lr, lr_fn=self._lr_fn)
    adversarial_input = pgd_attack(
        reduced_loss_fn, duplicated_inputs, epsilon=self._epsilon,
        num_steps=self._num_steps, image_bounds=self._input_bounds,
        random_init=self._random_init, optimizer=optimizer,
        project_perturbation=self._project_perturbation)
    # Get best attack.
    adversarial_objective = objective_fn(adversarial_input)
    adversarial_objective = tf.transpose(adversarial_objective, [0, 2, 1])
    adversarial_objective = tf.reshape(adversarial_objective, [-1, batch_size])
    adversarial_input = tf.reshape(adversarial_input,
                                   [-1, batch_size] + input_shape)
    # For each batch element j, i[j] is the index of the best candidate.
    i = tf.argmax(adversarial_objective, axis=0)
    j = tf.cast(tf.range(tf.shape(adversarial_objective)[1]), i.dtype)
    ij = tf.stack([i, j], axis=1)
    self._attack = tf.gather_nd(adversarial_input, ij)
    self._logits = self._eval_fn(self._attack, mode='final')
    # Count the number of samples that violate any specification.
    bounds = tf.reduce_max(self._specification.evaluate(self._logits), axis=1)
    self._success = (bounds > 0.)
    return self._attack

  @property
  def logits(self):
    """Logits of the best attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._logits

  @property
  def attack(self):
    """The best attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._attack

  @property
  def success(self):
    """Whether any specification is violated; requires a connected module."""
    self._ensure_is_connected()
    return self._success


class MemoryEfficientMultiTargetedPGDAttack(PGDAttack):
  """Defines a targeted PGD attack for each specification using while_loop."""

  def __init__(self, predictor, specification, epsilon, lr=.1, lr_fn=None,
               num_steps=20, num_restarts=1, input_bounds=(0., 1.),
               random_init=1., optimizer_builder=UnrolledGradientDescent,
               project_perturbation=_project_perturbation,
               max_specifications=0, random_specifications=False,
               predictor_kwargs=None):
    """Initializes the attack.

    Beyond the arguments of `PGDAttack`:

    Args:
      max_specifications: If positive and smaller than the number of
        specifications, only that many specifications per example are
        attacked.
      random_specifications: If True, the attacked specifications are drawn at
        random rather than taken from the top-k bounds on the unperturbed
        inputs.
    """
    super(MemoryEfficientMultiTargetedPGDAttack, self).__init__(
        predictor, specification, epsilon, lr=lr, lr_fn=lr_fn,
        num_steps=num_steps, num_restarts=num_restarts,
        input_bounds=input_bounds, random_init=random_init,
        optimizer_builder=optimizer_builder,
        project_perturbation=project_perturbation,
        predictor_kwargs=predictor_kwargs)
    self._max_specifications = max_specifications
    self._random_specifications = random_specifications

  def _build(self, inputs, labels):
    """Attacks one specification per `tf.while_loop` iteration.

    The loop stops once every specification has been processed or every
    example has been attacked successfully.

    Raises:
      ValueError: If the optimizer builder is an SPSA optimizer.
    """
    batch_size, input_shape, duplicated_inputs = self.prepare_inputs(inputs)
    if (self._max_specifications > 0 and
        self._max_specifications < self._specification.num_specifications):
      num_specs = self._max_specifications
      model_logits = self._eval_fn(inputs)
      bounds = self._specification.evaluate(model_logits)
      _, idx = tf.math.top_k(bounds, k=num_specs, sorted=False)
      if self._random_specifications:
        idx = tf.random.uniform(shape=tf.shape(idx),
                                maxval=self._specification.num_specifications,
                                dtype=idx.dtype)
      idx = tf.tile(tf.expand_dims(idx, 0), [self._num_restarts, 1, 1])
      def select_fn(x, i):
        # Picks, for every example, the bound of its i-th selected
        # specification.
        return tf.squeeze(
            tf.gather(x, tf.expand_dims(idx[:, :, i], -1),
                      batch_dims=len(idx.shape) - 1),
            axis=-1)
    else:
      num_specs = self._specification.num_specifications
      select_fn = lambda x, i: x[:, :, i]

    def objective_fn(x):
      model_logits = self._eval_fn(x)  # [restarts * batch_size, output].
      model_logits = tf.reshape(
          model_logits, [self._num_restarts, batch_size, -1])
      # Output has dimension [num_restarts, batch_size, num_specifications].
      return self._specification.evaluate(model_logits)

    def flat_objective_fn(x):
      return _maximize_margin(objective_fn(x))

    def build_loss_fn(idx):
      def _reduced_loss_fn(x):
        # Pick worse attack, output has shape [num_restarts, batch_size].
        return -tf.reduce_sum(select_fn(objective_fn(x), idx))
      return _reduced_loss_fn

    if _is_spsa_optimizer(self._optimizer_builder):
      raise ValueError('"UnrolledSPSA*" unsupported in '
                       'MultiTargetedPGDAttack')
    optimizer = self._optimizer_builder(lr=self._lr, lr_fn=self._lr_fn)

    # Run a separate PGD attack for each specification.
    def cond(spec_idx, unused_attack, success):
      # If we are already successful, we break.
      return tf.logical_and(spec_idx < num_specs,
                            tf.logical_not(tf.reduce_all(success)))

    def body(spec_idx, attack, success):
      """Runs a separate PGD attack for each specification."""
      adversarial_input = pgd_attack(
          build_loss_fn(spec_idx), duplicated_inputs,
          epsilon=self._epsilon, num_steps=self._num_steps,
          image_bounds=self._input_bounds, random_init=self._random_init,
          optimizer=optimizer, project_perturbation=self._project_perturbation)
      new_attack = self.find_worst_attack(flat_objective_fn, adversarial_input,
                                          batch_size, input_shape)
      new_logits = self._eval_fn(new_attack)
      # Count the number of samples that violate any specification.
      new_success = _any_greater(self._specification.evaluate(new_logits))
      # The first iteration always sets the attack and logits.
      use_new_values = tf.logical_or(tf.equal(spec_idx, 0), new_success)
      print_op = tf.print('Processed specification #', spec_idx)
      # Tie the print to the counter increment so that it runs on every
      # iteration.
      with tf.control_dependencies([print_op]):
        new_spec_idx = spec_idx + 1
      return (new_spec_idx,
              tf.where(use_new_values, new_attack, attack),
              tf.logical_or(success, new_success))

    _, self._attack, self._success = tf.while_loop(
        cond, body, back_prop=False, parallel_iterations=1,
        loop_vars=[
            tf.constant(0, dtype=tf.int32),
            inputs,
            tf.zeros([tf.shape(inputs)[0]], dtype=tf.bool),
        ])
    self._logits = self._eval_fn(self._attack, mode='final')
    return self._attack

  @property
  def logits(self):
    """Logits of the returned attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._logits

  @property
  def attack(self):
    """The returned attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._attack

  @property
  def success(self):
    """Per-example success flags; requires the module to be connected."""
    self._ensure_is_connected()
    return self._success


class RestartedAttack(Attack):
  """Wraps an attack to run it multiple times using a tf.while_loop."""

  def __init__(self, inner_attack, num_restarts=1):
    """Initializes the wrapper.

    Args:
      inner_attack: `Attack` to run repeatedly. Its predictor, specification
        and predictor keyword arguments are reused by this module.
      num_restarts: Maximum number of times `inner_attack` is run.
    """
    super(RestartedAttack, self).__init__(
        inner_attack._predictor,  # pylint: disable=protected-access
        inner_attack._specification,  # pylint: disable=protected-access
        name='restarted_' + inner_attack.module_name,
        predictor_kwargs=inner_attack._kwargs)  # pylint: disable=protected-access
    self._inner_attack = inner_attack
    self._num_restarts = num_restarts
    # Prevent the inner attack from updating batch normalization statistics.
    self._inner_attack.force_mode('intermediate')

  def _build(self, inputs, labels):
    """Runs the inner attack until all examples succeed or restarts run out."""

    def cond(i, unused_attack, success):
      # If we are already successful, we break.
      return tf.logical_and(i < self._num_restarts,
                            tf.logical_not(tf.reduce_all(success)))

    def body(i, attack, success):
      new_attack = self._inner_attack(inputs, labels)
      new_success = self._inner_attack.success
      # The first iteration always sets the attack.
      use_new_values = tf.logical_or(tf.equal(i, 0), new_success)
      return (i + 1,
              tf.where(use_new_values, new_attack, attack),
              tf.logical_or(success, new_success))

    _, self._attack, self._success = tf.while_loop(
        cond, body, back_prop=False, parallel_iterations=1,
        loop_vars=[
            tf.constant(0, dtype=tf.int32),
            inputs,
            tf.zeros([tf.shape(inputs)[0]], dtype=tf.bool),
        ])
    self._logits = self._eval_fn(self._attack, mode='final')
    return self._attack

  @property
  def logits(self):
    """Logits of the returned attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._logits

  @property
  def attack(self):
    """The returned attack; requires the module to be connected."""
    self._ensure_is_connected()
    return self._attack

  @property
  def success(self):
    """Per-example success flags; requires the module to be connected."""
    self._ensure_is_connected()
    return self._success


================================================
FILE: interval_bound_propagation/src/bounds.py
================================================
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Definition of input bounds to each layer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import itertools

import six
import sonnet as snt
import tensorflow.compat.v1 as tf


@six.add_metaclass(abc.ABCMeta)
class AbstractBounds(object):
  """Abstract bounds class.

  Subclasses override the `apply_*` methods for the layer types they support;
  the defaults defined here raise `NotImplementedError`.
  """

  def __init__(self):
    # Set by `enable_caching()`; None means caching has not been enabled.
    self._update_cache_op = None

  @classmethod
  @abc.abstractmethod
  def convert(cls, bounds):
    """Converts another bound type to this type."""

  @abc.abstractproperty
  def shape(self):
    """Returns shape (as list) of the tensor, including batch dimension."""

  def concretize(self):
    """Returns the concrete form of these bounds (by default, `self`)."""
    return self

  def _raise_not_implemented(self, name):
    """Raises `NotImplementedError` naming the unsupported module `name`."""
    raise NotImplementedError(
        '{} modules are not supported by "{}".'.format(
            name, self.__class__.__name__))

  def apply_linear(self, wrapper, w, b):  # pylint: disable=unused-argument
    """Propagates bounds through a linear layer (unsupported by default)."""
    self._raise_not_implemented('snt.Linear')

  def apply_conv1d(self, wrapper, w, b, padding, stride):  # pylint: disable=unused-argument
    """Propagates bounds through a 1D convolution (unsupported by default)."""
    self._raise_not_implemented('snt.Conv1D')

  def apply_conv2d(self, wrapper, w, b, padding, strides):  # pylint: disable=unused-argument
    """Propagates bounds through a 2D convolution (unsupported by default)."""
    self._raise_not_implemented('snt.Conv2D')

  def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters):  # pylint: disable=unused-argument
    """Propagates bounds through an increasing fn (unsupported by default)."""
    self._raise_not_implemented(fn.__name__)

  def apply_piecewise_monotonic_fn(self, wrapper, fn, boundaries, *args):  # pylint: disable=unused-argument
    """Propagates bounds through a piecewise fn (unsupported by default)."""
    self._raise_not_implemented(fn.__name__)

  def apply_batch_norm(self, wrapper, mean, variance, scale, bias, epsilon):  # pylint: disable=unused-argument
    """Propagates bounds through batch norm (unsupported by default)."""
    self._raise_not_implemented('ibp.BatchNorm')

  def apply_batch_reshape(self, wrapper, shape):  # pylint: disable=unused-argument
    """Propagates bounds through a batch reshape (unsupported by default)."""
    self._raise_not_implemented('snt.BatchReshape')

  def apply_softmax(self, wrapper):  # pylint: disable=unused-argument
    """Propagates bounds through a softmax (unsupported by default)."""
    self._raise_not_implemented('tf.nn.softmax')

  @property
  def update_cache_op(self):
    """TF op to update cached bounds for re-use across session.run calls.

    Raises:
      ValueError: if `enable_caching()` has not been called.
    """
    if self._update_cache_op is None:
      raise ValueError('Bounds not cached: enable_caching() not called.')
    return self._update_cache_op

  def enable_caching(self):
    """Enables caching the bounds for re-use across session.run calls.

    Raises:
      ValueError: if caching has already been enabled on this object.
    """
    if self._update_cache_op is not None:
      raise ValueError('Bounds already cached: enable_caching() called twice.')
    self._update_cache_op = self._set_up_cache()

  def _set_up_cache(self):
    """Replace fields with cached versions.

    Returns:
      TensorFlow op to update the cache.
    """
    return tf.no_op()  # By default, don't cache.

  def _cache_with_update_op(self, tensor):
    """Creates non-trainable variable to cache the tensor across sess.run calls.

    Args:
      tensor: Tensor to cache.

    Returns:
      cached_tensor: Non-trainable variable to contain the cached value of
        `tensor`.
      update_op: TensorFlow op to re-evaluate `tensor` and assign the result
        to `cached_tensor`.
    """
    # The variable name is derived from the tensor's graph name, with ':'
    # replaced by '__' before the '_ibp_cache' suffix is appended.
    cached_tensor = tf.get_variable(
        tensor.name.replace(':', '__') + '_ibp_cache',
        shape=tensor.shape, dtype=tensor.dtype, trainable=False)
    update_op = tf.assign(cached_tensor, tensor)
    return cached_tensor, update_op


class IntervalBounds(AbstractBounds):
  """Axis-aligned bounding box."""

  def __init__(self, lower, upper):
    super(IntervalBounds, self).__init__()
    self._lower = lower
    self._upper = upper

  @property
  def lower(self):
    """Lower bound, as passed to the constructor (or its cached version)."""
    return self._lower

  @property
  def upper(self):
    """Upper bound, as passed to the constructor (or its cached version)."""
    return self._upper

  @property
  def shape(self):
    """Static shape of the lower bound, as a list."""
    return self.lower.shape.as_list()

  def __iter__(self):
    """Yields `lower` then `upper`, allowing `lb, ub = bounds` unpacking."""
    yield self.lower
    yield self.upper

  @classmethod
  def convert(cls, bounds):
    """Converts a tensor or another bounds object to `IntervalBounds`.

    Args:
      bounds: a `tf.Tensor` (used as both lower and upper bound), or a bounds
        object whose `concretize()` yields an instance of this class.

    Returns:
      An instance of this class.

    Raises:
      ValueError: if the concretized bounds are not an instance of this class.
    """
    if isinstance(bounds, tf.Tensor):
      return cls(bounds, bounds)
    bounds = bounds.concretize()
    if not isinstance(bounds, cls):
      raise ValueError('Cannot convert "{}" to "{}"'.format(bounds,
                                                            cls.__name__))
    return bounds

  def apply_linear(self, wrapper, w, b):
    """Propagates the interval through `x -> matmul(x, w) + b`."""
    return self._affine(w, b, tf.matmul)

  def apply_conv1d(self, wrapper, w, b, padding, stride):
    """Propagates the interval through a 1D convolution."""
    return self._affine(w, b, tf.nn.conv1d, padding=padding, stride=stride)

  def apply_conv2d(self, wrapper, w, b, padding, strides):
    """Propagates the interval through a 2D convolution."""
    return self._affine(w, b, tf.nn.convolution,
                        padding=padding, strides=strides)

  def _affine(self, w, b, fn, **kwargs):
    """Propagates the interval through the affine map `fn(., w) + b`.

    Works in centre/radius form: the centre is mapped with `w` (plus bias) and
    the radius with `|w|`.

    Args:
      w: weights passed as second argument to `fn`.
      b: bias added to the mapped centre, or None.
      fn: callable `fn(inputs, weights, **kwargs)` applying the linear map.
      **kwargs: extra keyword arguments forwarded to `fn`.

    Returns:
      `IntervalBounds` on the outputs.
    """
    c = (self.lower + self.upper) / 2.
    r = (self.upper - self.lower) / 2.
    c = fn(c, w, **kwargs)
    if b is not None:
      c = c + b
    r = fn(r, tf.abs(w), **kwargs)
    return IntervalBounds(c - r, c + r)

  def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters):
    """Applies `fn` to the lower bounds and to the upper bounds.

    Args:
      wrapper: unused here.
      fn: callable taking one tensor per input.
      *args: bounds objects (exposing `lower`/`upper`) for additional inputs.
      **parameters: accepted for interface compatibility; not used here.

    Returns:
      `IntervalBounds(fn(*lowers), fn(*uppers))`.
    """
    args_lower = [self.lower] + [a.lower for a in args]
    args_upper = [self.upper] + [a.upper for a in args]
    return IntervalBounds(fn(*args_lower), fn(*args_upper))

  def apply_piecewise_monotonic_fn(self, wrapper, fn, boundaries, *args):
    """Bounds `fn` by evaluating it at candidate points of every input.

    For each input the candidates are its lower bound, its upper bound, and
    each value in `boundaries` clipped into [lower, upper]. `fn` is evaluated
    on every combination of candidates and the element-wise min/max is taken.

    Args:
      wrapper: unused here.
      fn: callable taking one tensor per input.
      boundaries: iterable of values at which `fn` is also evaluated.
      *args: bounds objects (exposing `lower`/`upper`) for additional inputs.

    Returns:
      `IntervalBounds` on the outputs.
    """
    valid_values = []
    for a in [self] + list(args):
      vs = []
      vs.append(a.lower)
      vs.append(a.upper)
      for b in boundaries:
        # Clip the boundary value into [a.lower, a.upper].
        vs.append(
            tf.maximum(a.lower, tf.minimum(a.upper, b * tf.ones_like(a.lower))))
      valid_values.append(vs)
    outputs = []
    for inputs in itertools.product(*valid_values):
      outputs.append(fn(*inputs))
    outputs = tf.stack(outputs, axis=-1)
    return IntervalBounds(tf.reduce_min(outputs, axis=-1),
                          tf.reduce_max(outputs, axis=-1))

  def apply_batch_norm(self, wrapper, mean, variance, scale, bias, epsilon):
    """Propagates the interval through batch normalisation.

    Batch norm is treated as an element-wise affine map with multiplier
    `scale * rsqrt(variance + epsilon)` and offset `bias - multiplier * mean`.

    Args:
      wrapper: unused here.
      mean: mean used for normalisation.
      variance: variance used for normalisation.
      scale: optional multiplicative factor, or None.
      bias: optional additive offset, or None.
      epsilon: constant added to the variance before `rsqrt`.

    Returns:
      `IntervalBounds` on the outputs.
    """
    # Element-wise multiplier.
    multiplier = tf.rsqrt(variance + epsilon)
    if scale is not None:
      multiplier *= scale
    w = multiplier
    # Element-wise bias.
    b = -multiplier * mean
    if bias is not None:
      b += bias
    b = tf.squeeze(b, axis=0)
    # Because the scale might be negative, we need to apply a strategy similar
    # to linear.
    c = (self.lower + self.upper) / 2.
    r = (self.upper - self.lower) / 2.
    c = tf.multiply(c, w) + b
    r = tf.multiply(r, tf.abs(w))
    return IntervalBounds(c - r, c + r)

  def apply_batch_reshape(self, wrapper, shape):
    """Reshapes both bounds with `snt.BatchReshape(shape)`."""
    return IntervalBounds(snt.BatchReshape(shape)(self.lower),
                          snt.BatchReshape(shape)(self.upper))

  def apply_softmax(self, wrapper):
    """Propagates the interval through a softmax.

    Each output's bound is obtained by a softmax over a vector that keeps that
    coordinate's own bound and uses the opposite bound for every other
    coordinate.
    """
    ub = self.upper
    lb = self.lower
    # Keep diagonal and take opposite bound for non-diagonals.
    lbs = tf.matrix_diag(lb) + tf.expand_dims(ub, axis=-2) - tf.matrix_diag(ub)
    ubs = tf.matrix_diag(ub) + tf.expand_dims(lb, axis=-2) - tf.matrix_diag(lb)
    # Get diagonal entries after softmax operation.
    ubs = tf.matrix_diag_part(tf.nn.softmax(ubs))
    lbs = tf.matrix_diag_part(tf.nn.softmax(lbs))
    return IntervalBounds(lbs, ubs)

  def _set_up_cache(self):
    """Replaces both bounds with cached variables; returns the update op."""
    self._lower, update_lower_op = self._cache_with_update_op(self._lower)
    self._upper, update_upper_op = self._cache_with_update_op(self._upper)
    return tf.group([update_lower_op, update_upper_op])


================================================
FILE: interval_bound_propagation/src/crown.py
================================================
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CROWN-IBP implementation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections from absl import logging from interval_bound_propagation.src import bounds from interval_bound_propagation.src import fastlin from interval_bound_propagation.src import loss from interval_bound_propagation.src import model from interval_bound_propagation.src import specification as specification_lib from interval_bound_propagation.src import utils from interval_bound_propagation.src import verifiable_wrapper import tensorflow.compat.v1 as tf class BackwardBounds(bounds.AbstractBounds): """Implementation of backward bound propagation used by CROWN.""" def __init__(self, lower, upper): super(BackwardBounds, self).__init__() # Setting "lower" or "upper" to None will avoid creating the computation # graph for CROWN lower or upper bounds. For verifiable training, only the # upper bound is necessary. self._lower = lower self._upper = upper @property def lower(self): return self._lower @property def upper(self): return self._upper @property def shape(self): return self.lower.shape.as_list() def concretize(self): """Returns lower and upper interval bounds.""" lb = ub = None if self.lower is not None: lb = ( tf.einsum('nsi,ni->ns', self._reshape_to_rank(tf.maximum(self.lower.w, 0), 3), self._reshape_to_rank(self.lower.lower, 2)) + tf.einsum('nsi,ni->ns', self._reshape_to_rank(tf.minimum(self.lower.w, 0), 3), self._reshape_to_rank(self.lower.upper, 2))) lb += self.lower.b if self.upper is not None: ub = ( tf.einsum('nsi,ni->ns', self._reshape_to_rank(tf.maximum(self.upper.w, 0), 3), self._reshape_to_rank(self.upper.upper, 2)) + tf.einsum('nsi,ni->ns', self._reshape_to_rank(tf.minimum(self.upper.w, 0), 3), self._reshape_to_rank(self.upper.lower, 2))) ub += self.upper.b return bounds.IntervalBounds(lb, ub) @classmethod def convert(cls, other_bounds): if isinstance(other_bounds, cls): return other_bounds raise 
RuntimeError('BackwardBounds does not support conversion from any ' 'other bound type.') def apply_linear(self, wrapper, w, b): """Propagate CROWN bounds backward through a linear layer.""" def _linear_propagate(bound): """Propagate one side of the bound.""" new_bound_w = tf.einsum('nsk,lk->nsl', bound.w, w) if b is not None: bias = tf.tensordot(bound.w, b, axes=1) return fastlin.LinearExpression(w=new_bound_w, b=bias + bound.b, lower=wrapper.input_bounds.lower, upper=wrapper.input_bounds.upper) ub_expr = _linear_propagate(self.upper) if self.upper else None lb_expr = _linear_propagate(self.lower) if self.lower else None return BackwardBounds(lb_expr, ub_expr) def apply_conv2d(self, wrapper, w, b, padding, strides): """Propagate CROWN bounds backward through a convolution layer.""" def _conv2d_propagate(bound): """Propagate one side of the bound.""" s = tf.shape(bound.w) # Variable bound.w has shape (batch_size, num_specs, H, W, C), # resize it to (batch_size * num_specs, H, W, C) for batch processing. effective_batch_size = tf.reshape(s[0] * s[1], [1]) batched_shape = tf.concat([effective_batch_size, s[2:]], 0) # The output of a deconvolution is the input shape of the corresponding # convolution. output_shape = wrapper.input_bounds.lower.shape batched_output_shape = tf.concat([effective_batch_size, output_shape[1:]], 0) # Batched transpose convolution for efficiency. bound_batch = tf.nn.conv2d_transpose(tf.reshape(bound.w, batched_shape), filter=w, output_shape=batched_output_shape, strides=[1] + list(strides) + [1], padding=padding) # Reshape results to (batch_size, num_specs, new_H, new_W, new_C). new_shape = tf.concat( [tf.reshape(s[0], [1]), tf.reshape(s[1], [1]), output_shape[1:]], 0) new_bound_w = tf.reshape(bound_batch, new_shape) # If this convolution has bias, multiplies it with current w. bias = 0 if b is not None: # Variable bound.w has dimension (batch_size, num_specs, H, W, C), # accumulate H and W, and do a dot product for each channel C. 
bias = tf.tensordot(tf.reduce_sum(bound.w, [2, 3]), b, axes=1) return fastlin.LinearExpression(w=new_bound_w, b=bias + bound.b, lower=wrapper.input_bounds.lower, upper=wrapper.input_bounds.upper) ub_expr = _conv2d_propagate(self.upper) if self.upper else None lb_expr = _conv2d_propagate(self.lower) if self.lower else None return BackwardBounds(lb_expr, ub_expr) def _get_monotonic_fn_bound(self, wrapper, fn): """Compute CROWN upper and lower linear bounds for a given function fn.""" # Get lower and upper bounds from forward IBP pass. lb, ub = wrapper.input_bounds.lower, wrapper.input_bounds.upper if fn.__name__ == 'relu': # CROWN upper and lower linear bounds for ReLU. f_lb = tf.minimum(lb, 0) f_ub = tf.maximum(ub, 0) # When both ub and lb are very close to 0 we might have NaN issue, # so we have to avoid this happening. f_ub = tf.maximum(f_ub, f_lb + 1e-8) # CROWN upper/lower scaling matrices and biases. ub_scaling_matrix = f_ub / (f_ub - f_lb) ub_bias = -f_lb * ub_scaling_matrix # Expand dimension for using broadcast later. ub_scaling_matrix = tf.expand_dims(ub_scaling_matrix, 1) lb_scaling_matrix = tf.cast(tf.greater(ub_scaling_matrix, .5), dtype=tf.float32) lb_bias = 0. # For 'apply' fn we need to differentiate them through the wrapper. elif isinstance(wrapper, verifiable_wrapper.ImageNormWrapper): inner_module = wrapper.inner_module ub_scaling_matrix = lb_scaling_matrix = inner_module.scale ub_bias = - inner_module.offset * inner_module.scale lb_bias = ub_bias else: raise NotImplementedError('monotonic fn {} is not supported ' 'by BackwardBounds'.format(fn.__name__)) return ub_scaling_matrix, lb_scaling_matrix, ub_bias, lb_bias def apply_increasing_monotonic_fn(self, wrapper, fn, *args): """Propagate CROWN bounds backward through a increasing monotonic fn.""" # Function _get_monotonic_fn_bound returns matrix and bias term for linear # relaxation. 
(ub_scaling_matrix, lb_scaling_matrix, ub_bias, lb_bias) = self._get_monotonic_fn_bound(wrapper, fn) def _propagate_monotonic_fn(bound, ub_mult, lb_mult): # Matrix multiplication by a diagonal matrix. new_bound_w = ub_mult * ub_scaling_matrix + lb_mult * lb_scaling_matrix # Matrix vector product for the bias term. ub_bias or lb_bias might be 0 # or a constant, or need broadcast. They will be handled optimally. b = self._matvec(ub_mult, ub_bias) + self._matvec(lb_mult, lb_bias) return fastlin.LinearExpression(w=new_bound_w, b=bound.b + b, lower=wrapper.input_bounds.lower, upper=wrapper.input_bounds.upper) # Multiplies w to upper or lower scaling terms according to its sign. ub_expr = _propagate_monotonic_fn( self.upper, tf.maximum(self.upper.w, 0), tf.minimum(self.upper.w, 0)) if self.upper else None lb_expr = _propagate_monotonic_fn( self.lower, tf.minimum(self.lower.w, 0), tf.maximum(self.lower.w, 0)) if self.lower else None return BackwardBounds(lb_expr, ub_expr) def apply_batch_reshape(self, wrapper, shape): """Propagate CROWN bounds backward through a reshape layer.""" input_shape = wrapper.input_bounds.lower.shape[1:] def _propagate_batch_flatten(bound): new_bound_w = tf.reshape( bound.w, tf.concat([tf.shape(bound.w)[:2], input_shape], 0)) return fastlin.LinearExpression(w=new_bound_w, b=bound.b, lower=wrapper.input_bounds.lower, upper=wrapper.input_bounds.upper) ub_expr = _propagate_batch_flatten(self.upper) if self.upper else None lb_expr = _propagate_batch_flatten(self.lower) if self.lower else None return BackwardBounds(lb_expr, ub_expr) @staticmethod def _reshape_to_rank(a, rank): """Reshapes to the given rank while keeping the first (rank-1) dims.""" shape = tf.concat([tf.shape(a)[0:(rank - 1)], [-1]], axis=-1) return tf.reshape(a, shape) @staticmethod def _matvec(a, b): """Specialized matvec detecting the case where b is 0 or constant.""" if isinstance(b, int) or isinstance(b, float): if b == 0: # For efficiency we directly return constant 0, no graph 
generated. return 0 else: # Broadcasting a constant. return a * b elif len(b.shape) == 1: # Need to broadcast against all examples in the batch. This can be done # using an einsum "tf.einsum('ns...c,c->ns', a, b)" but it currently # triggers a compiler bug on TPUs, thus we use the following instead. return tf.einsum('nsc,c->ns', tf.reduce_sum(a, [2, 3]), b) else: # Normal 1D or 3D mat-vec product. return tf.einsum('nsi,ni->ns', BackwardBounds._reshape_to_rank(a, 3), BackwardBounds._reshape_to_rank(b, 2)) ScalarMetrics = collections.namedtuple('ScalarMetrics', [ 'nominal_accuracy', # Verified accuracy using pure IBP bounds. 'verified_accuracy', # Verified accuracy using CROWN and IBP mixture. 'crown_ibp_verified_accuracy', 'attack_accuracy', 'attack_success']) ScalarLosses = collections.namedtuple('ScalarLosses', [ 'nominal_cross_entropy', 'attack_cross_entropy', 'verified_loss']) class Losses(loss.Losses): """Helper to compute CROWN-IBP losses.""" def __init__(self, predictor, specification=None, pgd_attack=None, interval_bounds_loss_type='xent', interval_bounds_hinge_margin=10., label_smoothing=0., use_crown_ibp=False, crown_bound_schedule=None): super(Losses, self).__init__(predictor, specification, pgd_attack, interval_bounds_loss_type, interval_bounds_hinge_margin, label_smoothing) self._use_crown_ibp = use_crown_ibp self._crown_bound_schedule = crown_bound_schedule def _get_specification_bounds(self): """Get upper bounds on specification. Used for building verified loss.""" ibp_bounds = self._specification(self._predictor.modules) # Compute verified accuracy using IBP bounds. v = tf.reduce_max(ibp_bounds, axis=1) self._interval_bounds_accuracy = tf.reduce_mean( tf.cast(v <= 0., tf.float32)) # CROWN-IBP bounds. 
if self._use_crown_ibp: logging.info('CROWN-IBP active') def _build_crown_ibp_bounds(): """Create the computationally expensive CROWN bounds for tf.cond.""" predictor = self._predictor # CROWN is computed backwards so we need to start with a # initial bound related to the specification. init_crown_bounds = create_initial_backward_bounds(self._specification, predictor.modules) # Now propagate the specification matrix layer by layer; # we only need the CROWN upper bound, do not need lower bound. crown_bound = predictor.propagate_bound_backward(init_crown_bounds, compute_upper=True, compute_lower=False) # A linear mixture of the two bounds with a schedule. return self._crown_bound_schedule * crown_bound.upper + \ (1. - self._crown_bound_schedule) * ibp_bounds # If the coefficient for CROWN bound is close to 0, compute IBP only. mixture_bounds = tf.cond(self._crown_bound_schedule < 1e-6, lambda: ibp_bounds, _build_crown_ibp_bounds) v = tf.reduce_max(mixture_bounds, axis=1) self._crown_ibp_accuracy = tf.reduce_mean(tf.cast(v <= 0., tf.float32)) else: mixture_bounds = ibp_bounds self._crown_ibp_accuracy = tf.constant(0.) return mixture_bounds @property def scalar_metrics(self): self._ensure_is_connected() return ScalarMetrics(self._nominal_accuracy, self._interval_bounds_accuracy, self._crown_ibp_accuracy, self._attack_accuracy, self._attack_success) @property def scalar_losses(self): self._ensure_is_connected() return ScalarLosses(self._cross_entropy, self._attack_cross_entropy, self._verified_loss) class VerifiableModelWrapper(model.VerifiableModelWrapper): """Model wrapper with CROWN-IBP backward bound propagation.""" def _propagate(self, current_module, current_bounds): """Propagate CROWN bounds in a backwards manner.""" # Construct bounds for this layer. 
if isinstance(current_module, verifiable_wrapper.ModelInputWrapper): if current_module.index != 0: raise NotImplementedError('CROWN backpropagation does not support ' 'multiple inputs.') return current_bounds # Propagate the bounds through the current layer. new_bounds = current_module.propagate_bounds(current_bounds) prev_modules = self._module_depends_on[current_module] # We assume that each module only depends on one module. if len(prev_modules) != 1: raise NotImplementedError('CROWN for non-sequential networks is not ' 'implemented.') return self._propagate(prev_modules[0], new_bounds) def propagate_bound_backward(self, initial_bound, compute_upper=True, compute_lower=False): """Propagates CROWN bounds backward through the network. This function assumes that we have obtained bounds for all intermediate layers using IBP. Currently only sequential networks are implemented. Args: initial_bound: A BackwardBounds object containing the initial matrices and biases to start bound propagation. compute_upper: Set to True to construct the computation graph for the CROWN upper bound. For verified training, only the upper bound is needed. Default is True. compute_lower: Set to True to construct the computation graph for the CROWN lower bound. Default is False. Returns: IntervalBound instance corresponding to bounds on the specification. """ if (not compute_upper) and (not compute_lower): raise ValueError('At least one of "compute_upper" or "compute_lower" ' 'needs to be True') self._ensure_is_connected() # We start bound propagation from the logit layer. logit_layer = self._produced_by[self._logits.name] # If only one of ub or lb is needed, we set the unnecessary one to None. 
ub = initial_bound.upper if compute_upper else None lb = initial_bound.lower if compute_lower else None bound = BackwardBounds(lb, ub) crown_bound = self._propagate(logit_layer, bound) return crown_bound.concretize() def create_initial_backward_bounds(spec, modules): """Create the initial BackwardBounds according to specification.""" last_bounds = bounds.IntervalBounds.convert(modules[-1].input_bounds) if isinstance(spec, specification_lib.ClassificationSpecification): c_correct = tf.expand_dims( tf.one_hot(spec.correct_idx[:, 1], spec.num_specifications + 1), 1) c_wrong = tf.one_hot(spec.wrong_idx[:, :, 1], spec.num_specifications + 1) c = c_wrong - c_correct b = tf.zeros(spec.num_specifications) lb = ub = fastlin.LinearExpression(w=c, b=b, lower=last_bounds.lower, upper=last_bounds.upper) elif isinstance(spec, specification_lib.LinearSpecification): b = spec.d if spec.d is not None else tf.zeros(spec.num_specifications) lb = ub = fastlin.LinearExpression(w=spec.c, b=b, lower=last_bounds.lower, upper=last_bounds.upper) else: raise ValueError('Unknown specification class type "{}"'.format(str(spec))) return BackwardBounds(lb, ub) def create_classification_losses( global_step, inputs, label, predictor_network, epsilon, loss_weights, warmup_steps=0, rampup_steps=-1, input_bounds=(0., 1.), options=None): """Create the training loss for CROWN-IBP.""" def _is_loss_active(init, final, warmup=None): return init > 0. or final > 0. or (warmup is not None and warmup > 0.) if 'crown_bound' in loss_weights: schedule = utils.build_loss_schedule(global_step, warmup_steps, rampup_steps, **loss_weights.get('crown_bound')) use_crown_ibp = _is_loss_active(**loss_weights.get('crown_bound')) else: schedule = None use_crown_ibp = False # Use the loss builder for CROWN-IBP with additional kwargs. 
def _loss_builder(*args, **kwargs): kwargs.update(dict(use_crown_ibp=use_crown_ibp, crown_bound_schedule=schedule)) return Losses(*args, **kwargs) return utils.create_classification_losses( global_step, inputs, label, predictor_network, epsilon, loss_weights, warmup_steps, rampup_steps, input_bounds, loss_builder=_loss_builder, options=options) ================================================ FILE: interval_bound_propagation/src/fastlin.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Fast-Lin symbolic bound calculation for common neural network layers. The Fast-Lin algorithm expresses lower and upper bounds of each layer of a neural network as a symbolic linear expression in the input neurons, relaxing the ReLU layers to retain linearity at the expense of tightness. Reference: "Towards Fast Computation of Certified Robustness for ReLU Networks", https://arxiv.org/pdf/1804.09699.pdf. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections from absl import logging from interval_bound_propagation.src import bounds as basic_bounds from interval_bound_propagation.src import relative_bounds import sonnet as snt import tensorflow.compat.v1 as tf # Holds the linear expressions serving as bounds. # w: [batch_size, input_size, output_shape] storing the weights. 
# b: [batch_size, output_shape] storing the bias. # lower: [batch_size, input_size] storing the lower bounds on inputs. # upper: [batch_size, input_size] storing the upper bounds on inputs. # `lower` and `upper` tensors are always flattened representations of the # original inputs. LinearExpression = collections.namedtuple( 'LinearExpression', ['w', 'b', 'lower', 'upper']) class SymbolicBounds(basic_bounds.AbstractBounds): """Fast-Lin bounds (https://arxiv.org/abs/1804.09699).""" def __init__(self, lower, upper): super(SymbolicBounds, self).__init__() self._lower = lower self._upper = upper self._prior_bounds = None self._concretized = None @property def lower(self): return self._lower @property def upper(self): return self._upper @property def shape(self): return self.lower.b.shape.as_list() def concretize(self): """Returns lower and upper interval bounds.""" if self._concretized is None: # Construct once and cache. lb, ub = self._concretize_bounds(self.lower, self.upper) # Apply intersections with prior runs. if self._prior_bounds is not None: lb = tf.maximum(lb, self._prior_bounds.lower) ub = tf.minimum(ub, self._prior_bounds.upper) self._concretized = basic_bounds.IntervalBounds(lb, ub) return self._concretized def with_priors(self, existing_bounds): if existing_bounds is not None: self._prior_bounds = existing_bounds.concretize() # These priors are applied the next time concretize() is called. 
self._concretized = None return self @classmethod def convert(cls, bounds): if isinstance(bounds, cls): return bounds if isinstance(bounds, tf.Tensor): bounds = basic_bounds.IntervalBounds(bounds, bounds) bounds = bounds.concretize() if not isinstance(bounds, basic_bounds.IntervalBounds): raise ValueError('Cannot convert "{}" to "SymbolicBounds"'.format(bounds)) lower, upper = cls._initial_symbolic_bounds(bounds.lower, bounds.upper) return cls(lower, upper) def apply_linear(self, wrapper, w, b): w_pos = tf.maximum(w, 0) w_neg = tf.minimum(w, 0) lb = self._add_expression( self._scale_expression(self.lower, w_pos), self._scale_expression(self.upper, w_neg) ) lb = self._add_bias(lb, b) ub = self._add_expression( self._scale_expression(self.lower, w_neg), self._scale_expression(self.upper, w_pos) ) ub = self._add_bias(ub, b) return SymbolicBounds(lb, ub).with_priors(wrapper.output_bounds) def apply_conv1d(self, wrapper, w, b, padding, stride): w_pos = tf.maximum(w, 0) w_neg = tf.minimum(w, 0) lb = self._add_expression( self._conv1d_expression(self.lower, w_pos, padding, stride), self._conv1d_expression(self.upper, w_neg, padding, stride)) lb = self._add_bias(lb, b) ub = self._add_expression( self._conv1d_expression(self.upper, w_pos, padding, stride), self._conv1d_expression(self.lower, w_neg, padding, stride)) ub = self._add_bias(ub, b) return SymbolicBounds(lb, ub).with_priors(wrapper.output_bounds) def apply_conv2d(self, wrapper, w, b, padding, strides): w_pos = tf.maximum(w, 0) w_neg = tf.minimum(w, 0) lb = self._add_expression( self._conv2d_expression(self.lower, w_pos, padding, strides), self._conv2d_expression(self.upper, w_neg, padding, strides)) lb = self._add_bias(lb, b) ub = self._add_expression( self._conv2d_expression(self.upper, w_pos, padding, strides), self._conv2d_expression(self.lower, w_neg, padding, strides)) ub = self._add_bias(ub, b) return SymbolicBounds(lb, ub).with_priors(wrapper.output_bounds) def apply_increasing_monotonic_fn(self, wrapper, 
fn, *args, **parameters): if fn.__name__ != 'relu': # Fallback to regular interval bound propagation for unsupported # operations. logging.warn('"%s" is not supported by SymbolicBounds. ' 'Fallback on IntervalBounds.', fn.__name__) interval_bounds = basic_bounds.IntervalBounds.convert(self) converted_args = [basic_bounds.IntervalBounds.convert(b) for b in args] interval_bounds = interval_bounds._increasing_monotonic_fn( # pylint: disable=protected-access fn, *converted_args) return self.convert(interval_bounds) concrete = self.concretize() lb, ub = concrete.lower, concrete.upper is_ambiguous = tf.logical_and(ub > 0, lb < 0) # Ensure denominator is always positive, even when not needed. ambiguous_denom = tf.where(is_ambiguous, ub - lb, tf.ones_like(ub)) scale = tf.where( is_ambiguous, ub / ambiguous_denom, tf.where(lb >= 0, tf.ones_like(lb), tf.zeros_like(lb))) bias = tf.where(is_ambiguous, -lb, tf.zeros_like(lb)) lb_out = LinearExpression( w=tf.expand_dims(scale, 1) * self.lower.w, b=scale * self.lower.b, lower=self.lower.lower, upper=self.lower.upper) ub_out = LinearExpression( w=tf.expand_dims(scale, 1) * self.upper.w, b=scale * (self.upper.b + bias), lower=self.upper.lower, upper=self.upper.upper) return SymbolicBounds(lb_out, ub_out).with_priors(wrapper.output_bounds) def apply_batch_reshape(self, wrapper, shape): return SymbolicBounds(self._batch_reshape_expression(self.lower, shape), self._batch_reshape_expression(self.upper, shape) ).with_priors(wrapper.output_bounds) # Helper methods. 
@staticmethod def _add_bias(expr, b): """Add bias b to a linear expression.""" if b is None: return expr return LinearExpression(w=expr.w, b=expr.b + b, lower=expr.lower, upper=expr.upper) @staticmethod def _add_expression(expr_a, expr_b): """Add two expression together.""" return LinearExpression(w=expr_a.w + expr_b.w, b=expr_a.b + expr_b.b, lower=expr_a.lower, upper=expr_b.upper) @staticmethod def _scale_expression(expr, w): """Scale a linear expression by w.""" b = tf.matmul(expr.b, w) w = tf.tensordot(expr.w, w, axes=1) return LinearExpression(w=w, b=b, lower=expr.lower, upper=expr.upper) @staticmethod def _conv1d_expression(expr, w, padding, stride): """Scale a linear expression by w (through a convolutional layer).""" b = tf.nn.conv1d(expr.b, w, padding=padding, stride=stride) shape = tf.concat([[tf.reduce_prod(tf.shape(expr.w)[:2])], tf.shape(expr.w)[2:]], axis=0) w = tf.nn.conv1d(tf.reshape(expr.w, shape), w, padding=padding, stride=stride) shape = tf.concat([tf.shape(expr.w)[:2], tf.shape(w)[1:]], axis=0) w = tf.reshape(w, shape) return LinearExpression(w=w, b=b, lower=expr.lower, upper=expr.upper) @staticmethod def _conv2d_expression(expr, w, padding, strides): """Scale a linear expression by w (through a convolutional layer).""" b = tf.nn.convolution(expr.b, w, padding=padding, strides=strides) shape = tf.concat([[tf.reduce_prod(tf.shape(expr.w)[:2])], tf.shape(expr.w)[2:]], axis=0) w = tf.nn.convolution(tf.reshape(expr.w, shape), w, padding=padding, strides=strides) shape = tf.concat([tf.shape(expr.w)[:2], tf.shape(w)[1:]], axis=0) w = tf.reshape(w, shape) return LinearExpression(w=w, b=b, lower=expr.lower, upper=expr.upper) @staticmethod def _batch_reshape_expression(expr, shape): w = snt.BatchReshape(shape, preserve_dims=2)(expr.w) b = snt.BatchReshape(shape)(expr.b) return LinearExpression(w=w, b=b, lower=expr.lower, upper=expr.upper) @staticmethod def _concretize_bounds(lower, upper): """Returns lower and upper interval bounds.""" if 
len(lower.b.shape) == 2: equation = 'ijk,ij->ik' elif len(lower.b.shape) == 3: equation = 'ijnc,ij->inc' elif len(lower.b.shape) == 4: equation = 'ijhwc,ij->ihwc' else: raise NotImplementedError('Shape unsupported: {}'.format(lower.b.shape)) lb = (tf.einsum(equation, tf.maximum(lower.w, 0), lower.lower) + tf.einsum(equation, tf.minimum(lower.w, 0), lower.upper) + lower.b) ub = (tf.einsum(equation, tf.maximum(upper.w, 0), upper.upper) + tf.einsum(equation, tf.minimum(upper.w, 0), upper.lower) + upper.b) return lb, ub @staticmethod def _initial_symbolic_bounds(lb, ub): """Returns symbolic bounds for the given interval bounds.""" batch_size = tf.shape(lb)[0] input_shape = lb.shape[1:] zero = tf.zeros_like(lb) lb = snt.BatchFlatten()(lb) ub = snt.BatchFlatten()(ub) input_size = tf.shape(lb)[1] output_shape = tf.concat([[input_size], input_shape], axis=0) identity = tf.reshape(tf.eye(input_size), output_shape) identity = tf.expand_dims(identity, 0) identity = tf.tile(identity, [batch_size] + [1] * (len(input_shape) + 1)) expr = LinearExpression(w=identity, b=zero, lower=lb, upper=ub) return expr, expr class RelativeSymbolicBounds(SymbolicBounds): """Relative-to-nominal variant of Fast-Lin bounds.""" def __init__(self, lower_offset, upper_offset, nominal): super(RelativeSymbolicBounds, self).__init__(lower_offset, upper_offset) self._nominal = nominal def concretize(self): """Returns lower and upper interval bounds.""" if self._concretized is None: # Construct once and cache. lb_offset, ub_offset = self._concretize_bounds(self.lower, self.upper) # Apply intersections with prior runs. 
if self._prior_bounds is not None: lb_offset = tf.maximum(lb_offset, self._prior_bounds.lower_offset) ub_offset = tf.minimum(ub_offset, self._prior_bounds.upper_offset) self._concretized = relative_bounds.RelativeIntervalBounds( lb_offset, ub_offset, self._nominal) return self._concretized @classmethod def convert(cls, bounds): if isinstance(bounds, cls): return bounds if isinstance(bounds, tf.Tensor): bounds = relative_bounds.RelativeIntervalBounds( tf.zeros_like(bounds), tf.zeros_like(bounds), bounds) bounds = bounds.concretize() if not isinstance(bounds, relative_bounds.RelativeIntervalBounds): raise ValueError( 'Cannot convert "{}" to "RelativeSymbolicBounds"'.format(bounds)) lower, upper = cls._initial_symbolic_bounds(bounds.lower_offset, bounds.upper_offset) return cls(lower, upper, bounds.nominal) def apply_linear(self, wrapper, w, b): bounds_out = super(RelativeSymbolicBounds, self).apply_linear( wrapper, w, b=None) nominal_out = tf.matmul(self._nominal, w) if b is not None: nominal_out += b return RelativeSymbolicBounds( bounds_out.lower, bounds_out.upper, nominal_out).with_priors( wrapper.output_bounds) def apply_conv1d(self, wrapper, w, b, padding, stride): bounds_out = super(RelativeSymbolicBounds, self).apply_conv1d( wrapper, w, b=None, padding=padding, stride=stride) nominal_out = tf.nn.conv1d(self._nominal, w, padding=padding, stride=stride) if b is not None: nominal_out += b return RelativeSymbolicBounds( bounds_out.lower, bounds_out.upper, nominal_out).with_priors( wrapper.output_bounds) def apply_conv2d(self, wrapper, w, b, padding, strides): bounds_out = super(RelativeSymbolicBounds, self).apply_conv2d( wrapper, w, b=None, padding=padding, strides=strides) nominal_out = tf.nn.convolution(self._nominal, w, padding=padding, strides=strides) if b is not None: nominal_out += b return RelativeSymbolicBounds( bounds_out.lower, bounds_out.upper, nominal_out).with_priors( wrapper.output_bounds) def apply_increasing_monotonic_fn(self, wrapper, fn, *args, 
**parameters): if fn.__name__ != 'relu': # Fallback to regular interval bound propagation for unsupported # operations. logging.warn('"%s" is not supported by RelativeSymbolicBounds. ' 'Fallback on RelativeIntervalBounds.', fn.__name__) interval_bounds = relative_bounds.RelativeIntervalBounds.convert(self) converted_args = [relative_bounds.RelativeIntervalBounds.convert(b) for b in args] interval_bounds = interval_bounds._increasing_monotonic_fn( # pylint: disable=protected-access fn, *converted_args) return self.convert(interval_bounds) concrete = self.concretize() lb, ub = concrete.lower_offset, concrete.upper_offset is_ambiguous = tf.logical_and(ub > -self._nominal, lb < -self._nominal) # Ensure denominator is always positive, even when not needed. ambiguous_denom = tf.where(is_ambiguous, ub - lb, tf.ones_like(ub)) scale = tf.where( is_ambiguous, (self._nominal + ub) / ambiguous_denom, tf.where(lb >= -self._nominal, tf.ones_like(lb), tf.zeros_like(lb))) scale_complement = tf.where( is_ambiguous, -(self._nominal + lb) / ambiguous_denom, tf.where(lb >= -self._nominal, tf.zeros_like(lb), tf.ones_like(lb))) # Need lb_out.b = scale * (nom_in + lb_in.b) - nom_out # and ub_out.b = scale * (nom_in + ub_in.b - min(nom_in + lb, 0)) - nom_out lower_bias = (scale * (tf.minimum(self._nominal, 0.)) + scale_complement * tf.minimum(-self._nominal, 0.)) upper_bias = (scale * tf.maximum(tf.minimum(-self._nominal, 0.) 
- lb, tf.minimum(self._nominal, 0.)) + scale_complement * tf.minimum(-self._nominal, 0.)) lb_out = LinearExpression( w=tf.expand_dims(scale, 1) * self.lower.w, b=scale * self.lower.b + lower_bias, lower=self.lower.lower, upper=self.lower.upper) ub_out = LinearExpression( w=tf.expand_dims(scale, 1) * self.upper.w, b=scale * self.upper.b + upper_bias, lower=self.upper.lower, upper=self.upper.upper) nominal_out = tf.nn.relu(self._nominal) return RelativeSymbolicBounds( lb_out, ub_out, nominal_out).with_priors(wrapper.output_bounds) def apply_batch_reshape(self, wrapper, shape): bounds_out = super(RelativeSymbolicBounds, self).apply_batch_reshape( wrapper, shape) nominal_out = snt.BatchReshape(shape)(self._nominal) return RelativeSymbolicBounds( bounds_out.lower, bounds_out.upper, nominal_out).with_priors( wrapper.output_bounds) ================================================ FILE: interval_bound_propagation/src/layer_utils.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Graph construction for dual verification.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from interval_bound_propagation.src import layers import sonnet as snt import tensorflow.compat.v1 as tf def conv_output_shape(input_shape, w, padding, strides): """Calculates the output shape of the given N-D convolution. 
Args: input_shape: Integer list of length N+1 specifying the non-batch dimensions of the inputs: [input_height, input_width, input_channels]. w: (N+2)D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing weights for the convolution. padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. strides: Integer list of length N: `[vertical_stride, horizontal_stride]`. Returns: Integer list of length N+1 specifying the non-batch dimensions of the outputs: [output_height, output_width, output_channels]. Raises: ValueError: if an unsupported convolution dimensionality is encountered. """ # Connect a convolution (never to be run) to infer the output's # spatial structure. dummy_inputs = tf.zeros(dtype=w.dtype, shape=([1] + input_shape)) if len(w.shape) == 4: dummy_outputs = tf.nn.convolution(dummy_inputs, w, padding=padding, strides=strides) elif len(w.shape) == 3: dummy_outputs = tf.nn.conv1d(dummy_inputs, w, padding=padding, stride=strides[0]) else: raise ValueError() return dummy_outputs.shape.as_list()[1:] def materialise_conv(w, b, input_shape, padding, strides): """Converts an N-D convolution to an equivalent linear layer. Args: w: (N+2)D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing the convolution weights. b: 1D tensor of shape (output_channels) containing the convolution biases, or `None` if no biases. input_shape: Integer list of length N+1 specifying the non-batch dimensions of the inputs: [input_height, input_width, input_channels]. padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. strides: Integer list of length N: `[vertical_stride, horizontal_stride]`. Returns: w: 2D tensor of shape (input_height * input_width * input_channels, output_height * output_width * output_channels) containing weights. b: 1D tensor of shape (output_height * output_width * output_channels) containing biases, or `None` if no biases. 
Raises: ValueError: if an unsupported convolution dimensionality is encountered. """ if len(input_shape) == 3: return _materialise_conv2d(w, b, input_shape[0], input_shape[1], padding, strides) elif len(input_shape) == 2: return _materialise_conv1d(w, b, input_shape[0], padding, strides[0]) else: raise ValueError() def _materialise_conv2d(w, b, input_height, input_width, padding, strides): """Converts a convolution to an equivalent linear layer. Args: w: 4D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing the convolution weights. b: 1D tensor of shape (output_channels) containing the convolution biases, or `None` if no biases. input_height: height of the input tensor. input_width: width of the input tensor. padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. strides: Integer list of `[vertical_stride, horizontal_stride]`. Returns: w: 2D tensor of shape (input_height * input_width * input_channels, output_height * output_width * output_channels) containing weights. b: 1D tensor of shape (output_height * output_width * output_channels) containing biases, or `None` if no biases. """ kernel_height = w.shape[0].value kernel_width = w.shape[1].value input_channels = w.shape[2].value output_channels = w.shape[3].value # Temporarily move the input_channels dimension to output_channels. w = tf.reshape(w, shape=(kernel_height, kernel_width, 1, input_channels * output_channels)) # Apply the convolution to elementary (i.e. one-hot) inputs. diagonal_input = tf.reshape( tf.eye(input_height * input_width, dtype=w.dtype), shape=[input_height * input_width, input_height, input_width, 1]) conv = tf.nn.convolution( diagonal_input, w, padding=padding, strides=strides) output_height = conv.shape[1].value output_width = conv.shape[2].value # conv is of shape (input_height * input_width, output_height, output_width, # input_channels * output_channels). 
# Reshape it to (input_height * input_width * input_channels, # output_height * output_width * output_channels). w = tf.reshape(conv, shape=( [input_height * input_width, output_height, output_width, input_channels, output_channels])) w = tf.transpose(w, perm=[0, 3, 1, 2, 4]) w = tf.reshape(w, shape=( [input_height * input_width * input_channels, output_height * output_width * output_channels])) # Broadcast b over spatial dimensions. b = tf.tile(b, [output_height * output_width]) if b is not None else None return w, b def _materialise_conv1d(w, b, input_length, padding, stride): """Converts a convolution to an equivalent linear layer. Args: w: 3D tensor of shape (kernel_length, input_channels, output_channels) containing the convolution weights. b: 1D tensor of shape (output_channels) containing the convolution biases, or `None` if no biases. input_length: length of the input tensor. padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. stride: Integer stride. Returns: w: 2D tensor of shape (input_length * input_channels, output_length * output_channels) containing weights. b: 1D tensor of shape (output_length * output_channels) containing biases, or `None` if no biases. """ kernel_length = w.shape[0].value input_channels = w.shape[1].value output_channels = w.shape[2].value # Temporarily move the input_channels dimension to output_channels. w = tf.reshape(w, shape=(kernel_length, 1, input_channels * output_channels)) # Apply the convolution to elementary (i.e. one-hot) inputs. diagonal_input = tf.reshape( tf.eye(input_length, dtype=w.dtype), shape=[input_length, input_length, 1]) conv = tf.nn.conv1d( diagonal_input, w, padding=padding, stride=stride) output_length = conv.shape[1].value # conv is of shape (input_length, output_length, # input_channels * output_channels). # Reshape it to (input_length * input_channels, # output_length * output_channels). 
w = tf.reshape(conv, shape=( [input_length, output_length, input_channels, output_channels])) w = tf.transpose(w, perm=[0, 2, 1, 3]) w = tf.reshape(w, shape=( [input_length * input_channels, output_length * output_channels])) # Broadcast b over spatial dimensions. b = tf.tile(b, [output_length]) if b is not None else None return w, b def decode_batchnorm(batchnorm_module): """Calculates the neuron-wise multipliers and biases of the batch norm layer. Note that, in the case of a convolution, the returned bias will have spatial dimensions. Args: batchnorm_module: `snt.BatchNorm` module. Returns: w: 1D tensor of shape (output_size) or 3D tensor of shape (output_height, output_width, output_channels) containing neuron-wise multipliers for the batch norm layer. b: 1D tensor of shape (output_size) or 3D tensor of shape (output_height, output_width, output_channels) containing neuron-wise biases for the batch norm layer. """ if isinstance(batchnorm_module, layers.BatchNorm): mean = batchnorm_module.mean variance = batchnorm_module.variance variance_epsilon = batchnorm_module.epsilon scale = batchnorm_module.scale offset = batchnorm_module.bias else: assert isinstance(batchnorm_module, snt.BatchNorm) mean = batchnorm_module.moving_mean variance = batchnorm_module.moving_variance variance_epsilon = batchnorm_module._eps # pylint: disable=protected-access try: scale = batchnorm_module.gamma except snt.Error: scale = None try: offset = batchnorm_module.beta except snt.Error: offset = None w = tf.rsqrt(variance + variance_epsilon) if scale is not None: w *= scale b = -w * mean if offset is not None: b += offset # Batchnorm vars have a redundant leading dim. w = tf.squeeze(w, axis=0) b = tf.squeeze(b, axis=0) return w, b def combine_with_batchnorm(w, b, batchnorm_module): """Combines a linear layer and a batch norm into a single linear layer. Calculates the weights and biases of the linear layer formed by applying the specified linear layer followed by the batch norm. 
Note that, in the case of a convolution, the returned bias will have spatial dimensions. Args: w: 2D tensor of shape (input_size, output_size) or 4D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing weights for the linear layer. b: 1D tensor of shape (output_size) or (output_channels) containing biases for the linear layer, or `None` if no bias. batchnorm_module: `snt.BatchNorm` module. Returns: w: 2D tensor of shape (input_size, output_size) or 4D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing weights for the combined layer. b: 1D tensor of shape (output_size) or 3D tensor of shape (output_height, output_width, output_channels) containing biases for the combined layer. """ if b is None: b = tf.zeros(dtype=w.dtype, shape=()) w_bn, b_bn = decode_batchnorm(batchnorm_module) return w * w_bn, b * w_bn + b_bn ================================================ FILE: interval_bound_propagation/src/layers.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Additional Sonnet modules.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import sonnet as snt import tensorflow.compat.v1 as tf # Slightly altered version of snt.BatchNorm that allows to easily grab which # mean and variance are currently in use (whether the last _build was # invoked with is_training=True or False). # Modifications include: # - Removing fused option (which we do not support). # - Removing test_local_stats (which we do not support). # - Providing a mean and variance property. # - Provides scale, bias properties that return None if there are none. class BatchNorm(snt.BatchNorm): """Batch normalization module, including optional affine transformation.""" def __init__(self, axis=None, offset=True, scale=False, decay_rate=0.999, eps=1e-3, initializers=None, partitioners=None, regularizers=None, update_ops_collection=None, name='batch_norm'): """Constructs a BatchNorm module. See original code for more details.""" super(BatchNorm, self).__init__( axis=axis, offset=offset, scale=scale, decay_rate=decay_rate, eps=eps, initializers=initializers, partitioners=partitioners, regularizers=regularizers, fused=False, update_ops_collection=update_ops_collection, name=name) def _build_statistics(self, input_batch, axis, use_batch_stats, stat_dtype): """Builds the statistics part of the graph when using moving variance.""" self._mean, self._variance = super(BatchNorm, self)._build_statistics( input_batch, axis, use_batch_stats, stat_dtype) return self._mean, self._variance def _build(self, input_batch, is_training=True, test_local_stats=False, reuse=False): """Connects the BatchNorm module into the graph. Args: input_batch: A Tensor of arbitrary dimension. By default, the final dimension is not reduced over when computing the minibatch statistics. is_training: A boolean to indicate if the module should be connected in training mode, meaning the moving averages are updated. Can be a Tensor. 
test_local_stats: A boolean to indicate if the statistics should be from the local batch. When is_training is True, test_local_stats is not used. reuse: If True, the statistics computed by previous call to _build are used and is_training is ignored. Otherwise, behaves like a normal batch normalization layer. Returns: A tensor with the same shape as `input_batch`. Raises: ValueError: If `axis` is not valid for the input shape or has negative entries. """ if reuse: self._ensure_is_connected() return tf.nn.batch_normalization( input_batch, self._mean, self._variance, self._beta, self._gamma, self._eps, name='batch_norm') else: return super(BatchNorm, self)._build(input_batch, is_training, test_local_stats=test_local_stats) @property def scale(self): self._ensure_is_connected() return tf.stop_gradient(self._gamma) if self._gamma is not None else None @property def bias(self): self._ensure_is_connected() return tf.stop_gradient(self._beta) if self._beta is not None else None @property def mean(self): self._ensure_is_connected() return tf.stop_gradient(self._mean) @property def variance(self): self._ensure_is_connected() return tf.stop_gradient(self._variance) @property def epsilon(self): self._ensure_is_connected() return self._eps class ImageNorm(snt.AbstractModule): """Module that does per channel normalization.""" def __init__(self, mean, std, name='image_norm'): """Constructs a module that does (x[:, :, c] - mean[c]) / std[c].""" super(ImageNorm, self).__init__(name=name) if isinstance(mean, float): mean = [mean] if isinstance(std, float): std = [std] scale = [] for s in std: if s <= 0.: raise ValueError('Cannot use negative standard deviations.') scale.append(1. / s) with self._enter_variable_scope(): # Using broadcasting. 
self._scale = tf.constant(scale, dtype=tf.float32) self._offset = tf.constant(mean, dtype=tf.float32) def _build(self, inputs): return self.apply(inputs) @property def scale(self): return self._scale @property def offset(self): return self._offset # Provide a function that allows to use the IncreasingMonotonicWrapper. def apply(self, inputs): return (inputs - self._offset) * self._scale ================================================ FILE: interval_bound_propagation/src/loss.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Helper to keep track of the different losses.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import sonnet as snt import tensorflow.compat.v1 as tf # Used to pick the least violated specification. 
_BIG_NUMBER = 1e25 ScalarMetrics = collections.namedtuple('ScalarMetrics', [ 'nominal_accuracy', 'verified_accuracy', 'attack_accuracy', 'attack_success']) ScalarLosses = collections.namedtuple('ScalarLosses', [ 'nominal_cross_entropy', 'attack_cross_entropy', 'verified_loss']) class Losses(snt.AbstractModule): """Helper to compute our losses.""" def __init__(self, predictor, specification=None, pgd_attack=None, interval_bounds_loss_type='xent', interval_bounds_hinge_margin=10., label_smoothing=0.): super(Losses, self).__init__(name='losses') self._predictor = predictor self._specification = specification self._attack = pgd_attack # Loss type can be any combination of: # xent: cross-entropy loss # hinge: hinge loss # softplus: softplus loss # with # all: using all specifications. # most: using only the specification that is the most violated. # least: using only the specification that is the least violated. # random_n: using a random subset of the specifications. # E.g.: "xent_max" or "hinge_random_3". 
tokens = interval_bounds_loss_type.split('_', 1) if len(tokens) == 1: loss_type, loss_mode = tokens[0], 'all' else: loss_type, loss_mode = tokens if loss_mode.startswith('random'): loss_mode, num_samples = loss_mode.split('_', 1) self._interval_bounds_loss_n = int(num_samples) if loss_type not in ('xent', 'hinge', 'softplus'): raise ValueError('interval_bounds_loss_type must be either "xent", ' '"hinge" or "softplus".') if loss_mode not in ('all', 'most', 'random', 'least'): raise ValueError('interval_bounds_loss_type must be followed by either ' '"all", "most", "random_N" or "least".') self._interval_bounds_loss_type = loss_type self._interval_bounds_loss_mode = loss_mode self._interval_bounds_hinge_margin = interval_bounds_hinge_margin self._label_smoothing = label_smoothing def _build(self, labels): self._build_nominal_loss(labels) self._build_verified_loss(labels) self._build_attack_loss(labels) def _build_nominal_loss(self, labels): """Build natural cross-entropy loss on clean data.""" # Cross-entropy. nominal_logits = self._predictor.logits if self._label_smoothing > 0: num_classes = nominal_logits.shape[1].value one_hot_labels = tf.one_hot(labels, num_classes) smooth_positives = 1. - self._label_smoothing smooth_negatives = self._label_smoothing / num_classes one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives nominal_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( labels=one_hot_labels, logits=nominal_logits) self._one_hot_labels = one_hot_labels else: nominal_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=nominal_logits) self._cross_entropy = tf.reduce_mean(nominal_cross_entropy) # Accuracy. nominal_correct_examples = tf.equal(labels, tf.argmax(nominal_logits, 1)) self._nominal_accuracy = tf.reduce_mean( tf.cast(nominal_correct_examples, tf.float32)) def _get_specification_bounds(self): """Get upper bounds on specification. 
Used for building verified loss.""" ibp_bounds = self._specification(self._predictor.modules) # Compute verified accuracy using IBP bounds. v = tf.reduce_max(ibp_bounds, axis=1) self._interval_bounds_accuracy = tf.reduce_mean( tf.cast(v <= 0., tf.float32)) return ibp_bounds def _build_verified_loss(self, labels): """Build verified loss using an upper bound on specification.""" if not self._specification: self._verified_loss = tf.constant(0.) self._interval_bounds_accuracy = tf.constant(0.) return # Interval bounds. bounds = self._get_specification_bounds() # Select specifications. if self._interval_bounds_loss_mode == 'all': pass # Keep bounds the way it is. elif self._interval_bounds_loss_mode == 'most': bounds = tf.reduce_max(bounds, axis=1, keepdims=True) elif self._interval_bounds_loss_mode == 'random': idx = tf.random.uniform( [tf.shape(bounds)[0], self._interval_bounds_loss_n], 0, tf.shape(bounds)[1], dtype=tf.int32) bounds = tf.batch_gather(bounds, idx) else: assert self._interval_bounds_loss_mode == 'least' # This picks the least violated contraint. 
mask = tf.cast(bounds < 0., tf.float32) smallest_violation = tf.reduce_min( bounds + mask * _BIG_NUMBER, axis=1, keepdims=True) has_violations = tf.less( tf.reduce_sum(mask, axis=1, keepdims=True) + .5, tf.cast(tf.shape(bounds)[1], tf.float32)) largest_bounds = tf.reduce_max(bounds, axis=1, keepdims=True) bounds = tf.where(has_violations, smallest_violation, largest_bounds) if self._interval_bounds_loss_type == 'xent': v = tf.concat( [bounds, tf.zeros([tf.shape(bounds)[0], 1], dtype=bounds.dtype)], axis=1) l = tf.concat( [tf.zeros_like(bounds), tf.ones([tf.shape(bounds)[0], 1], dtype=bounds.dtype)], axis=1) self._verified_loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2( labels=tf.stop_gradient(l), logits=v)) elif self._interval_bounds_loss_type == 'softplus': self._verified_loss = tf.reduce_mean( tf.nn.softplus(bounds + self._interval_bounds_hinge_margin)) else: assert self._interval_bounds_loss_type == 'hinge' self._verified_loss = tf.reduce_mean( tf.maximum(bounds, -self._interval_bounds_hinge_margin)) def _build_attack_loss(self, labels): """Build adversarial loss using PGD attack.""" # PGD attack. if not self._attack: self._attack_accuracy = tf.constant(0.) self._attack_success = tf.constant(1.) self._attack_cross_entropy = tf.constant(0.) 
return if not isinstance(self._predictor.inputs, tf.Tensor): raise ValueError('Multiple inputs is not supported.') self._attack(self._predictor.inputs, labels) correct_examples = tf.equal(labels, tf.argmax(self._attack.logits, 1)) self._attack_accuracy = tf.reduce_mean( tf.cast(correct_examples, tf.float32)) self._attack_success = tf.reduce_mean( tf.cast(self._attack.success, tf.float32)) if self._label_smoothing > 0: attack_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( labels=self._one_hot_labels, logits=self._attack.logits) else: attack_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=self._attack.logits) self._attack_cross_entropy = tf.reduce_mean(attack_cross_entropy) @property def scalar_metrics(self): self._ensure_is_connected() return ScalarMetrics(self._nominal_accuracy, self._interval_bounds_accuracy, self._attack_accuracy, self._attack_success) @property def scalar_losses(self): self._ensure_is_connected() return ScalarLosses(self._cross_entropy, self._attack_cross_entropy, self._verified_loss) ================================================ FILE: interval_bound_propagation/src/model.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Sonnet modules that represent the predictor network.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections from absl import logging from interval_bound_propagation.src import layers from interval_bound_propagation.src import verifiable_wrapper import numpy as np import sonnet as snt import tensorflow.compat.v1 as tf # Set of supported activations. Must be monotonic and attributes of `tf.nn`. _ALLOWED_ACTIVATIONS = set([ 'elu', 'leaky_relu', 'relu', 'relu6', 'selu', 'sigmoid', 'softplus', 'softsign', 'tanh', ]) # Mapping between graph node ops and their TensorFlow function. _MONOTONIC_NODE_OPS = { 'Elu': tf.nn.elu, 'LeakyRelu': tf.nn.leaky_relu, 'Relu': tf.nn.relu, 'Relu6': tf.nn.relu6, 'Selu': tf.nn.selu, 'Sigmoid': tf.nn.sigmoid, 'Softplus': tf.nn.softplus, 'Softsign': tf.nn.softsign, 'Tanh': tf.nn.tanh, } class VerifiableModelWrapper(snt.AbstractModule): """Wraps a predictor network.""" def __init__(self, net_builder, name='verifiable_predictor'): """Constructor for the verifiable model. Args: net_builder: A callable that returns output logits from an input. net_builder must accept two arguments: the input (as the first argument) and is_training (as the second). name: Sonnet module name. 
""" super(VerifiableModelWrapper, self).__init__(name=name) self._net_builder = net_builder @property def wrapped_network(self): return self._net_builder @property def output_size(self): self._ensure_is_connected() return self._num_classes @property def logits(self): self._ensure_is_connected() return self._logits @property def inputs(self): self._ensure_is_connected() return self._inputs @property def input_wrappers(self): self._ensure_is_connected() return self._model_inputs @property def modules(self): self._ensure_is_connected() return self._modules def dependencies(self, module): self._ensure_is_connected() return self._module_depends_on[module] @property def output_module(self): self._ensure_is_connected() return self._produced_by[self._logits.name] def fanout_of(self, node): """Looks up fan-out for a given node. Args: node: `ibp.VerifiableWrapper` occurring in the network either as an operation, or as the initial input. Returns: Number of times `node` occurs as the input of another operation within the network, or 1 if `node` is the overall output. """ return self._fanouts[node] def _build(self, *z0, **kwargs): """Outputs logits from input z0. Args: *z0: inputs as `Tensor`. **kwargs: Other arguments passed directly to the _build() function of the wrapper model. Assumes the possible presence of `override` (defaults to False). However, if False, this function does not update any internal state and reuses any components computed by a previous call to _build(). If there were no previous calls to _build(), behaves as if it was set to True. Returns: logits resulting from using z0 as inputs. """ override = not self.is_connected if 'override' in kwargs: override = kwargs['override'] or override del kwargs['override'] if override: self._inputs = z0[0] if len(z0) == 1 else z0 # Build underlying verifiable modules. self._model_inputs = [] self._modules = [] self._produced_by = {} # Connection graph. 
self._fanouts = collections.Counter() for i, z in enumerate(z0): self._model_inputs.append(verifiable_wrapper.ModelInputWrapper(i)) self._produced_by[z.name] = self._model_inputs[-1] self._module_depends_on = collections.defaultdict(list) self._output_by_module = {} with snt.observe_connections(self._observer): logits = self._net_builder(*z0, **kwargs) # Logits might be produced by a non-Sonnet module. self._backtrack(logits, max_depth=100) # Log analysis. for m in self._modules: logging.info('Found: %s', m) output_shape = self._output_by_module[m].shape.as_list()[1:] logging.info(' Output shape: %s => %d units', output_shape, np.prod(output_shape)) for depends in self._module_depends_on[m]: logging.info(' Depends on: %s', depends) logging.info('Final logits produced by: %s', self._produced_by[logits.name]) self._logits = logits self._num_classes = logits.shape[-1].value else: # Must have been connected once before. self._ensure_is_connected() logits = self._net_builder(*z0, **kwargs) return logits def _observer(self, subgraph): input_nodes = self._inputs_for_observed_module(subgraph) if input_nodes is None: # We do not fail as we want to allow higher-level Sonnet components. # In practice, the rest of the logic will fail if we are unable to # connect all low-level modules. logging.warn('Unprocessed module "%s"', str(subgraph.module)) return if subgraph.outputs in input_nodes: # The Sonnet module is just returning its input as its output. # This may happen with a reshape in which the shape does not change. return self._add_module(self._wrapper_for_observed_module(subgraph), subgraph.outputs, *input_nodes) def _inputs_for_observed_module(self, subgraph): """Extracts input tensors from a connected Sonnet module. This default implementation supports common layer types, but should be overridden if custom layer types are to be supported. Args: subgraph: `snt.ConnectedSubGraph` specifying the Sonnet module being connected, and its inputs and outputs. 
Returns: List of input tensors, or None if not a supported Sonnet module. """ m = subgraph.module # Only support a few operations for now. if not (isinstance(m, snt.BatchReshape) or isinstance(m, snt.Linear) or isinstance(m, snt.Conv1D) or isinstance(m, snt.Conv2D) or isinstance(m, snt.BatchNorm) or isinstance(m, layers.ImageNorm)): return None if isinstance(m, snt.BatchNorm): return subgraph.inputs['input_batch'], else: return subgraph.inputs['inputs'], def _wrapper_for_observed_module(self, subgraph): """Creates a wrapper for a connected Sonnet module. This default implementation supports common layer types, but should be overridden if custom layer types are to be supported. Args: subgraph: `snt.ConnectedSubGraph` specifying the Sonnet module being connected, and its inputs and outputs. Returns: `ibp.VerifiableWrapper` for the Sonnet module. """ m = subgraph.module if isinstance(m, snt.BatchReshape): shape = subgraph.outputs.get_shape()[1:].as_list() return verifiable_wrapper.BatchReshapeWrapper(m, shape) elif isinstance(m, snt.Linear): return verifiable_wrapper.LinearFCWrapper(m) elif isinstance(m, snt.Conv1D): return verifiable_wrapper.LinearConv1dWrapper(m) elif isinstance(m, snt.Conv2D): return verifiable_wrapper.LinearConv2dWrapper(m) elif isinstance(m, layers.ImageNorm): return verifiable_wrapper.ImageNormWrapper(m) else: assert isinstance(m, snt.BatchNorm) return verifiable_wrapper.BatchNormWrapper(m) def _backtrack(self, node, max_depth=100): if node.name not in self._produced_by: if max_depth <= 0: raise ValueError('Unable to backtrack through the graph. ' 'Consider using more basic Sonnet modules.') self._wrap_node(node, max_depth=(max_depth - 1)) self._fanouts[self._produced_by[node.name]] += 1 def _wrap_node(self, node, **kwargs): """Adds an IBP wrapper for the node, and backtracks through its inputs. This default implementation supports common layer types, but should be overridden if custom layer types are to be supported. 
Implementations should create a `ibp.VerifiableWrapper` and then invoke `self._add_module(wrapper, node, *input_node, **kwargs)`. Args: node: TensorFlow graph node to wrap for IBP. **kwargs: Context to pass to `self._add_module`. """ # Group all unary monotonic ops at the end. if node.op.type in ('Add', 'AddV2', 'Mul', 'Sub', 'Maximum', 'Minimum'): input_node0 = node.op.inputs[0] input_node1 = node.op.inputs[1] if node.op.type in ('Add', 'AddV2'): w = verifiable_wrapper.IncreasingMonotonicWrapper(tf.add) elif node.op.type == 'Mul': w = verifiable_wrapper.PiecewiseMonotonicWrapper(tf.multiply) elif node.op.type == 'Sub': w = verifiable_wrapper.PiecewiseMonotonicWrapper(tf.subtract) elif node.op.type == 'Maximum': w = verifiable_wrapper.IncreasingMonotonicWrapper(tf.maximum) elif node.op.type == 'Minimum': w = verifiable_wrapper.IncreasingMonotonicWrapper(tf.minimum) self._add_module(w, node, input_node0, input_node1, **kwargs) return elif node.op.type == 'ConcatV2': num_inputs = node.op.get_attr('N') assert num_inputs == len(node.op.inputs) - 1 inputs = node.op.inputs[:num_inputs] axis = node.op.inputs[num_inputs] def concat(*args): return tf.concat(args, axis=axis) self._add_module( verifiable_wrapper.IncreasingMonotonicWrapper(concat, axis=axis), node, *inputs, **kwargs) return elif node.op.type == 'Softmax': input_node = node.op.inputs[0] self._add_module(verifiable_wrapper.SoftmaxWrapper(), node, input_node, **kwargs) return elif node.op.type == 'Const': self._add_module(verifiable_wrapper.ConstWrapper(node), node, **kwargs) return # The rest are all unary monotonic ops. parameters = dict() if node.op.type in _MONOTONIC_NODE_OPS: input_node = node.op.inputs[0] # Leaky ReLUs are a special case since they have a second argument. if node.op.type == 'LeakyRelu': parameters = dict(alpha=node.op.get_attr('alpha')) # Use function definition instead of lambda for clarity. 
def leaky_relu(x): return tf.nn.leaky_relu(x, **parameters) fn = leaky_relu else: fn = _MONOTONIC_NODE_OPS[node.op.type] elif node.op.type in ('Mean', 'Max', 'Sum', 'Min'): # reduce_mean/max have two inputs. The first one should be produced by a # upstream node, while the two one should represent the axis. input_node = node.op.inputs[0] parameters = dict(axis=node.op.inputs[1], keep_dims=node.op.get_attr('keep_dims')) # Use function definition instead of lambda for clarity. def reduce_max(x): return tf.reduce_max(x, **parameters) def reduce_mean(x): return tf.reduce_mean(x, **parameters) def reduce_min(x): return tf.reduce_min(x, **parameters) def reduce_sum(x): return tf.reduce_sum(x, **parameters) fn = dict( Max=reduce_max, Mean=reduce_mean, Sum=reduce_sum, Min=reduce_min)[node.op.type] elif node.op.type == 'ExpandDims': input_node = node.op.inputs[0] parameters = dict(axis=node.op.inputs[1]) def expand_dims(x): return tf.expand_dims(x, **parameters) fn = expand_dims elif node.op.type == 'Transpose': input_node = node.op.inputs[0] parameters = dict(perm=node.op.inputs[1]) def transpose(x): return tf.transpose(x, **parameters) fn = transpose elif node.op.type == 'Squeeze': input_node = node.op.inputs[0] parameters = dict(axis=node.op.get_attr('squeeze_dims')) def squeeze(x): return tf.squeeze(x, **parameters) fn = squeeze elif node.op.type == 'Pad': input_node = node.op.inputs[0] parameters = dict(paddings=node.op.inputs[1]) def pad(x): return tf.pad(x, **parameters) fn = pad elif node.op.type in ('MaxPool', 'AvgPool'): input_node = node.op.inputs[0] parameters = dict( ksize=node.op.get_attr('ksize'), strides=node.op.get_attr('strides'), padding=node.op.get_attr('padding'), data_format=node.op.get_attr('data_format'), ) if node.op.type == 'MaxPool': def max_pool(x): return tf.nn.max_pool(x, **parameters) fn = max_pool elif node.op.type == 'AvgPool': def avg_pool(x): return tf.nn.avg_pool(x, **parameters) fn = avg_pool elif node.op.type == 'Reshape': input_node = 
def _add_module(self, wrapper, node, *input_nodes, **kwargs):
  """Registers `wrapper` as the producer of `node` once its inputs are wrapped.

  All input nodes are backtracked before `wrapper` is appended, so the
  wrappers it depends on come earlier in `self._modules`.

  Args:
    wrapper: `ibp.VerifiableWrapper` for the node.
    node: TensorFlow graph node.
    *input_nodes: Input nodes for `node`.
    **kwargs: Forwarded unchanged to `self._backtrack` (carries `max_depth`).
  """
  # Inputs first: this is what keeps the module list in dependency order.
  for upstream_node in input_nodes:
    self._backtrack(upstream_node, **kwargs)

  registry = self._modules
  registry.append(wrapper)
  newest = registry[-1]
  self._produced_by[node.name] = newest

  dependencies = self._module_depends_on[newest]
  producers = [self._produced_by[upstream_node.name]
               for upstream_node in input_nodes]
  dependencies.extend(producers)

  self._output_by_module[newest] = node
class DNN(snt.AbstractModule):
  """Simple feed-forward neural network.

  The network is described by a list of layer specifications. A final linear
  layer of size `num_classes` is always appended to that list.
  """

  def __init__(self, num_classes, layer_types, l2_regularization_scale=0.,
               name='predictor'):
    """Constructor for the DNN.

    Args:
      num_classes: Output size. A `('linear', num_classes)` layer is appended
        after the layers given in `layer_types`.
      layer_types: Iterable of tuples. Each tuple must be one of the following:
        * ('conv2d', (kernel_height, kernel_width), channels, padding, stride)
        * ('linear', output_size)
        * ('batch_normalization',)
        * ('activation', activation)
        Convolutional layers must precede all linear layers.
      l2_regularization_scale: Scale of the L2 regularization on the weights
        of each layer. No regularizer is attached unless this is positive.
      name: Sonnet module name.
    """
    super(DNN, self).__init__(name=name)
    # Copy so that appending the output layer does not mutate the caller's
    # iterable.
    self._layer_types = list(layer_types)
    self._layer_types.append(('linear', num_classes))
    if l2_regularization_scale > 0.:
      # The scale is halved before being handed to the Keras L2 regularizer.
      regularizer = tf.keras.regularizers.l2(l=0.5*l2_regularization_scale)
      self._regularizers = {'w': regularizer}
    else:
      self._regularizers = None
    # Batch norm modules keyed by scope name, so that a later connection with
    # `reuse=True` can pick up the module created by an earlier one.
    self._batch_norms = {}

  def _build(self, z0, is_training=True, test_local_stats=False, reuse=False):
    """Outputs logits.

    Args:
      z0: Input tensor.
      is_training: Forwarded to every batch norm layer.
      test_local_stats: Forwarded to every batch norm layer.
      reuse: If True, batch norm layers are looked up among those stored by a
        previous connection instead of being registered anew; the flag is also
        forwarded to the batch norm call.

    Returns:
      Output of the last layer.

    Raises:
      ValueError: If a conv2d layer follows a linear layer, if a batch norm
        layer has no preceding named layer, or if `reuse` is True but the
        batch norm layer was never stored.
      NotImplementedError: If an activation is not in `_ALLOWED_ACTIVATIONS`.
    """
    zk = z0
    conv2d_id = 0
    linear_id = 0
    # Name of the most recent conv2d/linear/batch-norm layer; reset to None
    # by an activation. Batch norm layers derive their own name from it.
    name = None
    for spec in self._layer_types:
      if spec[0] == 'conv2d':
        if linear_id > 0:
          raise ValueError('Convolutional layers must precede fully connected '
                           'layers.')
        name = 'conv2d_{}'.format(conv2d_id)
        conv2d_id += 1
        (_, (kernel_height, kernel_width), channels, padding, stride) = spec
        m = snt.Conv2D(output_channels=channels,
                       kernel_shape=(kernel_height, kernel_width),
                       padding=padding, stride=stride, use_bias=True,
                       regularizers=self._regularizers,
                       initializers=_create_conv2d_initializer(
                           zk.get_shape().as_list()[1:], channels,
                           (kernel_height, kernel_width)),
                       name=name)
        zk = m(zk)
      elif spec[0] == 'linear':
        # Only the first linear layer may see a tensor of rank > 2, which is
        # flattened before it.
        must_flatten = (linear_id == 0 and len(zk.shape) > 2)
        if must_flatten:
          zk = snt.BatchFlatten()(zk)
        name = 'linear_{}'.format(linear_id)
        linear_id += 1
        output_size = spec[1]
        m = snt.Linear(output_size,
                       regularizers=self._regularizers,
                       initializers=_create_linear_initializer(
                           np.prod(zk.get_shape().as_list()[1:]), output_size),
                       name=name)
        zk = m(zk)
      elif spec[0] == 'batch_normalization':
        # NOTE(review): `name` is set by conv2d layers as well as linear ones,
        # so this check only rejects a batch norm that comes first or directly
        # after an activation; the message below is narrower than the check.
        if name is None:
          raise ValueError('Batch normalization only supported after linear '
                           'layers.')
        name += '_batch_norm'
        m = layers.BatchNorm(name=name)
        if reuse:
          # The freshly built module is only used to obtain the scope name
          # under which the original module was stored.
          if m.scope_name not in self._batch_norms:
            raise ValueError('Cannot set reuse to True without connecting the '
                             'module once before.')
          m = self._batch_norms[m.scope_name]
        else:
          self._batch_norms[m.scope_name] = m
        zk = m(zk, is_training=is_training, test_local_stats=test_local_stats,
               reuse=reuse)
      elif spec[0] == 'activation':
        if spec[1] not in _ALLOWED_ACTIVATIONS:
          raise NotImplementedError(
              'Only the following activations are supported {}'.format(
                  list(_ALLOWED_ACTIVATIONS)))
        name = None
        # Activations are looked up by name in `tf.nn`.
        m = getattr(tf.nn, spec[1])
        zk = m(zk)
    return zk
"""Interval bounds expressed relative to a nominal value.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from interval_bound_propagation.src import bounds as basic_bounds import sonnet as snt import tensorflow.compat.v1 as tf class RelativeIntervalBounds(basic_bounds.AbstractBounds): """Upper and lower bounds, as a delta relative to nominal values.""" def __init__(self, lower_offset, upper_offset, nominal): super(RelativeIntervalBounds, self).__init__() self._lower_offset = lower_offset self._upper_offset = upper_offset self._nominal = nominal @property def lower_offset(self): """Returns lower bounds, expressed relative to nominal values.""" return self._lower_offset @property def upper_offset(self): """Returns upper bounds, expressed relative to nominal values.""" return self._upper_offset @property def nominal(self): return self._nominal @property def lower(self): """Returns absolute lower bounds.""" return self.nominal + self.lower_offset @property def upper(self): """Returns absolute upper bounds.""" return self.nominal + self.upper_offset @property def shape(self): return self.lower_offset.shape.as_list() @classmethod def convert(cls, bounds): if isinstance(bounds, tf.Tensor): return cls(tf.zeros_like(bounds), tf.zeros_like(bounds), bounds) bounds = bounds.concretize() if not isinstance(bounds, cls): raise ValueError('Cannot convert "{}" to "{}"'.format(bounds, cls.__name__)) return bounds def apply_batch_reshape(self, wrapper, shape): """Propagates the bounds through a reshape. Args: wrapper: Contains prior bounds from a previous iteration. shape: output shape, excluding the batch dimension. Returns: Output bounds. """ reshape = snt.BatchReshape(shape) return RelativeIntervalBounds( reshape(self.lower_offset), reshape(self.upper_offset), reshape(self.nominal)) def apply_linear(self, wrapper, w, b): """Propagates the bounds through a linear layer. 
Args: wrapper: Contains prior bounds from a previous iteration. w: 2D tensor of shape (input_size, output_size) containing weights for the linear layer. b: 1D tensor of shape (output_size) containing biases for the linear layer, or `None` if no bias. Returns: Output bounds. """ w_pos = tf.maximum(w, 0) w_neg = tf.minimum(w, 0) lb = (tf.matmul(self.lower_offset, w_pos) + tf.matmul(self.upper_offset, w_neg)) ub = (tf.matmul(self.upper_offset, w_pos) + tf.matmul(self.lower_offset, w_neg)) nominal_out = tf.matmul(self.nominal, w) if b is not None: nominal_out += b return RelativeIntervalBounds(lb, ub, nominal_out) def apply_conv1d(self, wrapper, w, b, padding, stride): """Propagates the bounds through a 1D convolution layer. Args: wrapper: Contains prior bounds from a previous iteration. w: 3D tensor of shape (kernel_length, input_channels, output_channels) containing weights for the convolution. b: 1D tensor of shape (output_channels) containing biases for the convolution, or `None` if no bias. padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. stride: Integer stride. Returns: Output bounds. """ w_pos = tf.maximum(w, 0) w_neg = tf.minimum(w, 0) lb = (tf.nn.conv1d(self.lower_offset, w_pos, padding=padding, stride=stride) + tf.nn.conv1d(self.upper_offset, w_neg, padding=padding, stride=stride)) ub = (tf.nn.conv1d(self.upper_offset, w_pos, padding=padding, stride=stride) + tf.nn.conv1d(self.lower_offset, w_neg, padding=padding, stride=stride)) nominal_out = tf.nn.conv1d(self.nominal, w, padding=padding, stride=stride) if b is not None: nominal_out += b return RelativeIntervalBounds(lb, ub, nominal_out) def apply_conv2d(self, wrapper, w, b, padding, strides): """Propagates the bounds through a 2D convolution layer. Args: wrapper: Contains prior bounds from a previous iteration. w: 4D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing weights for the convolution. 
def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters):
  """Propagates the bounds through a monotonic op (activation, pool, `add`).

  The handling is selected by `fn.__name__`:
    * 'add', 'reduce_mean', 'reduce_sum', 'avg_pool': `fn` is applied
      separately to the lower offsets, the upper offsets and the nominal
      values (of this object and of every entry of `args`).
    * 'reduce_max': bounds come from `_maxpool_bounds` with no kernel shape.
    * 'max_pool': bounds come from `_maxpool_bounds`, using the inner entries
      of `parameters['ksize']` and `parameters['strides']`.
    * anything else: bounds come from `_activation_bounds`.

  Args:
    wrapper: Contains prior bounds from a previous iteration.
    fn: Callable implementing the op. It is both invoked on tensors and
      identified through its `__name__`.
    *args: Other inputs' bounds, for a multi-input node (e.g. Add). Only
      permitted for the first group of function names above.
    **parameters: Optional parameters if the op is parameterised, e.g.
      `{'alpha': 0.2}` for leaky ReLu, or `ksize`/`strides` for max pooling.

  Returns:
    Output bounds.
  """
  if fn.__name__ in ('add', 'reduce_mean', 'reduce_sum', 'avg_pool'):
    return RelativeIntervalBounds(
        fn(self.lower_offset, *[bounds.lower_offset for bounds in args]),
        fn(self.upper_offset, *[bounds.upper_offset for bounds in args]),
        fn(self.nominal, *[bounds.nominal for bounds in args]))
  else:
    assert not args, 'unary function expected'
    nominal_out = fn(self.nominal)
    if fn.__name__ == 'reduce_max':
      # NOTE(review): `_maxpool_bounds` recomputes its own nominal maximum
      # when no kernel shape is given, so the `nominal_out` passed here is
      # not what the offsets end up relative to inside that helper — confirm
      # that `fn` reduces over the same axes.
      lb, ub = _maxpool_bounds(fn, None, None,
                               self.lower_offset, self.upper_offset,
                               nominal_in=self.nominal,
                               nominal_out=nominal_out)
    elif fn.__name__ == 'max_pool':
      # `[1:-1]` drops the first and last entries of `ksize`/`strides`.
      lb, ub = _maxpool_bounds(fn,
                               parameters['ksize'][1:-1],
                               parameters['strides'][1:-1],
                               self.lower_offset, self.upper_offset,
                               nominal_in=self.nominal,
                               nominal_out=nominal_out)
    else:
      lb, ub = _activation_bounds(fn, self.lower_offset, self.upper_offset,
                                  nominal_in=self.nominal,
                                  parameters=parameters)
    return RelativeIntervalBounds(lb, ub, nominal_out)
def _maxpool_bounds(module, kernel_shape, strides, lb_in, ub_in,
                    nominal_in, nominal_out):
  """Computes naive relative bounds on the output of an N-D max pool.

  Args:
    module: Callable for the max-pool operation.
    kernel_shape: Integer list of `[kernel_height, kernel_width]`,
      or `None` to aggregate over the layer's entire spatial extent.
    strides: Integer list of `[vertical_stride, horizontal_stride]`. Not used
      by the computation.
    lb_in: (N+2)D tensor of shape (batch_size, input_height, input_width,
      layer_channels) containing lower bounds on the inputs, expressed
      relative to `nominal_in`.
    ub_in: (N+2)D tensor of the same shape containing upper bounds on the
      inputs, expressed relative to `nominal_in`.
    nominal_in: (N+2)D tensor of the same shape containing nominal input
      values.
    nominal_out: (N+2)D tensor of shape (batch_size, output_height,
      output_width, layer_channels) containing nominal output values. Only
      used when `kernel_shape` is given; otherwise it is replaced by the
      maximum of `nominal_in` over all axes except the first and last.

  Returns:
    lb_out: Lower bounds on the max pool outputs.
    ub_out: Upper bounds on the max pool outputs.
  """
  if kernel_shape is not None:
    # Must perform the max on absolute bounds, as the kernels may overlap.
    # TODO(stanforth) investigate a more numerically stable implementation
    del strides
    absolute_lower = module(nominal_in + lb_in)
    lb_out = absolute_lower - nominal_out
    absolute_upper = module(nominal_in + ub_in)
    ub_out = absolute_upper - nominal_out
    return lb_out, ub_out

  # Global pooling: shift the inputs by the per-example, per-channel nominal
  # maximum before applying the max.
  spatial_axes = list(range(1, nominal_in.shape.ndims-1))
  nominal_out = tf.reduce_max(nominal_in, axis=spatial_axes, keepdims=True)
  lb_out = module((nominal_in - nominal_out) + lb_in)
  ub_out = module((nominal_in - nominal_out) + ub_in)
  return lb_out, ub_out
class SimplexBounds(basic_bounds.AbstractBounds):
  """Specifies a bounding simplex within an embedding space."""

  def __init__(self, vertices, nominal, r):
    """Initialises the simplex bounds.

    Args:
      vertices: Tensor of shape (num_vertices, *input_shape)
        or of shape (batch_size, num_vertices, *input_shape)
        containing the vertices in embedding space.
      nominal: Tensor of shape (batch_size, *input_shape) specifying
        the unperturbed inputs in embedding space, where
        `*input_shape` denotes either (embedding_size,) for flat input
        (e.g. bag-of-words) or (input_length, embedding_channels)
        for sequence input.
      r: Scalar specifying the dilation factor of the simplex. The dilated
        simplex will have vertices `nominal + r * (vertices-nominal)`.
    """
    super(SimplexBounds, self).__init__()
    self._vertices = vertices
    self._nominal = nominal
    self._r = r

  @property
  def vertices(self):
    """Returns the simplex vertices as passed to the constructor."""
    return self._vertices

  @property
  def nominal(self):
    """Returns the nominal (unperturbed) inputs."""
    return self._nominal

  @property
  def r(self):
    """Returns the dilation factor of the simplex."""
    return self._r

  @property
  def shape(self):
    """Returns the static shape of the nominal inputs, as a list."""
    return self.nominal.shape.as_list()

  @classmethod
  def convert(cls, bounds):
    """Returns `bounds` unchanged if it already is of this class.

    Args:
      bounds: Candidate bounds object.

    Returns:
      `bounds` itself.

    Raises:
      ValueError: If `bounds` is not an instance of this class.
    """
    if not isinstance(bounds, cls):
      raise ValueError('Cannot convert "{}" to "{}"'.format(bounds,
                                                            cls.__name__))
    return bounds

  def apply_batch_reshape(self, wrapper, shape):
    """Propagates the simplex through a reshape.

    Args:
      wrapper: Unused here.
      shape: Output shape, excluding the batch dimension.

    Returns:
      `SimplexBounds` with reshaped vertices and nominal values.
    """
    reshape = snt.BatchReshape(shape)
    if self.vertices.shape.ndims == self.nominal.shape.ndims:
      # Vertices have a single leading dimension, like the nominal inputs.
      reshape_vertices = reshape
    else:
      # Vertices carry two leading dimensions, both of which are preserved.
      reshape_vertices = snt.BatchReshape(shape, preserve_dims=2)
    return SimplexBounds(reshape_vertices(self.vertices),
                         reshape(self.nominal),
                         self.r)

  def apply_linear(self, wrapper, w, b):
    """Propagates the simplex through a linear layer.

    Args:
      wrapper: Unused here.
      w: Weights of the linear layer.
      b: Biases of the linear layer, or `None` if no bias.

    Returns:
      `RelativeIntervalBounds` on the layer's outputs.
    """
    mapped_centres = tf.matmul(self.nominal, w)
    mapped_vertices = tf.tensordot(self.vertices, w, axes=1)
    lb, ub = _simplex_bounds(mapped_vertices, mapped_centres, self.r, -2)

    # The bias-free nominal output is exactly `mapped_centres`; reuse it
    # rather than issuing a second, identical matmul.
    nominal_out = mapped_centres if b is None else mapped_centres + b
    return relative_bounds.RelativeIntervalBounds(lb, ub, nominal_out)

  def apply_conv1d(self, wrapper, w, b, padding, stride):
    """Propagates the simplex through a 1D convolution layer.

    Args:
      wrapper: Unused here.
      w: Weights of the convolution.
      b: Biases of the convolution, or `None` if no bias.
      padding: Padding algorithm, forwarded to `tf.nn.conv1d`.
      stride: Stride, forwarded to `tf.nn.conv1d`.

    Returns:
      `RelativeIntervalBounds` on the layer's outputs.

    Raises:
      ValueError: If `vertices` has neither 3 nor 4 dimensions.
    """
    mapped_centres = tf.nn.conv1d(self.nominal, w,
                                  padding=padding, stride=stride)
    if self.vertices.shape.ndims == 3:
      # `self.vertices` has no batch dimension; its shape is
      # (num_vertices, input_length, embedding_channels).
      mapped_vertices = tf.nn.conv1d(self.vertices, w,
                                     padding=padding, stride=stride)
    elif self.vertices.shape.ndims == 4:
      # `self.vertices` has shape
      # (batch_size, num_vertices, input_length, embedding_channels).
      # Vertices are different for each example in the batch,
      # e.g. for word perturbations.
      mapped_vertices = snt.BatchApply(
          lambda x: tf.nn.conv1d(x, w, padding=padding, stride=stride))(
              self.vertices)
    else:
      raise ValueError('"vertices" must have either 3 or 4 dimensions.')

    lb, ub = _simplex_bounds(mapped_vertices, mapped_centres, self.r, -3)

    # Reuse the convolution of the nominal inputs computed above.
    nominal_out = mapped_centres if b is None else mapped_centres + b
    return relative_bounds.RelativeIntervalBounds(lb, ub, nominal_out)

  def apply_conv2d(self, wrapper, w, b, padding, strides):
    """Propagates the simplex through a 2D convolution layer.

    Args:
      wrapper: Unused here.
      w: Weights of the convolution.
      b: Biases of the convolution, or `None` if no bias.
      padding: Padding algorithm, forwarded to `tf.nn.convolution`.
      strides: Strides, forwarded to `tf.nn.convolution`.

    Returns:
      `RelativeIntervalBounds` on the layer's outputs.

    Raises:
      ValueError: If `vertices` has neither 4 nor 5 dimensions.
    """
    mapped_centres = tf.nn.convolution(self.nominal, w,
                                       padding=padding, strides=strides)
    if self.vertices.shape.ndims == 4:
      # `self.vertices` has no batch dimension; its shape is
      # (num_vertices, input_height, input_width, input_channels).
      mapped_vertices = tf.nn.convolution(self.vertices, w,
                                          padding=padding, strides=strides)
    elif self.vertices.shape.ndims == 5:
      # `self.vertices` has shape
      # (batch_size, num_vertices, input_height, input_width, input_channels).
      # Vertices are different for each example in the batch.
      mapped_vertices = snt.BatchApply(
          lambda x: tf.nn.convolution(x, w,
                                      padding=padding, strides=strides))(
                                          self.vertices)
    else:
      raise ValueError('"vertices" must have either 4 or 5 dimensions.')

    lb, ub = _simplex_bounds(mapped_vertices, mapped_centres, self.r, -4)

    # Reuse the convolution of the nominal inputs computed above.
    nominal_out = mapped_centres if b is None else mapped_centres + b
    return relative_bounds.RelativeIntervalBounds(lb, ub, nominal_out)

  def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters):
    """Propagates the simplex through an op identified by `fn.__name__`.

    'add', 'reduce_mean', 'reduce_sum' and 'avg_pool' are applied directly to
    the vertices and the nominal values, keeping the result a simplex.
    'quotient' divides the vertices by `parameters['denom']`. Every other
    function is delegated to the base class.

    Args:
      wrapper: Passed through to the base class when delegating.
      fn: Callable implementing the op.
      *args: Other inputs' bounds, for a multi-input node (e.g. Add).
      **parameters: Optional parameters of the op, e.g. `denom` for
        'quotient'.

    Returns:
      Output bounds.
    """
    if fn.__name__ in ('add', 'reduce_mean', 'reduce_sum', 'avg_pool'):
      if self.vertices.shape.ndims == self.nominal.shape.ndims:
        vertices_fn = fn
      else:
        # Vertices carry an extra leading dimension; merge the first two
        # dimensions while applying `fn`.
        vertices_fn = snt.BatchApply(fn, n_dims=2)
      return SimplexBounds(
          vertices_fn(self.vertices, *[bounds.vertices for bounds in args]),
          fn(self.nominal, *[bounds.nominal for bounds in args]),
          self.r)

    elif fn.__name__ == 'quotient':
      # An axis is inserted at position 1 of the denominator before it
      # divides the vertices.
      return SimplexBounds(
          self.vertices / tf.expand_dims(parameters['denom'], axis=1),
          fn(self.nominal),
          self.r)

    else:
      return super(SimplexBounds, self).apply_increasing_monotonic_fn(
          wrapper, fn, *args, **parameters)
""" # Use the negative of r, instead of the complement of r, as # we're shifting the input domain to be centred at the origin. lb_out = -r * mapped_centres + r * tf.reduce_min(mapped_vertices, axis=axis) ub_out = -r * mapped_centres + r * tf.reduce_max(mapped_vertices, axis=axis) return lb_out, ub_out ================================================ FILE: interval_bound_propagation/src/specification.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Defines the output specifications.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc from absl import logging from interval_bound_propagation.src import bounds as bounds_lib from interval_bound_propagation.src import verifiable_wrapper import six import sonnet as snt import tensorflow.compat.v1 as tf @six.add_metaclass(abc.ABCMeta) class Specification(snt.AbstractModule): """Defines a specification.""" def __init__(self, name, collapse=True): super(Specification, self).__init__(name=name) self._collapse = collapse @abc.abstractmethod def _build(self, modules): """Computes the worst-case specification value.""" @abc.abstractmethod def evaluate(self, logits): """Computes the specification value. Args: logits: The logits Tensor can have different shapes, i.e., [batch_size, num_classes]: The output should be [batch_size, num_specs]. 
class LinearSpecification(Specification):
  """Linear specifications: c^T * z_K + d <= 0."""

  def __init__(self, c, d=None, prune_irrelevant=True, collapse=True):
    """Builds a linear specification module.

    Args:
      c: Tensor of shape [batch_size, num_specifications, num_outputs].
      d: Optional tensor of shape [batch_size, num_specifications].
      prune_irrelevant: Whether to drop specifications whose coefficients in
        `c` are all (nearly) zero.
      collapse: Whether the specification may be folded into a final linear
        layer when computing the worst case.
    """
    super(LinearSpecification, self).__init__(name='specs', collapse=collapse)
    # c has shape [batch_size, num_specifications, num_outputs]
    # d has shape [batch_size, num_specifications]
    # Some specifications may be irrelevant (not a function of the output).
    # We automatically remove them for clarity. We expect the number of
    # irrelevant specs to be equal for all elements of a batch.
    if prune_irrelevant:
      # A specification is irrelevant when none of its coefficients exceeds
      # 1e-6 in magnitude. Shape is [batch_size, num_specifications, 1].
      irrelevant = tf.equal(tf.reduce_sum(
          tf.cast(tf.abs(c) > 1e-6, tf.int32), axis=-1, keepdims=True), 0)
      batch_size = tf.shape(c)[0]
      num_outputs = tf.shape(c)[2]
      # Expand the mask to the shape of `c` so it can select whole rows.
      irrelevant = tf.tile(irrelevant, [1, 1, num_outputs])
      self._c = tf.reshape(
          tf.boolean_mask(c, tf.logical_not(irrelevant)),
          [batch_size, -1, num_outputs])
    else:
      self._c = c
    # NOTE(review): only `c` is pruned above; `d` is stored as given. If a
    # non-None `d` is supplied and any specification is pruned, `d` no longer
    # lines up with `self._c` along the specification axis — confirm callers
    # never combine the two.
    self._d = d

  def _build(self, modules):
    """Outputs specification value.

    Args:
      modules: Sequence of wrapped modules; only the last one is used.

    Returns:
      Tensor of shape [batch_size, num_specifications] holding the maximum of
      the specification over the interval bounds.
    """
    # inputs have shape [batch_size, num_outputs].
    if not (self.collapse and
            isinstance(modules[-1], verifiable_wrapper.LinearFCWrapper)):
      logging.info('Elision of last layer disabled.')
      bounds = modules[-1].output_bounds
      w = self._c
      b = self._d
    else:
      logging.info('Elision of last layer active.')
      # Collapse the last layer.
      bounds = modules[-1].input_bounds
      w = modules[-1].module.w
      b = modules[-1].module.b
      # Compose the specification with the layer: the result has shape
      # [batch_size, num_specifications, layer_input_size].
      w = tf.einsum('ijk,lk->ijl', self._c, w)
      b = tf.einsum('ijk,k->ij', self._c, b)
      if self._d is not None:
        b += self._d

    # Maximize z * w + b s.t. lower <= z <= upper.
    bounds = bounds_lib.IntervalBounds.convert(bounds)
    # Interval centre and radius.
    c = (bounds.lower + bounds.upper) / 2.
    r = (bounds.upper - bounds.lower) / 2.
    c = tf.einsum('ij,ikj->ik', c, w)
    if b is not None:
      c += b
    # The radius contributes through the absolute value of the weights.
    r = tf.einsum('ij,ikj->ik', r, tf.abs(w))

    # output has shape [batch_size, num_specifications].
    return c + r

  def evaluate(self, logits):
    """Computes the specification value at the given logits.

    Args:
      logits: Tensor of rank 2, 3 or 4. The einsum subscripts below treat
        these as [batch, outputs], [restarts, batch, outputs] and
        [restarts, specs, batch, outputs] respectively.

    Returns:
      The specification values, with `d` added when it is set.
    """
    if len(logits.shape) == 2:
      output = tf.einsum('ij,ikj->ik', logits, self._c)
    elif len(logits.shape) == 3:
      output = tf.einsum('rij,ikj->rik', logits, self._c)
    else:
      assert len(logits.shape) == 4
      # Each specification is evaluated on its own slice of the logits.
      output = tf.einsum('rsbo,bso->rbs', logits, self._c)
    if self._d is not None:
      output += self._d
    return output

  @property
  def num_specifications(self):
    """Returns the (dynamic) size of the specification axis of `c`."""
    return tf.shape(self._c)[1]

  @property
  def c(self):
    """Returns the coefficients, after any pruning."""
    return self._c

  @property
  def d(self):
    """Returns the offsets as passed to the constructor."""
    return self._d
with self._enter_variable_scope(): indices = [] for i in range(self._num_classes): indices.append(list(range(i)) + list(range(i + 1, self._num_classes))) indices = tf.constant(indices, dtype=tf.int32) self._correct_idx, self._wrong_idx = self._build_indices(label, indices) def _build(self, modules): if not (self.collapse and isinstance(modules[-1], verifiable_wrapper.LinearFCWrapper)): logging.info('Elision of last layer disabled.') bounds = modules[-1].output_bounds bounds = bounds_lib.IntervalBounds.convert(bounds) correct_class_logit = tf.gather_nd(bounds.lower, self._correct_idx) wrong_class_logits = tf.gather_nd(bounds.upper, self._wrong_idx) return wrong_class_logits - tf.expand_dims(correct_class_logit, 1) logging.info('Elision of last layer active.') bounds = modules[-1].input_bounds bounds = bounds_lib.IntervalBounds.convert(bounds) batch_size = tf.shape(bounds.lower)[0] w = modules[-1].module.w b = modules[-1].module.b w_t = tf.tile(tf.expand_dims(tf.transpose(w), 0), [batch_size, 1, 1]) b_t = tf.tile(tf.expand_dims(b, 0), [batch_size, 1]) w_correct = tf.expand_dims(tf.gather_nd(w_t, self._correct_idx), -1) b_correct = tf.expand_dims(tf.gather_nd(b_t, self._correct_idx), 1) w_wrong = tf.transpose(tf.gather_nd(w_t, self._wrong_idx), [0, 2, 1]) b_wrong = tf.gather_nd(b_t, self._wrong_idx) w = w_wrong - w_correct b = b_wrong - b_correct # Maximize z * w + b s.t. lower <= z <= upper. c = (bounds.lower + bounds.upper) / 2. r = (bounds.upper - bounds.lower) / 2. 
c = tf.einsum('ij,ijk->ik', c, w) if b is not None: c += b r = tf.einsum('ij,ijk->ik', r, tf.abs(w)) return c + r def evaluate(self, logits): if len(logits.shape) == 2: correct_class_logit = tf.gather_nd(logits, self._correct_idx) correct_class_logit = tf.expand_dims(correct_class_logit, -1) wrong_class_logits = tf.gather_nd(logits, self._wrong_idx) elif len(logits.shape) == 3: # [num_restarts, batch_size, num_classes] to # [num_restarts, batch_size, num_specs] logits = tf.transpose(logits, [1, 2, 0]) # Put restart dimension last. correct_class_logit = tf.gather_nd(logits, self._correct_idx) correct_class_logit = tf.transpose(correct_class_logit) correct_class_logit = tf.expand_dims(correct_class_logit, -1) wrong_class_logits = tf.gather_nd(logits, self._wrong_idx) wrong_class_logits = tf.transpose(wrong_class_logits, [2, 0, 1]) else: assert len(logits.shape) == 4 # [num_restarts, num_specs, batch_size, num_classes] to # [num_restarts, batch_size, num_specs]. logits = tf.transpose(logits, [2, 3, 1, 0]) correct_class_logit = tf.gather_nd(logits, self._correct_idx) correct_class_logit = tf.transpose(correct_class_logit, [2, 0, 1]) batch_size = tf.shape(logits)[0] wrong_idx = tf.concat([ self._wrong_idx, tf.tile(tf.reshape(tf.range(self.num_specifications, dtype=tf.int32), [1, self.num_specifications, 1]), [batch_size, 1, 1])], axis=-1) wrong_class_logits = tf.gather_nd(logits, wrong_idx) wrong_class_logits = tf.transpose(wrong_class_logits, [2, 0, 1]) return wrong_class_logits - correct_class_logit @property def num_specifications(self): return self._num_classes - 1 @property def correct_idx(self): return self._correct_idx @property def wrong_idx(self): return self._wrong_idx def _build_indices(self, label, indices): batch_size = tf.shape(label)[0] i = tf.range(batch_size, dtype=tf.int32) correct_idx = tf.stack([i, tf.cast(label, tf.int32)], axis=1) wrong_idx = tf.stack([ tf.tile(tf.reshape(i, [batch_size, 1]), [1, self._num_classes - 1]), tf.gather(indices, label), 
class TargetedClassificationSpecification(ClassificationSpecification):
  """Defines a specification that compares the true class with another."""

  def __init__(self, label, num_classes, target_class, collapse=True):
    """Builds the specification.

    Args:
      label: Tensor of true labels; its first dimension is the batch size.
      num_classes: Number of classes.
      target_class: Integer tensor of target classes, of rank 1 (one target
        per example) or rank 2 (several targets per example, with a
        statically known second dimension).
      collapse: Forwarded to `ClassificationSpecification`.

    Raises:
      ValueError: If the number of targets per example is not statically
        known.
    """
    super(TargetedClassificationSpecification, self).__init__(
        label, num_classes, collapse=collapse)
    batch_size = tf.shape(label)[0]
    if len(target_class.shape) == 1:
      target_class = tf.reshape(target_class, [batch_size, 1])
    self._num_specifications = target_class.shape[1].value
    if self._num_specifications is None:
      raise ValueError('Cannot retrieve the number of target classes')
    self._target_class = target_class
    i = tf.range(batch_size, dtype=tf.int32)
    # Overwrite the one-vs-all indices built by the parent class. The batch
    # indices are int32, so the targets are cast to int32 as well (the parent
    # does the same for `label`); tf.stack needs a single dtype.
    self._wrong_idx = tf.stack([
        tf.tile(tf.reshape(i, [batch_size, 1]), [1, self.num_specifications]),
        tf.cast(target_class, tf.int32)
    ], axis=2)

  @property
  def target_class(self):
    """Returns the target class index."""
    return self._target_class

  @property
  def num_specifications(self):
    return self._num_specifications
If the true class is the least likely to # be predicted, it is fine to target any other class as the attack will # be successful anyways. j = tf.nn.top_k(-logits, k=num_targets, sorted=False).indices l = tf.expand_dims(label, 1) target_class = tf.mod( j + tf.cast(tf.equal(j, tf.cast(l, tf.int32)), tf.int32), num_classes) super(LeastLikelyClassificationSpecification, self).__init__( label, num_classes, target_class, collapse=collapse) ================================================ FILE: interval_bound_propagation/src/utils.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Helpers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import re from absl import logging from interval_bound_propagation.src import attacks from interval_bound_propagation.src import bounds from interval_bound_propagation.src import layers from interval_bound_propagation.src import loss from interval_bound_propagation.src import specification import numpy as np import tensorflow.compat.v1 as tf # Defines a dataset sample.""" Sample = collections.namedtuple('Sample', ['image', 'label']) def build_dataset(raw_data, batch_size=50, sequential=True): """Builds a dataset from raw NumPy tensors.""" images, labels = raw_data # We need width, height and channel. 
def randomize(images, init_shape, expand_shape=None, crop_shape=None,
              vertical_flip=False):
  """Randomly pads/crops and flips every image of a batch.

  Args:
    images: Batch of images; each element is reshaped to `init_shape`.
    init_shape: Shape of a single image.
    expand_shape: If set and different from `init_shape`, each image is
      resized with `tf.image.resize_image_with_crop_or_pad` to
      `expand_shape[0]` x `expand_shape[1]`.
    crop_shape: If set and different from the shape reached so far, a random
      crop of this shape is taken.
    vertical_flip: If True, applies `tf.image.random_flip_left_right`.

  Returns:
    The result of mapping the per-image augmentation over `images`.

  Raises:
    ValueError: If the last entries of `expand_shape` and `init_shape` differ.
  """
  pad_first = expand_shape is not None and expand_shape != init_shape
  shape_before_crop = expand_shape if pad_first else init_shape
  crop_after = crop_shape is not None and crop_shape != shape_before_crop

  def _augment(single):
    """Applies the configured augmentation steps to one image."""
    augmented = tf.reshape(single, init_shape)
    if pad_first:
      if expand_shape[-1] != init_shape[-1]:
        raise ValueError('Number channels is not specified correctly.')
      augmented = tf.image.resize_image_with_crop_or_pad(
          augmented, expand_shape[0], expand_shape[1])
    if crop_after:
      augmented = tf.random_crop(augmented, crop_shape)
    if vertical_flip:
      augmented = tf.image.random_flip_left_right(augmented)
    return augmented

  return tf.map_fn(_augment, images)
alpha = (final_value - init_value) / ((final_step - mid_step) * beta * t + (mid_step - init_step) * t) mid_value = alpha * (mid_step - init_step) ** beta + init_value # Tensorflow operation. is_ramp = tf.cast(step > init_step, tf.float32) is_linear = tf.cast(step >= mid_step, tf.float32) return (is_ramp * ( (1. - is_linear) * ( init_value + alpha * tf.pow(tf.cast(step - init_step, tf.float32), beta)) + is_linear * linear_schedule( step, mid_step, final_step, mid_value, final_value)) + (1. - is_ramp) * init_value) def build_loss_schedule(step, warmup_steps, rampup_steps, init, final, warmup=None): """Linear schedule builder. Args: step: Current step number. warmup_steps: When step < warmup_steps, set value to warmup. rampup_steps: Ramp up schedule value from init to final in rampup_step. init: Initial schedule value after warmup_steps. final: Final schedule value after warmup_steps + rampup_steps. warmup: Schedule value before warmup_steps. When set to None, the warmup period value is set to init. Returns: A schedule tensor. """ if warmup is None and init == final: return init if rampup_steps < 0: if warmup is not None: return tf.cond(step < warmup_steps, lambda: tf.constant(warmup), lambda: tf.constant(final)) return final schedule = linear_schedule( step, warmup_steps, warmup_steps + rampup_steps, init, final) if warmup is not None: # Set the value to warmup during warmup process. 
def create_classification_losses(
    global_step, inputs, label, predictor_network, epsilon, loss_weights,
    warmup_steps=0, rampup_steps=-1, input_bounds=(0., 1.),
    loss_builder=loss.Losses, options=None):
  """Creates the training loss.

  Args:
    global_step: Current training step.
    inputs: Nominal inputs.
    label: Labels for `inputs`.
    predictor_network: Model wrapper; it is called on `inputs` and must expose
      `output_size` and `propagate_bounds`.
    epsilon: Final perturbation radius.
    loss_weights: Mapping with keys 'nominal', 'attack' and 'verified'. Each
      value is a dict of keyword arguments (`init`, `final` and optionally
      `warmup`) describing the schedule of the corresponding loss weight.
    warmup_steps: Number of warm-up steps.
    rampup_steps: Number of ramp-up steps; a negative value disables the
      ramp-up and uses `epsilon` directly.
    input_bounds: Tuple with the minimum and maximum allowed input value.
    loss_builder: Callable building the losses module.
    options: Optional dict overriding the defaults defined below.

  Returns:
    A tuple (losses, train_loss, train_epsilon).
  """
  # Whether to elide the last linear layer with the specification.
  elide = True
  # Which loss to use for the IBP loss.
  loss_type = 'xent'
  # If the loss_type is 'hinge', which margin to use.
  loss_margin = 10.
  # Amount of label smoothing.
  label_smoothing = 0.
  # Step after which is_training is turned off (never if negative).
  is_training_off_after = -1
  # If True, epsilon changes more smoothly.
  smooth_epsilon_schedule = False
  # Either 'one_vs_all', 'random_n', 'least_likely_n' or 'none'.
  verified_specification = 'one_vs_all'
  # Attack options.
  attack_specification = 'UntargetedPGDAttack_7x1x1_UnrolledAdam_.1'
  attack_scheduled = False
  attack_random_init = 1.
  # Model arguments.
  nominal_args = dict(is_training=True, test_local_stats=False, reuse=False)
  attack_args = {
      'intermediate': dict(is_training=False, test_local_stats=False,
                           reuse=True),
      'final': dict(is_training=False, test_local_stats=False, reuse=True),
  }
  if options is not None:
    elide = options.get('elide_last_layer', elide)
    loss_type = options.get('verified_loss_type', loss_type)
    # The fallback must be the default margin, not the loss type.
    loss_margin = options.get('verified_loss_margin', loss_margin)
    label_smoothing = options.get('label_smoothing', label_smoothing)
    is_training_off_after = options.get(
        'is_training_off_after', is_training_off_after)
    smooth_epsilon_schedule = options.get(
        'smooth_epsilon_schedule', smooth_epsilon_schedule)
    verified_specification = options.get(
        'verified_specification', verified_specification)
    attack_specification = options.get(
        'attack_specification', attack_specification)
    attack_scheduled = options.get('attack_scheduled', attack_scheduled)
    attack_random_init = options.get('attack_random_init', attack_random_init)
    nominal_args = dict(options.get('nominal_args', nominal_args))
    attack_args = dict(options.get('attack_args', attack_args))

  # The per-stage dictionaries are updated in place below; copy them so that
  # dictionaries owned by the caller (through `options`) are left untouched.
  for stage in ('intermediate', 'final'):
    attack_args[stage] = dict(attack_args[stage])

  def _get_schedule(init, final, warmup=None):
    return build_loss_schedule(global_step, warmup_steps, rampup_steps, init,
                               final, warmup)

  def _is_loss_active(init, final, warmup=None):
    return init > 0. or final > 0. or (warmup is not None and warmup > 0.)

  nominal_xent = _get_schedule(**loss_weights.get('nominal'))
  attack_xent = _get_schedule(**loss_weights.get('attack'))
  use_attack = _is_loss_active(**loss_weights.get('attack'))
  verified_loss = _get_schedule(**loss_weights.get('verified'))
  use_verification = _is_loss_active(**loss_weights.get('verified'))
  if verified_specification == 'none':
    use_verification = False
  weight_mixture = loss.ScalarLosses(
      nominal_cross_entropy=nominal_xent,
      attack_cross_entropy=attack_xent,
      verified_loss=verified_loss)

  # Ramp-up.
  if rampup_steps < 0:
    train_epsilon = tf.constant(epsilon)
  else:
    if smooth_epsilon_schedule:
      train_epsilon = smooth_schedule(
          global_step, warmup_steps, warmup_steps + rampup_steps, 0., epsilon)
    else:
      train_epsilon = linear_schedule(
          global_step, warmup_steps, warmup_steps + rampup_steps, 0., epsilon)

  # Set is_training according to options.
  if is_training_off_after >= 0:
    is_training = global_step < is_training_off_after
  else:
    is_training = True

  # If the build arguments want training off, we set is_training to False.
  # Otherwise, we respect the is_training_off_after option.
  def _update_is_training(kwargs):
    if 'is_training' in kwargs:
      kwargs['is_training'] &= is_training
  _update_is_training(nominal_args)
  _update_is_training(attack_args['intermediate'])
  _update_is_training(attack_args['final'])

  logits = predictor_network(inputs, override=True, **nominal_args)
  num_classes = predictor_network.output_size
  if use_verification:
    logging.info('Verification active.')
    input_interval_bounds = bounds.IntervalBounds(
        tf.maximum(inputs - train_epsilon, input_bounds[0]),
        tf.minimum(inputs + train_epsilon, input_bounds[1]))
    predictor_network.propagate_bounds(input_interval_bounds)
    spec = create_specification(label, num_classes, logits,
                                verified_specification, collapse=elide)
  else:
    logging.info('Verification disabled.')
    spec = None
  if use_attack:
    logging.info('Attack active.')
    pgd_attack = create_attack(
        attack_specification, predictor_network, label,
        train_epsilon if attack_scheduled else epsilon,
        input_bounds=input_bounds, random_init=attack_random_init,
        predictor_kwargs=attack_args)
  else:
    logging.info('Attack disabled.')
    pgd_attack = None
  losses = loss_builder(predictor_network, spec, pgd_attack,
                        interval_bounds_loss_type=loss_type,
                        interval_bounds_hinge_margin=loss_margin,
                        label_smoothing=label_smoothing)
  losses(label)
  train_loss = sum(l * w for l, w in zip(losses.scalar_losses,
                                         weight_mixture))
  # Add a regularization loss.
  regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  train_loss = train_loss + tf.reduce_sum(regularizers)
  return losses, train_loss, train_epsilon


# Additional helper code to build specific PGD attacks.
def get_attack_builder(logits, label, name='UntargetedPGDAttack',
                       random_seed=None, manual_target_class=None):
  """Returns a callable with the same arguments as PGDAttack.

  In addition to the callable, this function also returns the targeted class
  indices as a Tensor with the same shape as label.

  Usage is as follows:
    logits = model(inputs)
    attack_cls, specification, target_class = get_attack_builder(logits, labels)
    # target_class is None, if attack_cls is not a targeted attack.
    attack_instance = attack_cls(model, specification, epsilon)
    perturbed_inputs = attack_instance(inputs, labels)

  Args:
    logits: Tensor of nominal logits of shape [batch_size, num_classes].
    label: Tensor of labels of shape [batch_size].
    name: Name of a PGDAttack class or any of "RandomMoreLikelyPGDAttack",
      "RandomMostLikelyPGDAttack", "LeastLikelyMoreLikelyPGDAttack",
      "LeastLikelyMostLikelyPGDAttack", "ManualMoreLikelyPGDAttack",
      "ManualMostLikelyPGDAttack". Any attack name can be postfixed by
      "Xent" to use the cross-entropy loss rather than margin loss. A
      "*MoreLikelyPGDAttack" name may be prefixed by the number of target
      classes (one target when the prefix is omitted).
    random_seed: Sets the random seed for "Random*" attacks.
    manual_target_class: For "Manual*" attacks, Tensor of target class indices
      of shape [batch_size].

  Returns:
    A callable, a Specification and a Tensor of target label (or None if the
    attack is not targeted).

  Raises:
    ValueError: If the number of classes cannot be determined, the attack name
      is unknown, cross-entropy is requested for a "*MoreLikelyPGDAttack", or
      a "Manual*" attack is requested without `manual_target_class`.
  """
  if name.endswith('Xent'):
    use_xent = True
    name = name[:-4]
  else:
    use_xent = False
  if name.endswith('Linf'):
    use_l2 = False
    name = name[:-4]  # Just for syntactic sugar.
  elif name.endswith('L2'):
    use_l2 = True
    name = name[:-2]
  else:
    use_l2 = False

  num_classes = logits.shape[1].value
  if num_classes is None:
    raise ValueError('Cannot determine the number of classes from logits.')

  # Special case for multi-targeted attacks. The optional count is captured as
  # a whole number so that multi-digit values are not truncated.
  m = re.match(r'((?:MemoryEfficient)?MultiTargetedPGDAttack)'
               r'(?:(Top|Random)(\d+)?)?', name)
  if m is not None:
    # Request for a multi-targeted attack.
    is_multitargeted = True
    name = m.group(1)
    is_random = (m.group(2) == 'Random')
    max_specs = m.group(3)
    max_specs = int(max_specs) if max_specs is not None else 0
  else:
    is_multitargeted = False

  # Any of the readily available attack classes use the standard classification
  # specification (one-vs-all) and are untargeted.
  if hasattr(attacks, name):
    attack_cls = getattr(attacks, name)
    parameters = {}
    if use_xent:
      parameters['objective_fn'] = _maximize_cross_entropy
    if use_l2:
      parameters['project_perturbation'] = _get_projection(2)
    if is_multitargeted:
      parameters['max_specifications'] = max_specs
      parameters['random_specifications'] = is_random
    if parameters:
      attack_cls = _change_parameters(attack_cls, **parameters)
    attack_specification = specification.ClassificationSpecification(
        label, num_classes)
    return attack_cls, attack_specification, None

  # Attacks can use an adaptive scheme.
  if name.endswith('AdaptivePGDAttack'):
    name = name[:-len('AdaptivePGDAttack')] + 'PGDAttack'
    is_adaptive = True
  else:
    is_adaptive = False

  # Attacks can be preceded by a number to indicate the number of target
  # classes. For efficiency, this is only available for *MoreLikely attacks.
  m = re.match(r'(\d*)(.*MoreLikelyPGDAttack)', name)
  if m is not None:
    # The numeric prefix is optional: an empty prefix means a single target.
    num_targets = int(m.group(1)) if m.group(1) else 1
    name = m.group(2)
  else:
    num_targets = 1

  # All attacks that are not directly listed in the attacks library are
  # targeted attacks that need to be manually constructed.
  if name not in ('RandomMoreLikelyPGDAttack', 'RandomMostLikelyPGDAttack',
                  'LeastLikelyMoreLikelyPGDAttack',
                  'LeastLikelyMostLikelyPGDAttack',
                  'ManualMoreLikelyPGDAttack', 'ManualMostLikelyPGDAttack'):
    raise ValueError('Unknown attack "{}".'.format(name))
  if name.startswith('Manual') and manual_target_class is None:
    raise ValueError(
        'Attack "{}" requires manual_target_class.'.format(name))

  base_attack_cls = (attacks.AdaptiveUntargetedPGDAttack
                     if is_adaptive else attacks.UntargetedPGDAttack)
  if 'More' in name:
    if use_xent:
      raise ValueError('Using cross-entropy is not supported by '
                       '"*MoreLikelyPGDAttack".')
    attack_cls = base_attack_cls
  else:
    # We need to reverse the attack direction w.r.t. the specifications.
    attack_cls = _change_parameters(
        base_attack_cls,
        objective_fn=(_minimize_cross_entropy if use_xent else
                      _minimize_margin),
        success_fn=_all_smaller)
  if use_l2:
    attack_cls = _change_parameters(
        attack_cls, project_perturbation=_get_projection(2))

  # Set attack specification and target class.
  if name == 'RandomMoreLikelyPGDAttack':
    # A random target class should become more likely than the true class.
    attack_specification = specification.RandomClassificationSpecification(
        label, num_classes, num_targets=num_targets, seed=random_seed)
    target_class = (tf.squeeze(attack_specification.target_class, 1)
                    if num_targets == 1 else None)
  elif name == 'LeastLikelyMoreLikelyPGDAttack':
    attack_specification = specification.LeastLikelyClassificationSpecification(
        label, num_classes, logits, num_targets=num_targets)
    target_class = (tf.squeeze(attack_specification.target_class, 1)
                    if num_targets == 1 else None)
  elif name == 'ManualMoreLikelyPGDAttack':
    attack_specification = specification.TargetedClassificationSpecification(
        label, num_classes, manual_target_class)
    target_class = (tf.squeeze(attack_specification.target_class, 1)
                    if num_targets == 1 else None)
  elif name == 'RandomMostLikelyPGDAttack':
    # This attack needs to make the random target the highest logit for it to
    # be successful.
    target_class = _get_random_class(label, num_classes, seed=random_seed)
    attack_specification = specification.ClassificationSpecification(
        target_class, num_classes)
  elif name == 'LeastLikelyMostLikelyPGDAttack':
    # This attack needs to make the least likely target the highest logit for
    # it to be successful.
    target_class = _get_least_likely_class(label, num_classes, logits)
    attack_specification = specification.ClassificationSpecification(
        target_class, num_classes)
  else:
    assert name == 'ManualMostLikelyPGDAttack'
    target_class = manual_target_class
    attack_specification = specification.ClassificationSpecification(
        target_class, num_classes)
  return attack_cls, attack_specification, target_class
def parse_learning_rate(step, learning_rate):
  """Returns the learning rate for the given step.

  Args:
    step: Current step (an integer tensor when a schedule is used).
    learning_rate: Either a real number, which is returned unchanged, or a
      string of the form `initial_learning_rate[,learning@steps]*`, e.g.,
      "1e-3" or "1e-3,1e-4@15000,1e-5@25000". Instead of "@", "S" also marks a
      step change and "L" marks a linear interpolation ending at the given
      step. Values may be written as fractions (e.g., 2/255).

  Returns:
    The numeric `learning_rate` itself, or a float32 tensor when
    `learning_rate` is a string.

  Raises:
    ValueError: If a schedule token contains none of "@", "S" or "L".
  """
  import numbers  # Local import: only needed for the numeric fast path.

  # Any real number (Python int/float, NumPy scalar) is a constant rate.
  if isinstance(learning_rate, numbers.Real):
    return learning_rate
  # We use eval to allow learning rates specified as fractions (e.g., 2/255).
  # NOTE(review): eval executes arbitrary expressions; the schedule string
  # must come from a trusted configuration.
  tokens = learning_rate.split(',')
  first_lr = float(eval(tokens[0]))  # pylint: disable=eval-used
  if len(tokens) == 1:
    return tf.constant(first_lr, dtype=tf.float32)
  # Parse steps. Segment k is interpolated from (init_step[k], init_values[k])
  # to (final_step[k], final_values[k]).
  init_values = [first_lr]
  final_values = []
  init_step = [0]
  final_step = []
  for t in tokens[1:]:
    if '@' in t:
      lr, boundary = t.split('@', 1)
      is_linear = False
    elif 'S' in t:  # Syntactic sugar to indicate a step.
      lr, boundary = t.split('S', 1)
      is_linear = False
    elif 'L' in t:
      lr, boundary = t.split('L', 1)
      is_linear = True
    else:
      raise ValueError('Unknown specification.')
    lr = float(eval(lr))  # pylint: disable=eval-used
    init_values.append(lr)
    if is_linear:
      # Ramp linearly from the previous value to the new one.
      final_values.append(lr)
    else:
      # Hold the previous value until the boundary is reached.
      final_values.append(init_values[-2])
    boundary = int(boundary)
    init_step.append(boundary)
    final_step.append(boundary)
  # Close the last segment with a constant value.
  large_step = max(final_step) + 1
  final_step.append(large_step)
  final_values.append(lr)

  # Find current index.
  boundaries = list(final_step) + [large_step + 2]
  boundaries = tf.convert_to_tensor(boundaries, dtype=tf.int64)
  b = boundaries - tf.minimum(step + 1, large_step + 1)
  large_step = tf.constant(
      large_step, shape=boundaries.shape, dtype=step.dtype)
  # Boundaries that are already passed are pushed out of reach of argmin.
  b = tf.where(b < 0, large_step, b)
  idx = tf.minimum(tf.argmin(b), len(init_values) - 1)
  init_step = tf.convert_to_tensor(init_step, dtype=tf.float32)
  final_step = tf.convert_to_tensor(final_step, dtype=tf.float32)
  init_values = tf.convert_to_tensor(init_values, dtype=tf.float32)
  final_values = tf.convert_to_tensor(final_values, dtype=tf.float32)
  x1 = tf.gather(init_step, idx)
  x2 = tf.gather(final_step, idx)
  y1 = tf.gather(init_values, idx)
  y2 = tf.gather(final_values, idx)
  return (tf.cast(step, tf.float32) - x1) / (x2 - x1) * (y2 - y1) + y1
def _maximize_cross_entropy(specification_bounds):
  """Cross-entropy objective built from the specification values.

  A zero logit is appended to the specification values and the softmax
  cross-entropy is computed against a one-hot label on that extra entry.

  Args:
    specification_bounds: Tensor of shape
      [num_restarts, batch_size, num_specs].

  Returns:
    The cross-entropy, with the last dimension reduced.
  """
  dtype = specification_bounds.dtype
  dynamic_shape = tf.shape(specification_bounds)
  extra_column = [dynamic_shape[0], dynamic_shape[1], 1]
  # Logits: the specification values followed by a reference logit of zero.
  padded_logits = tf.concat(
      [specification_bounds, tf.zeros(extra_column, dtype=dtype)], axis=2)
  # Labels: all the mass on the appended reference entry.
  one_hot_last = tf.concat(
      [tf.zeros_like(specification_bounds),
       tf.ones(extra_column, dtype=dtype)], axis=2)
  return tf.nn.softmax_cross_entropy_with_logits_v2(
      labels=tf.stop_gradient(one_hot_last), logits=padded_logits)
def _get_projection(p):
  """Returns a projection function for the given norm.

  Args:
    p: Either `np.inf` (clip each coordinate to [-epsilon, epsilon]) or 2
      (clip the norm taken over all non-batch axes to epsilon).

  Returns:
    A function (perturbation, epsilon, input_image, image_bounds) returning
    the projected perturbation such that the perturbed image also stays
    within `image_bounds`.

  Raises:
    ValueError: If `p` is neither `np.inf` nor 2.
  """
  if p != np.inf and p != 2:
    raise ValueError('p must be np.inf or 2.')

  def _projection(perturbation, epsilon, input_image, image_bounds):
    if p == np.inf:
      clipped_perturbation = tf.clip_by_value(perturbation, -epsilon, epsilon)
    else:
      # Every axis but the leading (batch) one contributes to the norm.
      axes = list(range(1, len(perturbation.get_shape())))
      clipped_perturbation = tf.clip_by_norm(perturbation, epsilon, axes=axes)
    # Keep the perturbed image inside the valid input range.
    low, high = image_bounds[0], image_bounds[1]
    new_image = tf.clip_by_value(input_image + clipped_perturbation, low, high)
    return new_image - input_image

  return _projection
"""Wrapper around modules that provides additional facilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import abc import types from absl import logging from interval_bound_propagation.src import layers import six import sonnet as snt import tensorflow.compat.v1 as tf @six.add_metaclass(abc.ABCMeta) class VerifiableWrapper(object): """Abstract wrapper class.""" def __init__(self, module): self._module = module self._input_bounds = None self._output_bounds = None @property def input_bounds(self): assert self._input_bounds is not None return self._input_bounds @property def output_bounds(self): return self._output_bounds @property def module(self): return self._module def __str__(self): if isinstance(self._module, tf.Tensor): return str(self._module) if isinstance(self._module, types.LambdaType): return self._module.__name__ if isinstance(self._module, snt.AbstractModule): return self._module.module_name if hasattr(self._module, '__class__'): return self._module.__class__.__name__ return str(self._module) def propagate_bounds(self, *input_bounds): """Propagates bounds and saves input and output bounds.""" output_bounds = self._propagate_through(self.module, *input_bounds) if len(input_bounds) == 1: self._input_bounds = input_bounds[0] else: self._input_bounds = tuple(input_bounds) self._output_bounds = output_bounds return output_bounds @abc.abstractmethod def _propagate_through(self, module, *input_bounds): """Propagates bounds through a verifiable wrapper. Args: module: This wrapped module, through which bounds are to be propagated. *input_bounds: Bounds on the node's input(s). Returns: New bounds on the node's output. 
""" class ModelInputWrapper(object): """Virtual node representing the network's inputs.""" def __init__(self, index): super(ModelInputWrapper, self).__init__() self._index = index self._output_bounds = None @property def index(self): return self._index @property def output_bounds(self): return self._output_bounds @output_bounds.setter def output_bounds(self, bounds): self._output_bounds = bounds def __str__(self): return 'Model input {}'.format(self.index) class ConstWrapper(VerifiableWrapper): """Wraps a constant tensor.""" def _propagate_through(self, module): # Make sure that the constant value can be converted to a tensor. return tf.convert_to_tensor(module) class LinearFCWrapper(VerifiableWrapper): """Wraps fully-connected layers.""" def __init__(self, module): if not isinstance(module, snt.Linear): raise ValueError('Cannot wrap {} with a LinearFCWrapper.'.format(module)) super(LinearFCWrapper, self).__init__(module) def _propagate_through(self, module, input_bounds): w = module.w b = module.b if module.has_bias else None return input_bounds.apply_linear(self, w, b) class LinearConvWrapper(VerifiableWrapper): """Wraps convolutional layers.""" class LinearConv1dWrapper(LinearConvWrapper): """Wraps 1-D convolutional layers.""" def __init__(self, module): if not isinstance(module, snt.Conv1D): raise ValueError('Cannot wrap {} with a LinearConv1dWrapper.'.format( module)) super(LinearConv1dWrapper, self).__init__(module) def _propagate_through(self, module, input_bounds): w = module.w b = module.b if module.has_bias else None padding = module.padding stride = module.stride[1] return input_bounds.apply_conv1d(self, w, b, padding, stride) class LinearConv2dWrapper(LinearConvWrapper): """Wraps 2-D convolutional layers.""" def __init__(self, module): if not isinstance(module, snt.Conv2D): raise ValueError('Cannot wrap {} with a LinearConv2dWrapper.'.format( module)) super(LinearConv2dWrapper, self).__init__(module) def _propagate_through(self, module, input_bounds): w 
= module.w b = module.b if module.has_bias else None padding = module.padding strides = module.stride[1:-1] return input_bounds.apply_conv2d(self, w, b, padding, strides) class IncreasingMonotonicWrapper(VerifiableWrapper): """Wraps monotonically increasing functions of the inputs.""" def __init__(self, module, **parameters): super(IncreasingMonotonicWrapper, self).__init__(module) self._parameters = parameters @property def parameters(self): return self._parameters def _propagate_through(self, module, main_bounds, *other_input_bounds): return main_bounds.apply_increasing_monotonic_fn(self, module, *other_input_bounds, **self.parameters) class SoftmaxWrapper(VerifiableWrapper): """Wraps softmax layers.""" def __init__(self): super(SoftmaxWrapper, self).__init__(None) def _propagate_through(self, module, input_bounds): return input_bounds.apply_softmax(self) class PiecewiseMonotonicWrapper(VerifiableWrapper): """Wraps a piecewise (not necessarily increasing) monotonic function.""" def __init__(self, module, boundaries=()): super(PiecewiseMonotonicWrapper, self).__init__(module) self._boundaries = boundaries @property def boundaries(self): return self._boundaries def _propagate_through(self, module, main_bounds, *other_input_bounds): return main_bounds.apply_piecewise_monotonic_fn(self, module, self.boundaries, *other_input_bounds) class ImageNormWrapper(IncreasingMonotonicWrapper): """Convenience wrapper for getting track of the ImageNorm layer.""" def __init__(self, module): if not isinstance(module, layers.ImageNorm): raise ValueError('Cannot wrap {} with a ImageNormWrapper.'.format(module)) super(ImageNormWrapper, self).__init__(module.apply) self._inner_module = module @property def inner_module(self): return self._inner_module class BatchNormWrapper(VerifiableWrapper): """Wraps batch normalization.""" def __init__(self, module): if not isinstance(module, snt.BatchNorm): raise ValueError('Cannot wrap {} with a BatchNormWrapper.'.format( module)) 
super(BatchNormWrapper, self).__init__(module) def _propagate_through(self, module, input_bounds): if isinstance(module, layers.BatchNorm): # This IBP-specific batch-norm implementation exposes stats recorded # the most recent time the BatchNorm module was connected. # These will be either the batch stats (e.g. if training) or the moving # averages, depending on how the module was called. mean = module.mean variance = module.variance epsilon = module.epsilon scale = module.scale bias = module.bias else: # This plain Sonnet batch-norm implementation only exposes the # moving averages. logging.warn('Sonnet BatchNorm module encountered: %s. ' 'IBP will always use its moving averages, not the local ' 'batch stats, even in training mode.', str(module)) mean = module.moving_mean variance = module.moving_variance epsilon = module._eps # pylint: disable=protected-access try: bias = module.beta except snt.Error: bias = None try: scale = module.gamma except snt.Error: scale = None return input_bounds.apply_batch_norm(self, mean, variance, scale, bias, epsilon) class BatchReshapeWrapper(VerifiableWrapper): """Wraps batch reshape.""" def __init__(self, module, shape): if not isinstance(module, snt.BatchReshape): raise ValueError('Cannot wrap {} with a BatchReshapeWrapper.'.format( module)) super(BatchReshapeWrapper, self).__init__(module) self._shape = shape @property def shape(self): return self._shape def _propagate_through(self, module, input_bounds): return input_bounds.apply_batch_reshape(self, self.shape) class BatchFlattenWrapper(BatchReshapeWrapper): """Wraps batch flatten.""" def __init__(self, module): if not isinstance(module, snt.BatchFlatten): raise ValueError('Cannot wrap {} with a BatchFlattenWrapper.'.format( module)) super(BatchFlattenWrapper, self).__init__(module, [-1]) ================================================ FILE: interval_bound_propagation/tests/attacks_test.py ================================================ # coding=utf-8 # Copyright 2019 The 
class MockWithIsTraining(object):
  """Mock wrapper around the predictor network.

  Accepts an `is_training` keyword and asserts that it is False.
  """

  def __init__(self, module, test):
    self._module = module
    self._test = test

  def __call__(self, z0, is_training=False):
    # is_training should be False.
    self._test.assertFalse(is_training)
    return self._module(z0)


class MockWithoutIsTraining(object):
  """Mock wrapper around the predictor network.

  Its call signature does not accept an `is_training` argument.
  """

  def __init__(self, module, test):
    self._module = module
    self._test = test

  def __call__(self, z0):
    return self._module(z0)


class AttacksTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.named_parameters(
      ('UntargetedWithGradientDescent', MockWithIsTraining,
       ibp.UntargetedPGDAttack, ibp.UnrolledGradientDescent, 1.),
      ('UntargetedWithAdam', MockWithIsTraining,
       ibp.UntargetedPGDAttack, ibp.UnrolledAdam, 1.),
      ('MultiTargetedWithGradientDescent', MockWithIsTraining,
       ibp.MultiTargetedPGDAttack, ibp.UnrolledGradientDescent, 1.),
      ('MultiTargetedWithAdam', MockWithIsTraining,
       ibp.MultiTargetedPGDAttack, ibp.UnrolledAdam, 1.),
      ('DiverseEpsilon', MockWithIsTraining,
       ibp.MultiTargetedPGDAttack, ibp.UnrolledAdam, [1., 1.]),
      ('WithoutIsTraining', MockWithoutIsTraining,
       ibp.UntargetedPGDAttack, ibp.UnrolledGradientDescent, 1.),
      ('Restarted', MockWithIsTraining,
       ibp.UntargetedPGDAttack, ibp.UnrolledGradientDescent, 1., True),
      ('SPSA', MockWithIsTraining,
       ibp.UntargetedPGDAttack, ibp.UnrolledSPSAAdam, 1.))
  def testEndToEnd(self, predictor_cls, attack_cls, optimizer_cls, epsilon,
                   restarted=False):
    """Checks that attacks reach the expected extreme inputs.

    One attack maximizes and one minimizes the output of a single linear unit
    (unit weights) on the input [1, 2], within the box (-.5, 2.5).
    """
    # l-\infty norm of perturbation ball.
    if isinstance(epsilon, list):
      # We test the ability to have different epsilons across dimensions.
      epsilon = tf.constant([epsilon], dtype=tf.float32)
    bounds = (-.5, 2.5)
    # Create a simple network.
    m = snt.Linear(1, initializers={
        'w': tf.constant_initializer(1.),
        'b': tf.constant_initializer(1.),
    })
    z = tf.constant([[1, 2]], dtype=tf.float32)
    predictor = predictor_cls(m, self)
    # Not important for the test but needed.
    labels = tf.constant([1], dtype=tf.int64)

    def _attacked_input(coefficient):
      """Builds an attack on `coefficient * output` and returns its result."""
      spec = ibp.LinearSpecification(tf.constant([[[coefficient]]]))
      attack = attack_cls(predictor, spec, epsilon, input_bounds=bounds,
                          optimizer_builder=optimizer_cls)
      if restarted:
        attack = ibp.RestartedAttack(attack, num_restarts=10)
      return attack(z, labels)

    # We create two attacks to maximize and then minimize the output.
    z_max = _attacked_input(1.)
    z_min = _attacked_input(-1.)

    # `test_session` is deprecated in favour of `cached_session`.
    with self.cached_session() as sess:
      sess.run(tf.global_variables_initializer())
      z_max_values, z_min_values = sess.run([z_max, z_min])
      z_max_values = z_max_values[0]
      z_min_values = z_min_values[0]
      self.assertAlmostEqual(2., z_max_values[0])
      self.assertAlmostEqual(2.5, z_max_values[1])
      self.assertAlmostEqual(0., z_min_values[0])
      self.assertAlmostEqual(1., z_min_values[1])
"""Tests for bounds.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl.testing import parameterized import interval_bound_propagation as ibp import numpy as np import sonnet as snt import tensorflow.compat.v1 as tf class IntervalBoundsTest(parameterized.TestCase, tf.test.TestCase): def testFCIntervalBounds(self): m = snt.Linear(1, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([[1, 2, 3]], dtype=tf.float32) m(z) # Connect to create weights. m = ibp.LinearFCWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) output_bounds = m.propagate_bounds(input_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(5., l) self.assertAlmostEqual(11., u) def testConv1dIntervalBounds(self): m = snt.Conv1D( output_channels=1, kernel_shape=2, padding='VALID', stride=1, use_bias=True, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 1]) m(z) # Connect to create weights. m = ibp.LinearConv1dWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) output_bounds = m.propagate_bounds(input_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(7., l) self.assertAlmostEqual(11., u) def testConv2dIntervalBounds(self): m = snt.Conv2D( output_channels=1, kernel_shape=(2, 2), padding='VALID', stride=1, use_bias=True, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([1, 2, 3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2, 1]) m(z) # Connect to create weights. 
m = ibp.LinearConv2dWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) output_bounds = m.propagate_bounds(input_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(8., l) self.assertAlmostEqual(16., u) def testReluIntervalBounds(self): m = tf.nn.relu z = tf.constant([[-2, 3]], dtype=tf.float32) m = ibp.IncreasingMonotonicWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) output_bounds = m.propagate_bounds(input_bounds) with self.test_session() as sess: l, u = sess.run([output_bounds.lower, output_bounds.upper]) self.assertAlmostEqual([[0., 2.]], l.tolist()) self.assertAlmostEqual([[0., 4.]], u.tolist()) def testMulIntervalBounds(self): m = tf.multiply z = tf.constant([[-2, 3, 0]], dtype=tf.float32) m = ibp.PiecewiseMonotonicWrapper(m, (0,)) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) output_bounds = m.propagate_bounds(input_bounds, input_bounds) with self.test_session() as sess: l, u = sess.run([output_bounds.lower, output_bounds.upper]) self.assertAlmostEqual([[1., 4., -1.]], l.tolist()) self.assertAlmostEqual([[9., 16., 1.]], u.tolist()) def testSubIntervalBounds(self): m = tf.subtract z = tf.constant([[-2, 3, 0]], dtype=tf.float32) m = ibp.PiecewiseMonotonicWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) output_bounds = m.propagate_bounds(input_bounds, input_bounds) with self.test_session() as sess: l, u = sess.run([output_bounds.lower, output_bounds.upper]) self.assertAlmostEqual([[-2., -2., -2.]], l.tolist()) self.assertAlmostEqual([[2., 2., 2.]], u.tolist()) @parameterized.named_parameters( ('DefaultAxis', -1, [[[1., 0.5, 0.5], [1., 0.5, 0.5]], [[1. / 3, 0., 0.], [1. 
/ 3, 0., 0.]]]), ('NonDefaultAxis', 0, [[[1., 1., 1.], [1., 1., 1.]], [[0., 0., 0.], [0., 0., 0.]]])) def testSoftmaxIntervalBounds(self, axis, expected_outputs): z = tf.constant([[1., -10., -10.], [1., -10., -10.]]) input_bounds = ibp.IntervalBounds(z - 1.0, z + 10.0) softmax_fn = lambda x: tf.nn.softmax(x, axis=axis) softmax_fn = ibp.VerifiableModelWrapper(softmax_fn) softmax_fn(z) output_bounds = softmax_fn.propagate_bounds(input_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) self.assertTrue(np.all(np.abs(expected_outputs[0] - u) < 1e-3)) self.assertTrue(np.all(np.abs(expected_outputs[1] - l) < 1e-3)) def testBatchNormIntervalBounds(self): z = tf.constant([[1, 2, 3]], dtype=tf.float32) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) g = tf.reshape(tf.range(-1, 2, dtype=tf.float32), [1, 3]) b = tf.reshape(tf.range(3, dtype=tf.float32), [1, 3]) batch_norm = ibp.BatchNorm(scale=True, offset=True, eps=0., initializers={ 'gamma': lambda *args, **kwargs: g, 'beta': lambda *args, **kwargs: b, 'moving_mean': tf.constant_initializer(1.), 'moving_variance': tf.constant_initializer(4.), }) batch_norm(z, is_training=False) batch_norm = ibp.BatchNormWrapper(batch_norm) # Test propagation. output_bounds = batch_norm.propagate_bounds(input_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) self.assertAlmostEqual([[-.5, 1., 2.5]], l.tolist()) self.assertAlmostEqual([[.5, 1., 3.5]], u.tolist()) def testCaching(self): m = snt.Linear(1, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.placeholder(shape=(1, 3), dtype=tf.float32) m(z) # Connect to create weights. m = ibp.LinearFCWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 
output_bounds = m.propagate_bounds(input_bounds) input_bounds.enable_caching() output_bounds.enable_caching() update_all_caches_op = tf.group([input_bounds.update_cache_op, output_bounds.update_cache_op]) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) # Initialise the caches based on the model inputs. sess.run(update_all_caches_op, feed_dict={z: [[1., 2., 3.]]}) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(5., l) self.assertAlmostEqual(11., u) # Update the cache based on a different set of inputs. sess.run([output_bounds.update_cache_op], feed_dict={z: [[2., 3., 7.]]}) # We only updated the output bounds' cache. # This asserts that the computation depends on the underlying # input bounds tensor, not on cached version of it. # (Thus it doesn't matter what order the caches are updated.) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(11., l) self.assertAlmostEqual(17., u) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/crown_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for CROWN bounds.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import interval_bound_propagation as ibp import numpy as np import sonnet as snt import tensorflow.compat.v1 as tf def _generate_identity_spec(modules, shape, dimension=1): spec = ibp.LinearSpecification(tf.reshape(tf.eye(dimension), shape), prune_irrelevant=False) initial_bound = ibp.crown.create_initial_backward_bounds(spec, modules) return initial_bound class CROWNBoundsTest(tf.test.TestCase): def testFCBackwardBounds(self): m = snt.Linear(1, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([[1, 2, 3]], dtype=tf.float32) m(z) # Connect to create weights. m = ibp.LinearFCWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) m.propagate_bounds(input_bounds) # Create IBP bounds. crown_init_bounds = _generate_identity_spec([m], shape=(1, 1, 1)) output_bounds = m.propagate_bounds(crown_init_bounds) concrete_bounds = output_bounds.concretize() with self.test_session() as sess: sess.run(tf.global_variables_initializer()) lw, uw, lb, ub, cl, cu = sess.run([output_bounds.lower.w, output_bounds.upper.w, output_bounds.lower.b, output_bounds.upper.b, concrete_bounds.lower, concrete_bounds.upper]) self.assertTrue(np.all(lw == 1.)) self.assertTrue(np.all(lb == 2.)) self.assertTrue(np.all(uw == 1.)) self.assertTrue(np.all(ub == 2.)) cl = cl.item() cu = cu.item() self.assertAlmostEqual(5., cl) self.assertAlmostEqual(11., cu) def testConv2dBackwardBounds(self): m = snt.Conv2D( output_channels=1, kernel_shape=(2, 2), padding='VALID', stride=1, use_bias=True, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([1, 2, 3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2, 1]) m(z) # Connect to create weights. m = ibp.LinearConv2dWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 
m.propagate_bounds(input_bounds) # Create IBP bounds. crown_init_bounds = _generate_identity_spec([m], shape=(1, 1, 1, 1, 1)) output_bounds = m.propagate_bounds(crown_init_bounds) concrete_bounds = output_bounds.concretize() with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([concrete_bounds.lower, concrete_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(8., l) self.assertAlmostEqual(16., u) def testReluBackwardBounds(self): m = tf.nn.relu z = tf.constant([[-2, 3]], dtype=tf.float32) m = ibp.IncreasingMonotonicWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) m.propagate_bounds(input_bounds) # Create IBP bounds. crown_init_bounds = _generate_identity_spec([m], shape=(1, 2, 2), dimension=2) output_bounds = m.propagate_bounds(crown_init_bounds) concrete_bounds = output_bounds.concretize() with self.test_session() as sess: l, u = sess.run([concrete_bounds.lower, concrete_bounds.upper]) self.assertAlmostEqual([[0., 2.]], l.tolist()) self.assertAlmostEqual([[0., 4.]], u.tolist()) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/fastlin_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for symbolic bounds.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl.testing import parameterized import interval_bound_propagation as ibp import numpy as np import sonnet as snt import tensorflow.compat.v1 as tf class SymbolicBoundsTest(parameterized.TestCase, tf.test.TestCase): def testConvertSymbolicBounds(self): z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2]) b = ibp.SymbolicBounds.convert(z) for l in (b.lower, b.upper): self.assertEqual([1, 4, 2, 2], l.w.shape.as_list()) self.assertEqual([1, 2, 2], l.b.shape.as_list()) self.assertEqual([1, 4], l.lower.shape.as_list()) self.assertEqual([1, 4], l.upper.shape.as_list()) def testFCSymbolicBounds(self): m = snt.Linear(1, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([[1, 2, 3]], dtype=tf.float32) m(z) # Connect to create weights. m = ibp.LinearFCWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 
input_bounds = ibp.SymbolicBounds.convert(input_bounds) output_bounds = m.propagate_bounds(input_bounds) concrete_bounds = ibp.IntervalBounds.convert(output_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u, cl, cu = sess.run([output_bounds.lower, output_bounds.upper, concrete_bounds.lower, concrete_bounds.upper]) self.assertTrue(np.all(l.w == 1.)) self.assertTrue(np.all(l.b == 2.)) self.assertAlmostEqual([[0, 1, 2]], l.lower.tolist()) self.assertAlmostEqual([[2, 3, 4]], l.upper.tolist()) self.assertTrue(np.all(u.w == 1.)) self.assertTrue(np.all(u.b == 2.)) self.assertAlmostEqual([[0, 1, 2]], u.lower.tolist()) self.assertAlmostEqual([[2, 3, 4]], u.upper.tolist()) cl = cl.item() cu = cu.item() self.assertAlmostEqual(5., cl) self.assertAlmostEqual(11., cu) def testConv2dSymbolicBounds(self): m = snt.Conv2D( output_channels=1, kernel_shape=(2, 2), padding='VALID', stride=1, use_bias=True, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(2.), }) z = tf.constant([1, 2, 3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2, 1]) m(z) # Connect to create weights. m = ibp.LinearConv2dWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) input_bounds = ibp.SymbolicBounds.convert(input_bounds) output_bounds = m.propagate_bounds(input_bounds) output_bounds = ibp.IntervalBounds.convert(output_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(8., l) self.assertAlmostEqual(16., u) def testConv1dSymbolicBounds(self): m = snt.Conv1D( output_channels=1, kernel_shape=(2), padding='VALID', stride=1, use_bias=True, initializers={ 'w': tf.constant_initializer(1.), 'b': tf.constant_initializer(3.), }) z = tf.constant([3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 1]) m(z) # Connect to create weights. 
m = ibp.LinearConv1dWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) input_bounds = ibp.SymbolicBounds.convert(input_bounds) output_bounds = m.propagate_bounds(input_bounds) output_bounds = ibp.IntervalBounds.convert(output_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) l, u = sess.run([output_bounds.lower, output_bounds.upper]) l = l.item() u = u.item() self.assertAlmostEqual(8., l) self.assertAlmostEqual(12., u) def testReluSymbolicBounds(self): m = tf.nn.relu z = tf.constant([[-2, 3]], dtype=tf.float32) m = ibp.IncreasingMonotonicWrapper(m) input_bounds = ibp.IntervalBounds(z - 1., z + 1.) input_bounds = ibp.SymbolicBounds.convert(input_bounds) output_bounds = m.propagate_bounds(input_bounds) output_bounds = ibp.IntervalBounds.convert(output_bounds) with self.test_session() as sess: l, u = sess.run([output_bounds.lower, output_bounds.upper]) self.assertAlmostEqual([[0., 2.]], l.tolist()) self.assertAlmostEqual([[0., 4.]], u.tolist()) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/layers_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for layers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import interval_bound_propagation as ibp import numpy as np import tensorflow.compat.v1 as tf def _get_inputs(dtype=tf.float32): v = np.array(range(6), dtype=dtype.as_numpy_dtype) input_v = np.array([v] * 7) inputs = tf.constant(input_v) return v, input_v, inputs class LayersTest(tf.test.TestCase): def assertBetween(self, value, minv, maxv): """Asserts that value is between minv and maxv (inclusive).""" self.assertLessEqual(minv, value) self.assertGreaterEqual(maxv, value) # Subset of the tests in sonnet/python/modules/batch_norm_test.py. def testBatchNormUpdateImproveStatistics(self): """Test that updating the moving_mean improves statistics.""" _, _, inputs = _get_inputs() # Use small decay_rate to update faster. bn = ibp.BatchNorm(offset=False, scale=False, decay_rate=0.1, update_ops_collection=tf.GraphKeys.UPDATE_OPS) out1 = bn(inputs, is_training=False) # Build the update ops. bn(inputs, is_training=True) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) out_v = sess.run(out1) # Before updating the moving_mean the results are off. self.assertBetween(np.max(np.abs(np.zeros([7, 6]) - out_v)), 2, 5) sess.run(tuple(tf.get_collection(tf.GraphKeys.UPDATE_OPS))) # After updating the moving_mean the results are better. out_v = sess.run(out1) self.assertBetween(np.max(np.abs(np.zeros([7, 6]) - out_v)), 1, 2) def testImageNorm(self): mean = [4, 0, -4] std = [1., 2., 4.] 
image = tf.constant(4., shape=[10, 2, 2, 3]) normalized_image = ibp.ImageNorm(mean, std)(image) with self.test_session() as sess: out_image = sess.run(normalized_image) self.assertTrue(np.all(np.isclose(out_image[:, :, :, 0], 0.))) self.assertTrue(np.all(np.isclose(out_image[:, :, :, 1], 2.))) self.assertTrue(np.all(np.isclose(out_image[:, :, :, 2], 2.))) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/loss_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Tests for loss.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import interval_bound_propagation as ibp import sonnet as snt import tensorflow.compat.v1 as tf class FixedNN(snt.AbstractModule): def _build(self, z0, is_training=False): self._m = snt.Linear(2, initializers={ 'w': tf.constant_initializer(1.), 'b': lambda *unsed_args, **unused_kwargs: tf.constant([0., 1.]), }) return self._m(z0) class LossTest(tf.test.TestCase): def testEndToEnd(self): predictor = FixedNN() predictor = ibp.VerifiableModelWrapper(predictor) # Labels. labels = tf.constant([1], dtype=tf.int64) # Connect to input. z = tf.constant([[1, 2, 3]], dtype=tf.float32) predictor(z, is_training=True) # Input bounds. eps = 1. 
input_bounds = ibp.IntervalBounds(z - eps, z + eps) predictor.propagate_bounds(input_bounds) # Create output specification (that forces the first logits to be greater). c = tf.constant([[[1, -1]]], dtype=tf.float32) d = tf.constant([[0]], dtype=tf.float32) # Turn elision off for more interesting results. spec = ibp.LinearSpecification(c, d, collapse=False) # Create an attack. attack = ibp.UntargetedPGDAttack( predictor, spec, eps, num_steps=1, input_bounds=(-100., 100)) # Build loss. losses = ibp.Losses(predictor, spec, attack, interval_bounds_loss_type='hinge', interval_bounds_hinge_margin=0.) losses(labels) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) # We expect the worst-case logits from IBP to be [9, 4]. # The adversarial attack should fail since logits are always [l, l + 1]. # Similarly, the nominal predictions are correct. accuracy_values, loss_values = sess.run( [losses.scalar_metrics, losses.scalar_losses]) self.assertAlmostEqual(1., accuracy_values.nominal_accuracy) self.assertAlmostEqual(0., accuracy_values.verified_accuracy) self.assertAlmostEqual(1., accuracy_values.attack_accuracy) expected_xent = 0.31326168751822947 self.assertAlmostEqual(expected_xent, loss_values.nominal_cross_entropy, places=5) self.assertAlmostEqual(expected_xent, loss_values.attack_cross_entropy, places=5) expected_hinge = 5. self.assertAlmostEqual(expected_hinge, loss_values.verified_loss) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/model_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Tests for model.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl.testing import parameterized import interval_bound_propagation as ibp import numpy as np import sonnet as snt import tensorflow.compat.v1 as tf def _build_model(): num_classes = 3 layer_types = ( ('conv2d', (2, 2), 4, 'VALID', 1), ('activation', 'relu'), ('linear', 10), ('activation', 'relu')) return ibp.DNN(num_classes, layer_types) class ModelTest(parameterized.TestCase, tf.test.TestCase): def testDNN(self): predictor = _build_model() # Input. z = tf.constant([1, 2, 3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2, 1]) predictor(z) # Verify the variables that are created. expected_shapes = { 'predictor/conv2d_0/w:0': (2, 2, 1, 4), 'predictor/conv2d_0/b:0': (4,), 'predictor/linear_0/w:0': (4, 10), 'predictor/linear_0/b:0': (10,), 'predictor/linear_1/w:0': (10, 3), 'predictor/linear_1/b:0': (3,), } for v in predictor.get_variables(): self.assertEqual(expected_shapes[v.name], v.shape) def _propagation_test(self, wrapper, inputs, outputs): input_bounds = ibp.IntervalBounds(inputs, inputs) output_bounds = wrapper.propagate_bounds(input_bounds) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) o, l, u = sess.run([outputs, output_bounds.lower, output_bounds.upper]) self.assertAlmostEqual(o.tolist(), l.tolist()) self.assertAlmostEqual(o.tolist(), u.tolist()) def testVerifiableModelWrapperDNN(self): predictor = _build_model() # Input. 
z = tf.constant([1, 2, 3, 4], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2, 1]) wrapper = ibp.VerifiableModelWrapper(predictor) wrapper(z) # Verify basic wrapping. self.assertEqual(predictor, wrapper.wrapped_network) self.assertEqual(3, wrapper.output_size) self.assertEqual((1, 3), tuple(wrapper.logits.shape.as_list())) self.assertEqual(z, wrapper.inputs) # Build another input and test reuse. z2 = tf.constant([1, 2, 3, 4], dtype=tf.float32) z2 = tf.reshape(z, [1, 2, 2, 1]) logits = wrapper(z2, reuse=True) self.assertEqual(z, wrapper.inputs) self.assertNotEqual(z2, wrapper.inputs) # Check that the verifiable modules are constructed. self.assertLen(wrapper.input_wrappers, 1) self.assertLen(wrapper.modules, 6) self.assertIsInstance(wrapper.modules[0].module, snt.Conv2D) self.assertEqual(wrapper.modules[1].module, tf.nn.relu) self.assertIsInstance(wrapper.modules[2].module, snt.BatchFlatten) self.assertIsInstance(wrapper.modules[3].module, snt.Linear) self.assertEqual(wrapper.modules[4].module, tf.nn.relu) self.assertIsInstance(wrapper.modules[5].module, snt.Linear) # It's a sequential network, so all nodes (including input) have fanout 1. self.assertEqual(wrapper.fanout_of(wrapper.input_wrappers[0]), 1) for module in wrapper.modules: self.assertEqual(wrapper.fanout_of(module), 1) # Check propagation. self._propagation_test(wrapper, z2, logits) def testVerifiableModelWrapperResnet(self): def _build(z0, is_training=False): # pylint: disable=unused-argument input_size = np.prod(z0.shape[1:]) # We make a resnet-like structure. z = snt.Linear(input_size)(z0) z_left = tf.nn.relu(z) z_left = snt.Linear(input_size)(z_left) z = z_left + z0 return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) self.assertLen(wrapper.input_wrappers, 1) self.assertLen(wrapper.modules, 5) # Check input has fanout 2, as it is the start of the resnet block. 
self.assertEqual(wrapper.fanout_of(wrapper.input_wrappers[0]), 2) for module in wrapper.modules: self.assertEqual(wrapper.fanout_of(module), 1) # Check propagation. self._propagation_test(wrapper, z, logits) def testVerifiableModelWrapperPool(self): def _build(z0): z = tf.reduce_mean(z0, axis=1, keep_dims=True) z = tf.reduce_max(z, axis=2, keep_dims=False) return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) z = tf.reshape(z, [1, 2, 2]) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) self.assertLen(wrapper.modules, 3) # Check propagation. self._propagation_test(wrapper, z, logits) def testVerifiableModelWrapperConcat(self): def _build(z0): z = snt.Linear(10)(z0) z = tf.concat([z, z0], axis=1) return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) self.assertLen(wrapper.modules, 3) # Check propagation. self._propagation_test(wrapper, z, logits) def testVerifiableModelWrapperExpandAndSqueeze(self): def _build(z0): z = snt.Linear(10)(z0) z = tf.expand_dims(z, axis=-1) z = tf.squeeze(z, axis=-1) return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) self.assertLen(wrapper.modules, 4) # Check propagation. 
self._propagation_test(wrapper, z, logits) @parameterized.named_parameters( ('Add', lambda z: z + z, 3), ('Sub', lambda z: z - z, 3), ('Identity', tf.identity, 3), ('Mul', lambda z: z * z, 3), ('Slice', lambda z: tf.slice(z, [0, 0], [-1, 5]), 3), ('StridedSlice', lambda z: z[:, :5], 3), ('Reshape', lambda z: tf.reshape(z, [2, 5]), 3), ('Const', lambda z: z + tf.ones_like(z), 5)) def testVerifiableModelWrapperSimple(self, fn, expected_modules): def _build(z0): z = snt.Linear(10)(z0) z = fn(z) return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) self.assertLen(wrapper.modules, expected_modules) # Check propagation. self._propagation_test(wrapper, z, logits) def testPointlessReshape(self): def _build(z0): z = snt.Linear(10)(z0) z = snt.BatchFlatten()(z) # This is a no-op; no graph nodes created. return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) # Expect the batch flatten to have been skipped. self.assertLen(wrapper.modules, 2) self.assertIsInstance(wrapper.modules[0], ibp.LinearFCWrapper) self.assertIsInstance(wrapper.modules[1], ibp.LinearFCWrapper) # Check propagation. self._propagation_test(wrapper, z, logits) def testLeakyRelu(self): def _build(z0): z = snt.Linear(10)(z0) z = tf.nn.leaky_relu(z0, alpha=0.375) return snt.Linear(2)(z) z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z) self.assertLen(wrapper.modules, 3) self.assertEqual(wrapper.modules[1].module.__name__, 'leaky_relu') self.assertEqual(wrapper.modules[1].parameters['alpha'], 0.375) # Check propagation. self._propagation_test(wrapper, z, logits) def testMultipleInputs(self): # Tensor to overwrite. 
def _build(z0, z1): return z0 + z1 z0 = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) z1 = tf.constant([[2, 2, 4, 4]], dtype=tf.float32) wrapper = ibp.VerifiableModelWrapper(_build) logits = wrapper(z0, z1) input_bounds0 = ibp.IntervalBounds(z0 - 2, z0 + 1) input_bounds1 = ibp.IntervalBounds(z1, z1 + 10) output_bounds = wrapper.propagate_bounds(input_bounds0, input_bounds1) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) o, l, u = sess.run([logits, output_bounds.lower, output_bounds.upper]) print(o, l, u) self.assertAlmostEqual([[3., 4., 7., 8.]], o.tolist()) self.assertAlmostEqual([[1., 2., 5., 6.]], l.tolist()) self.assertAlmostEqual([[14., 15., 18., 19.]], u.tolist()) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/relative_bounds_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for relative_bounds.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl.testing import parameterized import interval_bound_propagation as ibp from interval_bound_propagation import layer_utils import numpy as np import sonnet as snt import tensorflow.compat.v1 as tf class RelativeIntervalBoundsTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('float32', tf.float32), ('float64', tf.float64)) def test_linear_bounds_shape(self, dtype): batch_size = 11 input_size = 7 output_size = 5 w = tf.placeholder(dtype=dtype, shape=(input_size, output_size)) b = tf.placeholder(dtype=dtype, shape=(output_size,)) lb_rel_in = tf.placeholder(dtype=dtype, shape=(batch_size, input_size)) ub_rel_in = tf.placeholder(dtype=dtype, shape=(batch_size, input_size)) nominal = tf.placeholder(dtype=dtype, shape=(batch_size, input_size)) bounds_in = ibp.RelativeIntervalBounds(lb_rel_in, ub_rel_in, nominal) bounds_out = bounds_in.apply_linear(None, w, b) lb_out, ub_out = bounds_out.lower, bounds_out.upper self.assertEqual(dtype, lb_out.dtype) self.assertEqual(dtype, ub_out.dtype) self.assertEqual((batch_size, output_size), lb_out.shape) self.assertEqual((batch_size, output_size), ub_out.shape) @parameterized.named_parameters(('float32', tf.float32, 1.e-6), ('float64', tf.float64, 1.e-8)) def test_linear_bounds(self, dtype, tol): w = tf.constant([[1.0, 2.0, 3.0], [4.0, -5.0, 6.0]], dtype=dtype) b = tf.constant([0.1, 0.2, 0.3], dtype=dtype) lb_in = tf.constant([[-1.0, -1.0]], dtype=dtype) ub_in = tf.constant([[2.0, 2.0]], dtype=dtype) nominal = tf.constant([[3.1, 4.2]], dtype=dtype) bounds_in = ibp.RelativeIntervalBounds(lb_in - nominal, ub_in - nominal, nominal) bounds_out = bounds_in.apply_linear(None, w, b) lb_out, ub_out = bounds_out.lower, bounds_out.upper lb_out_exp = np.array([[-4.9, -11.8, -8.7]]) ub_out_exp = np.array([[10.1, 9.2, 18.3]]) with self.test_session() as session: 
lb_out_act, ub_out_act = session.run((lb_out, ub_out)) self.assertAllClose(lb_out_exp, lb_out_act, atol=tol, rtol=tol) self.assertAllClose(ub_out_exp, ub_out_act, atol=tol, rtol=tol) @parameterized.named_parameters(('float32', tf.float32), ('float64', tf.float64)) def test_conv2d_bounds_shape(self, dtype): batch_size = 23 input_height = 17 input_width = 7 kernel_height = 3 kernel_width = 4 input_channels = 3 output_channels = 5 padding = 'VALID' strides = (2, 1) # Expected output dimensions, based on convolution settings. output_height = 8 output_width = 4 w = tf.placeholder(dtype=dtype, shape=( kernel_height, kernel_width, input_channels, output_channels)) b = tf.placeholder(dtype=dtype, shape=(output_channels,)) lb_rel_in = tf.placeholder(dtype=dtype, shape=( batch_size, input_height, input_width, input_channels)) ub_rel_in = tf.placeholder(dtype=dtype, shape=( batch_size, input_height, input_width, input_channels)) nominal = tf.placeholder(dtype=dtype, shape=( batch_size, input_height, input_width, input_channels)) bounds_in = ibp.RelativeIntervalBounds(lb_rel_in, ub_rel_in, nominal) bounds_out = bounds_in.apply_conv2d(None, w, b, padding, strides) lb_out, ub_out = bounds_out.lower, bounds_out.upper self.assertEqual(dtype, lb_out.dtype) self.assertEqual(dtype, ub_out.dtype) self.assertEqual((batch_size, output_height, output_width, output_channels), lb_out.shape) self.assertEqual((batch_size, output_height, output_width, output_channels), ub_out.shape) @parameterized.named_parameters(('float32', tf.float32, 1.e-5), ('float64', tf.float64, 1.e-8)) def test_conv2d_bounds(self, dtype, tol): batch_size = 53 input_height = 17 input_width = 7 kernel_height = 3 kernel_width = 4 input_channels = 3 output_channels = 2 padding = 'VALID' strides = (2, 1) w = tf.random_normal(dtype=dtype, shape=( kernel_height, kernel_width, input_channels, output_channels)) b = tf.random_normal(dtype=dtype, shape=(output_channels,)) lb_in = tf.random_normal(dtype=dtype, shape=( batch_size, 
input_height, input_width, input_channels)) ub_in = tf.random_normal(dtype=dtype, shape=( batch_size, input_height, input_width, input_channels)) lb_in, ub_in = tf.minimum(lb_in, ub_in), tf.maximum(lb_in, ub_in) nominal = tf.random_normal(dtype=dtype, shape=( batch_size, input_height, input_width, input_channels)) bounds_in = ibp.RelativeIntervalBounds(lb_in - nominal, ub_in - nominal, nominal) bounds_out = bounds_in.apply_conv2d(None, w, b, padding, strides) lb_out, ub_out = bounds_out.lower, bounds_out.upper # Compare against equivalent linear layer. bounds_out_lin = _materialised_conv_bounds( w, b, padding, strides, bounds_in) lb_out_lin, ub_out_lin = bounds_out_lin.lower, bounds_out_lin.upper with self.test_session() as session: (lb_out_val, ub_out_val, lb_out_lin_val, ub_out_lin_val) = session.run((lb_out, ub_out, lb_out_lin, ub_out_lin)) self.assertAllClose(lb_out_val, lb_out_lin_val, atol=tol, rtol=tol) self.assertAllClose(ub_out_val, ub_out_lin_val, atol=tol, rtol=tol) @parameterized.named_parameters(('float32', tf.float32), ('float64', tf.float64)) def test_conv1d_bounds_shape(self, dtype): batch_size = 23 input_length = 13 kernel_length = 3 input_channels = 3 output_channels = 5 padding = 'VALID' strides = (2,) # Expected output dimensions, based on convolution settings. 
output_length = 6 w = tf.placeholder(dtype=dtype, shape=( kernel_length, input_channels, output_channels)) b = tf.placeholder(dtype=dtype, shape=(output_channels,)) lb_rel_in = tf.placeholder(dtype=dtype, shape=( batch_size, input_length, input_channels)) ub_rel_in = tf.placeholder(dtype=dtype, shape=( batch_size, input_length, input_channels)) nominal = tf.placeholder(dtype=dtype, shape=( batch_size, input_length, input_channels)) bounds_in = ibp.RelativeIntervalBounds(lb_rel_in, ub_rel_in, nominal) bounds_out = bounds_in.apply_conv1d(None, w, b, padding, strides[0]) lb_out, ub_out = bounds_out.lower, bounds_out.upper self.assertEqual(dtype, lb_out.dtype) self.assertEqual(dtype, ub_out.dtype) self.assertEqual((batch_size, output_length, output_channels), lb_out.shape) self.assertEqual((batch_size, output_length, output_channels), ub_out.shape) @parameterized.named_parameters(('float32', tf.float32, 1.e-5), ('float64', tf.float64, 1.e-8)) def test_conv1d_bounds(self, dtype, tol): batch_size = 53 input_length = 13 kernel_length = 5 input_channels = 3 output_channels = 2 padding = 'VALID' strides = (2,) w = tf.random_normal(dtype=dtype, shape=( kernel_length, input_channels, output_channels)) b = tf.random_normal(dtype=dtype, shape=(output_channels,)) lb_in = tf.random_normal(dtype=dtype, shape=( batch_size, input_length, input_channels)) ub_in = tf.random_normal(dtype=dtype, shape=( batch_size, input_length, input_channels)) lb_in, ub_in = tf.minimum(lb_in, ub_in), tf.maximum(lb_in, ub_in) nominal = tf.random_normal(dtype=dtype, shape=( batch_size, input_length, input_channels)) bounds_in = ibp.RelativeIntervalBounds(lb_in - nominal, ub_in - nominal, nominal) bounds_out = bounds_in.apply_conv1d(None, w, b, padding, strides[0]) lb_out, ub_out = bounds_out.lower, bounds_out.upper # Compare against equivalent linear layer. 
bounds_out_lin = _materialised_conv_bounds( w, b, padding, strides, bounds_in) lb_out_lin, ub_out_lin = bounds_out_lin.lower, bounds_out_lin.upper with self.test_session() as session: (lb_out_val, ub_out_val, lb_out_lin_val, ub_out_lin_val) = session.run((lb_out, ub_out, lb_out_lin, ub_out_lin)) self.assertAllClose(lb_out_val, lb_out_lin_val, atol=tol, rtol=tol) self.assertAllClose(ub_out_val, ub_out_lin_val, atol=tol, rtol=tol) @parameterized.named_parameters( ('float32_snt', snt.BatchNorm, tf.float32, 1.e-5, False), ('float64_snt', snt.BatchNorm, tf.float64, 1.e-8, False), ('float32', ibp.BatchNorm, tf.float32, 1.e-5, False), ('float64', ibp.BatchNorm, tf.float64, 1.e-8, False), ('float32_train', ibp.BatchNorm, tf.float32, 1.e-5, True), ('float64_train', ibp.BatchNorm, tf.float64, 1.e-8, True)) def test_batchnorm_bounds(self, batchnorm_class, dtype, tol, is_training): batch_size = 11 input_size = 7 output_size = 5 lb_in = tf.random_normal(dtype=dtype, shape=(batch_size, input_size)) ub_in = tf.random_normal(dtype=dtype, shape=(batch_size, input_size)) lb_in, ub_in = tf.minimum(lb_in, ub_in), tf.maximum(lb_in, ub_in) nominal = tf.random_normal(dtype=dtype, shape=(batch_size, input_size)) # Linear layer. w = tf.random_normal(dtype=dtype, shape=(input_size, output_size)) b = tf.random_normal(dtype=dtype, shape=(output_size,)) # Batch norm layer. epsilon = 1.e-2 bn_initializers = { 'beta': tf.random_normal_initializer(), 'gamma': tf.random_uniform_initializer(.1, 3.), 'moving_mean': tf.random_normal_initializer(), 'moving_variance': tf.random_uniform_initializer(.1, 3.) } batchnorm_module = batchnorm_class(offset=True, scale=True, eps=epsilon, initializers=bn_initializers) # Connect the batchnorm module to the graph. 
batchnorm_module(tf.random_normal(dtype=dtype, shape=(batch_size, output_size)), is_training=is_training) bounds_in = ibp.RelativeIntervalBounds(lb_in - nominal, ub_in - nominal, nominal) bounds_out = bounds_in.apply_linear(None, w, b) bounds_out = bounds_out.apply_batch_norm( batchnorm_module, batchnorm_module.mean if is_training else batchnorm_module.moving_mean, batchnorm_module.variance if is_training else batchnorm_module.moving_variance, batchnorm_module.gamma, batchnorm_module.beta, epsilon) lb_out, ub_out = bounds_out.lower, bounds_out.upper # Separately, calculate dual objective by adjusting the linear layer. wn, bn = layer_utils.combine_with_batchnorm(w, b, batchnorm_module) bounds_out_lin = bounds_in.apply_linear(None, wn, bn) lb_out_lin, ub_out_lin = bounds_out_lin.lower, bounds_out_lin.upper init_op = tf.global_variables_initializer() with self.test_session() as session: session.run(init_op) (lb_out_val, ub_out_val, lb_out_lin_val, ub_out_lin_val) = session.run((lb_out, ub_out, lb_out_lin, ub_out_lin)) self.assertAllClose(lb_out_val, lb_out_lin_val, atol=tol, rtol=tol) self.assertAllClose(ub_out_val, ub_out_lin_val, atol=tol, rtol=tol) def _materialised_conv_bounds(w, b, padding, strides, bounds_in): """Calculates bounds on output of an N-D convolution layer. The calculation is performed by first materialising the convolution as a (sparse) fully-connected linear layer. Doing so will affect performance, but may be useful for investigating numerical stability issues. Args: w: (N+2)D tensor of shape (kernel_height, kernel_width, input_channels, output_channels) containing weights for the convolution. b: 1D tensor of shape (output_channels) containing biases for the convolution, or `None` if no bias. padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. strides: Integer list of length N: `[vertical_stride, horizontal_stride]`. 
bounds_in: bounds of shape (batch_size, input_height, input_width, input_channels) containing bounds on the inputs to the convolution layer. Returns: bounds of shape (batch_size, output_height, output_width, output_channels) with bounds on the outputs of the convolution layer. Raises: ValueError: if an unsupported convolution dimensionality is encountered. """ # Flatten the inputs, as the materialised convolution will have no # spatial structure. bounds_in_flat = bounds_in.apply_batch_reshape(None, [-1]) # Materialise the convolution as a (sparse) fully connected linear layer. input_shape = bounds_in.shape[1:] w_lin, b_lin = layer_utils.materialise_conv(w, b, input_shape, padding=padding, strides=strides) bounds_out_flat = bounds_in_flat.apply_linear(None, w_lin, b_lin) # Unflatten the output bounds. output_shape = layer_utils.conv_output_shape(input_shape, w, padding, strides) return bounds_out_flat.apply_batch_reshape(None, output_shape) if __name__ == '__main__': tf.test.main() ================================================ FILE: interval_bound_propagation/tests/simplex_bounds_test.py ================================================ # coding=utf-8 # Copyright 2019 The Interval Bound Propagation Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for naive_bounds.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl.testing import parameterized import interval_bound_propagation as ibp from interval_bound_propagation import layer_utils import numpy as np import tensorflow.compat.v1 as tf class SimplexBoundsTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('float32', tf.float32), ('float64', tf.float64)) def test_linear_simplex_bounds_shape(self, dtype): vocab_size = 103 batch_size = 11 input_size = 7 output_size = 5 w = tf.placeholder(dtype=dtype, shape=(input_size, output_size)) b = tf.placeholder(dtype=dtype, shape=(output_size,)) embedding = tf.placeholder(dtype=dtype, shape=(vocab_size, input_size)) centres = tf.placeholder(dtype=dtype, shape=(batch_size, input_size)) r = .2 bounds_in = ibp.SimplexBounds(embedding, centres, r) bounds_out = bounds_in.apply_linear(None, w, b) lb_out, ub_out = bounds_out.lower, bounds_out.upper self.assertEqual(dtype, lb_out.dtype) self.assertEqual(dtype, ub_out.dtype) self.assertEqual((batch_size, output_size), lb_out.shape) self.assertEqual((batch_size, output_size), ub_out.shape) @parameterized.named_parameters(('float32', tf.float32, 1.e-6), ('float64', tf.float64, 1.e-8)) def test_linear_bounds_on_embedding_layer(self, dtype, tol): w = tf.constant([[1.0, 2.0, 3.0], [4.0, -5.0, 6.0]], dtype=dtype) b = tf.constant([0.01, -0.02, 0.03], dtype=dtype) embedding = tf.constant([[0.0, 0.0], [10.0, 10.0], [0.0, -20.0]], dtype=dtype) centres = tf.constant([[7.0, 6.0]], dtype=dtype) r = .1 # Simplex vertices: [6.3, 5.4], [7.3, 6.4], and [6.3, 3.4]. # They map to: [27.91, -14.42, 51.33], [32.91, -17.42, 60.33], # and [19.91, -4.42, 39.33]. 
bounds_in = ibp.SimplexBounds(embedding, centres, r) bounds_out = bounds_in.apply_linear(None, w, b) lb_out, ub_out = bounds_out.lower, bounds_out.upper lb_out_exp = np.array([[19.91, -17.42, 39.33]]) ub_out_exp = np.array([[32.91, -4.42, 60.33]]) with self.test_session() as session: lb_out_act, ub_out_act = session.run((lb_out, ub_out)) self.assertAllClose(lb_out_exp, lb_out_act, atol=tol, rtol=tol) self.assertAllClose(ub_out_exp, ub_out_act, atol=tol, rtol=tol) @parameterized.named_parameters(('float32', tf.float32), ('float64', tf.float64)) def test_conv1d_simplex_bounds_shape(self, dtype): num_vertices = 41 batch_size = 11 input_length = 13 kernel_length = 5 input_channels = 3 output_channels = 2 padding = 'VALID' strides = (2,) # Expected output dimensions, based on convolution settings. output_length = 5 w = tf.placeholder(dtype=dtype, shape=( kernel_length, input_channels, output_channels)) b = tf.placeholder(dtype=dtype, shape=(output_channels,)) vertices = tf.placeholder(dtype=dtype, shape=( batch_size, num_vertices, input_length, input_channels)) centres = tf.placeholder(dtype=dtype, shape=( batch_size, input_length, input_channels)) r = .2 bounds_in = ibp.SimplexBounds(vertices, centres, r) bounds_out = bounds_in.apply_conv1d(None, w, b, padding, strides) lb_out, ub_out = bounds_out.lower, bounds_out.upper self.assertEqual(dtype, lb_out.dtype) self.assertEqual(dtype, ub_out.dtype) self.assertEqual((batch_size, output_length, output_channels), lb_out.shape) self.assertEqual((batch_size, output_length, output_channels), ub_out.shape) @parameterized.named_parameters(('float32', tf.float32, 2.e-6), ('float64', tf.float64, 1.e-8)) def test_conv1d_simplex_bounds(self, dtype, tol): num_vertices = 37 batch_size = 53 input_length = 17 kernel_length = 7 input_channels = 3 output_channels = 2 padding = 'VALID' strides = (2,) w = tf.random_normal(dtype=dtype, shape=( kernel_length, input_channels, output_channels)) b = tf.random_normal(dtype=dtype, 
shape=(output_channels,)) vertices = tf.random_normal(dtype=dtype, shape=( batch_size, num_vertices, input_length, input_channels)) centres = tf.random_normal(dtype=dtype, shape=( batch_size, input_length, input_channels)) r = .2 bounds_in = ibp.SimplexBounds(vertices, centres, r) bounds_out = bounds_in.apply_conv1d(None, w, b, padding, strides[0]) lb_out, ub_out = bounds_out.lower, bounds_out.upper # Compare against equivalent linear layer. bounds_out_lin = _materialised_conv_simplex_bounds( w, b, padding, strides, bounds_in) lb_out_lin, ub_out_lin = bounds_out_lin.lower, bounds_out_lin.upper with self.test_session() as session: (lb_out_val, ub_out_val, lb_out_lin_val, ub_out_lin_val) = session.run((lb_out, ub_out, lb_out_lin, ub_out_lin)) self.assertAllClose(lb_out_val, lb_out_lin_val, atol=tol, rtol=tol) self.assertAllClose(ub_out_val, ub_out_lin_val, atol=tol, rtol=tol) def _materialised_conv_simplex_bounds(w, b, padding, strides, bounds_in): """Calculates naive bounds on output of an N-D convolution layer. The calculation is performed by first materialising the convolution as a (sparse) fully-connected linear layer. Doing so will affect performance, but may be useful for investigating numerical stability issues. The layer inputs and the vertices are assumed to be (N-D) sequences in an embedding space. The input domain is taken to be the simplex of perturbations of the centres (true inputs) towards the given vertices. 
Specifically, the input domain is the convex hull of this set of vertices:: { (1-r)*centres + r*vertices[j] : j=1.8.0'], 'tensorflow with gpu': ['tensorflow-gpu>=1.8.0'], 'sonnet': ['dm-sonnet>=1.26'], 'sonnet with gpu': ['dm-sonnet-gpu>=1.26'], } def ibp_test_suite(): test_loader = unittest.TestLoader() test_suite = test_loader.discover('interval_bound_propagation/tests', pattern='*_test.py') return test_suite setup( name='interval_bound_propagation', version='1.1', description='A library to train verifiably robust neural networks.', url='https://github.com/deepmind/interval_bound_propagation', author='DeepMind', author_email='no-reply@google.com', # Contained modules and scripts. packages=find_packages(), install_requires=REQUIRED_PACKAGES, extras_require=EXTRA_PACKAGES, platforms=['any'], license='Apache 2.0', test_suite='setup.ibp_test_suite', )