Repository: KaidiXu/auto_LiRPA Branch: master Commit: ca767f1d8c0a Files: 249 Total size: 1.5 MB Directory structure: gitextract_yr8n0etx/ ├── .github/ │ └── ISSUE_TEMPLATE/ │ └── bug_report.md ├── .gitignore ├── .readthedocs.yaml ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── auto_LiRPA/ │ ├── __init__.py │ ├── backward_bound.py │ ├── beta_crown.py │ ├── bound_general.py │ ├── bound_multi_gpu.py │ ├── bound_op_map.py │ ├── bound_ops.py │ ├── bounded_tensor.py │ ├── concretize_bounds.py │ ├── concretize_func.py │ ├── cuda/ │ │ ├── cuda_kernels.cu │ │ └── cuda_utils.cpp │ ├── cuda_utils.py │ ├── edit_graph.py │ ├── eps_scheduler.py │ ├── forward_bound.py │ ├── interval_bound.py │ ├── jacobian.py │ ├── linear_bound.py │ ├── operators/ │ │ ├── __init__.py │ │ ├── activation_base.py │ │ ├── activations.py │ │ ├── add_sub.py │ │ ├── base.py │ │ ├── bivariate.py │ │ ├── clampmult.py │ │ ├── constant.py │ │ ├── convex_concave.py │ │ ├── convolution.py │ │ ├── cut_ops.py │ │ ├── dropout.py │ │ ├── dtype.py │ │ ├── gelu.py │ │ ├── indexing.py │ │ ├── jacobian.py │ │ ├── leaf.py │ │ ├── linear.py │ │ ├── logical.py │ │ ├── minmax.py │ │ ├── normalization.py │ │ ├── pooling.py │ │ ├── reduce.py │ │ ├── relu.py │ │ ├── reshape.py │ │ ├── resize.py │ │ ├── rnn.py │ │ ├── s_shaped.py │ │ ├── shape.py │ │ ├── slice_concat.py │ │ ├── softmax.py │ │ ├── solver_utils.py │ │ ├── tile.py │ │ └── trigonometric.py │ ├── opt_pruner.py │ ├── optimize_graph.py │ ├── optimized_bounds.py │ ├── output_constraints.py │ ├── parse_graph.py │ ├── patches.py │ ├── perturbations.py │ ├── solver_module.py │ ├── tools.py │ ├── utils.py │ └── wrapper.py ├── doc/ │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── api.rst │ ├── conf.py │ ├── index.rst │ └── process.py ├── examples/ │ ├── .gitignore │ ├── __init__.py │ ├── language/ │ │ ├── .gitignore │ │ ├── Transformer/ │ │ │ ├── Transformer.py │ │ │ ├── __init__.py │ │ │ ├── modeling.py │ │ │ └── utils.py │ │ ├── data_utils.py │ │ ├── language_utils.py │ │ ├── lstm.py │ │ ├── oracle.py │ │ ├── preprocess/ │ │ │ ├── pre_compute_lm_scores.py │ │ │ └── preprocess_sst.py │ │ └── train.py │ ├── sequence/ │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── data_utils.py │ │ ├── lstm.py │ │ └── train.py │ ├── simple/ │ │ ├── invprop.py │ │ ├── lp_full.py │ │ ├── mip_lp_solver.py │ │ ├── models/ │ │ │ └── spectral_NOR_MLP_B.pth │ │ └── toy.py │ └── vision/ │ ├── .gitignore │ ├── bound_option.py │ ├── cifar_training.py │ ├── custom_op.py │ ├── data/ │ │ ├── .gitignore │ │ ├── ImageNet64/ │ │ │ └── imagenet_data_loader.py │ │ └── tinyImageNet/ │ │ ├── .gitignore │ │ └── tinyimagenet_download.sh │ ├── datasets.py │ ├── efficient_convolution.py │ ├── imagenet_training.py │ ├── jacobian.py │ ├── models/ │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── densenet_imagenet.py │ │ ├── densenet_no_bn.py │ │ ├── feedforward.py │ │ ├── mobilenet.py │ │ ├── resnet.py │ │ ├── resnet18.py │ │ ├── resnext.py │ │ ├── resnext_imagenet64.py │ │ ├── vnncomp_resnet.py │ │ ├── wide_resnet_cifar.py │ │ └── wide_resnet_imagenet64.py │ ├── pretrained/ │ │ ├── cifar_2c2f.pth │ │ ├── kw_mnist.pth │ │ ├── mnist_a_adv.pth │ │ ├── mnist_cnn_small.pth │ │ ├── mnist_fc_3layer.pth │ │ └── test_min_max.pth │ ├── save_intermediate_bound.py │ ├── simple_training.py │ ├── simple_verification.py │ ├── tinyimagenet_training.py │ ├── verify_two_node.py │ └── weight_perturbation_training.py ├── setup.py └── tests/ ├── .gitignore ├── data/ │ ├── .gitignore │ ├── avgpool_test_data │ ├── beta_crown_test_data │ ├── bound_ops_data │ 
├── ckpt_lstm │ ├── ckpt_transformer │ ├── constant_test_data │ ├── conv1d_test_data_3-0-2 │ ├── conv1d_test_data_3-0-3 │ ├── conv1d_test_data_3-1-2 │ ├── conv1d_test_data_3-1-3 │ ├── conv1d_test_data_4-0-2 │ ├── conv1d_test_data_4-0-3 │ ├── conv1d_test_data_4-1-2 │ ├── conv1d_test_data_4-1-3 │ ├── distinct_patches_test_data │ ├── invprop/ │ │ ├── ood.onnx │ │ ├── ood_reference │ │ └── simple_reference │ ├── jacobian_test_data │ ├── language_test_data │ ├── maxpool_test_data_3-0-3-0 │ ├── maxpool_test_data_3-0-3-1 │ ├── maxpool_test_data_3-1-3-0 │ ├── maxpool_test_data_3-1-3-1 │ ├── maxpool_test_data_4-0-4-0 │ ├── maxpool_test_data_4-0-4-1 │ ├── maxpool_test_data_4-1-4-0 │ ├── maxpool_test_data_4-1-4-1 │ ├── min_max_test_data │ ├── rectangle_patches_test_data │ ├── resnet_patches_test_data │ ├── s_shape_test_data │ ├── test_constrained_concretize │ ├── test_general_shape_data │ ├── test_perturbation_data │ ├── test_save_data │ ├── vision_clip_test_data │ ├── vision_test_data │ └── weight_perturbation_test_data ├── data_64/ │ ├── avgpool_test_data │ ├── bound_ops_data │ ├── constant_test_data │ ├── conv1d_test_data_3-0-2 │ ├── conv1d_test_data_3-0-3 │ ├── conv1d_test_data_3-1-2 │ ├── conv1d_test_data_3-1-3 │ ├── conv1d_test_data_4-0-2 │ ├── conv1d_test_data_4-0-3 │ ├── conv1d_test_data_4-1-2 │ ├── conv1d_test_data_4-1-3 │ ├── general_shape_data │ ├── invprop/ │ │ ├── ood_reference │ │ └── simple_reference │ ├── jacobian_test_data │ ├── maxpool_test_data_3-0-3-0 │ ├── maxpool_test_data_3-0-3-1 │ ├── maxpool_test_data_3-1-3-0 │ ├── maxpool_test_data_3-1-3-1 │ ├── maxpool_test_data_4-0-4-0 │ ├── maxpool_test_data_4-0-4-1 │ ├── maxpool_test_data_4-1-4-0 │ ├── maxpool_test_data_4-1-4-1 │ ├── min_max_test_data │ ├── rectangle_patches_test_data │ ├── resnet_patches_test_data │ ├── s_shape_test_data │ ├── test_constrained_concretize │ ├── test_general_shape_data │ ├── test_save_data │ ├── vision_clip_test_data │ ├── vision_test_data │ └── weight_perturbation_test_data ├── test_1d_activation.py ├── test_2d_activation.py ├── test_avgpool.py ├── test_bound_ops.py ├── test_branching_heuristics.py ├── test_clip_domains.py ├── test_constant.py ├── test_constrained_concretize.py ├── test_conv.py ├── test_conv1d.py ├── test_distinct_patches.py ├── test_examples.py ├── test_examples_ci.py ├── test_general_nonlinear.py ├── test_general_shape.py ├── test_identity.py ├── test_invprop.py ├── test_jacobian.py ├── test_language_models.py ├── test_linear_cnn_model.py ├── test_linear_model.py ├── test_maxpool.py ├── test_min_max.py ├── test_perturbation.py ├── test_rectangle_patches.py ├── test_resnet_patches.py ├── test_s_shaped.py ├── test_save_intermediate.py ├── test_simple_verification.py ├── test_state_dict_name.py ├── test_tensor_storage.py ├── test_upsample.py ├── test_vision_models.py ├── test_vision_models_hardtanh.py ├── test_weight_perturbation.py └── testcase.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Please provide us with the following to receive timely help: 1. A minimum example to reproduce the bug. Keep your code as short as possible but still directly runnable. 2. 
Model files, especially when the bug is only triggered on specific models. 3. **Complete** outputs of the program when the bug is triggered. Please do **not** just include the last few lines. If it's very long, you can use [PasteBin](https://pastebin.com/) or upload to a file-sharing service. 4. Detailed instructions to reproduce the problem. If you changed part of our tool, please rebase your changes to main branch and push your changes to a fork so we can investigate easier. Without the above information, you might not be able to receive timely help from us. **System configuration:** - OS: [e.g. Ubuntu 22.04. Windows and MacOS are not supported.] - Python version: [e.g., Python 3.8] - Pytorch Version: [e.g., PyTorch 1.12] - Hardware: [e.g., RTX 4090] - Have you tried to reproduce the problem in a cleanly created conda/virtualenv environment using official installation instructions and the latest code on the main branch?: [Yes/No] **Screenshots** If applicable, add screenshots to help explain your problem. **Additional context** Add any other context about the problem here. ================================================ FILE: .gitignore ================================================ tmp build __pycache__ *.egg-info dist *.swp *.swo *.log .trace_graph Verified_ret*.npy Verified-acc*.npy vnn-comp_*.npz *.tar.gz verifier_log_* .vscode/ *.pt .idea *.so release *.compiled .DS_Store *.out *.txt release release_abcrown cachier out.csv results.csv ================================================ FILE: .readthedocs.yaml ================================================ # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-20.04 tools: python: "3.11" # Build documentation in the docs/ directory with Sphinx sphinx: configuration: doc/conf.py # Optionally declare the Python requirements required to build your docs python: install: - method: pip path: . - requirements: doc/requirements.txt ================================================ FILE: CONTRIBUTORS ================================================ Team leaders: * Faculty: Huan Zhang (huan@huan-zhang.com), UIUC * Student: Xiangru Zhong (xiangru4@illinois.edu), UIUC Current developers (* indicates members of VNN-COMP 2025 team): * \*Duo Zhou (duozhou2@illinois.edu), UIUC * \*Keyi Shen (keyis2@illinois.edu), UIUC (graduated, now at Georgia Tech) * \*Hesun Chen (hesunc2@illinois.edu), UIUC * \*Haoyu Li (haoyuli5@illinois.edu), UIUC * \*Ruize Gao (ruizeg2@illinois.edu), UIUC * \*Hao Cheng (haoc539@illinois.edu), UIUC * Zhouxing Shi (zhouxingshichn@gmail.com), UCLA/UC Riverside * Lei Huang (leih5@illinois.edu), UIUC * Taobo Liao (taobol2@illinois.edu), UIUC * Jorge Chavez (jorgejc2@illinois.edu), UIUC Past developers: * Hongji Xu (hx84@duke.edu), Duke University (intern with Prof. 
Huan Zhang) * Christopher Brix (brix@cs.rwth-aachen.de), RWTH Aachen University * Hao Chen (haoc8@illinois.edu), UIUC * Keyu Lu (keyulu2@illinois.edu), UIUC * Kaidi Xu (kx46@drexel.edu), Drexel University * Sanil Chawla (schawla7@illinois.edu), UIUC * Linyi Li (linyi2@illinois.edu), UIUC * Zhuolin Yang (zhuolin5@illinois.edu), UIUC * Zhuowen Yuan (realzhuowen@gmail.com), UIUC * Qirui Jin (qiruijin@umich.edu), University of Michigan * Shiqi Wang (sw3215@columbia.edu), Columbia University * Yihan Wang (yihanwang@ucla.edu), UCLA * Jinqi (Kathryn) Chen (jinqic@cs.cmu.edu), CMU auto_LiRPA is currently supported in part by the National Science Foundation (NSF; award 2331967, 2525287), the AI2050 program at Schmidt Science, the Virtual Institute for Scientific Software (VISS) at Georgia Tech, the University Research Program at Toyota Research Institute (TRI), and a Mathworks research award. The team acknowledges the financial and advisory support from Prof. Zico Kolter (zkolter@cs.cmu.edu), Prof. Cho-Jui Hsieh (chohsieh@cs.ucla.edu), Prof. Suman Jana (suman@cs.columbia.edu), Prof. Bo Li (lbo@illinois.edu), and Prof. Xue Lin (xue.lin@northeastern.edu) during 2021 - 2023. ================================================ FILE: LICENSE ================================================ Copyright (C) 2021-2025 The α,β-CROWN Team See CONTRIBUTORS for the list of all contributors and their affiliations. Team leaders: Faculty: Huan Zhang (UIUC) Student: Xiangru Zhong (UIUC) Current developers: Duo Zhou (UIUC) Keyi Shen (UIUC/Georgia Tech) Hesun Chen (UIUC) Haoyu Li (UIUC) Ruize Gao (UIUC) Hao Cheng (UIUC) Zhouxing Shi (UCLA/UC Riverside) Lei Huang (UIUC) Taobo Liao (UIUC) Jorge Chavez (UIUC) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: README.md ================================================ # auto_LiRPA: Automatic Linear Relaxation based Perturbation Analysis for Neural Networks [![Documentation Status](https://readthedocs.org/projects/auto-lirpa/badge/?version=latest)](https://auto-lirpa.readthedocs.io/en/latest/?badge=latest) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://PaperCode.cc/AutoLiRPA-Demo) [![Video Introduction](https://img.shields.io/badge/play-video-red.svg)](http://PaperCode.cc/AutoLiRPA-Video) [![BSD license](https://img.shields.io/badge/License-BSD-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)

## What's New? - [α,β-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git) (using `auto_LiRPA` as its core library) is the winner of [VNN-COMP 2025](https://sites.google.com/view/vnn2025) and is **ranked top-1** in all [scored benchmarks](https://github.com/VNN-COMP/vnncomp2025_results/blob/main/SCORING-SMALL-TOL/latex/main.pdf). (08/2025) - Bounding of computation graphs containing Jacobian operators now supports more nonlinear operators (e.g., ```tanh```, ```sigmoid```), enabling verification of [continuous-time Lyapunov stability](https://github.com/Verified-Intelligence/Two-Stage_Neural_Controller_Training). (12/2025) - [α,β-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git) (using `auto_LiRPA` as its core library) is the winner of [VNN-COMP 2024](https://sites.google.com/view/vnn2024). Our tool is **ranked top-1** in all benchmarks (including 12 [regular track](https://github.com/ChristopherBrix/vnncomp2024_results/blob/main/SCORING/latex/results_regular_track.pdf) and 9 [extended track](https://github.com/ChristopherBrix/vnncomp2024_results/blob/main/SCORING/latex/results_extended_track.pdf) benchmarks). (08/2024) - The [INVPROP algorithm](https://arxiv.org/pdf/2302.01404.pdf) allows computing overapproximations of preimages (the set of inputs of an NN generating a given output set) and tightening bounds using output constraints. (03/2024) - Branch-and-bound support for non-ReLU and general nonlinearities ([GenBaB](https://arxiv.org/pdf/2405.21063)) with optimizable bounds (α-CROWN) for new nonlinear functions (sin, cos, GeLU). We achieve significant improvements on verifying neural networks with non-ReLU nonlinearities such as Transformers, LSTM, and [ML4ACOPF](https://github.com/AI4OPT/ml4acopf_benchmark). (09/2023) - [α,β-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git) ([alpha-beta-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git)) (using `auto_LiRPA` as its core library) **won** [VNN-COMP 2023](https://sites.google.com/view/vnn2023). (08/2023) - Bound computation for higher-order computational graphs to support bounding Jacobian, Jacobian-vector products, and [local Lipschitz constants](https://arxiv.org/abs/2210.07394). (11/2022) - Our neural network verification tool [α,β-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git) ([alpha-beta-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git)) (using `auto_LiRPA` as its core library) **won** [VNN-COMP 2022](https://sites.google.com/view/vnn2022). Our library supports the large CIFAR100, TinyImageNet and ImageNet models in VNN-COMP 2022. (09/2022) - Implementation of **general cutting planes** ([GCP-CROWN](https://arxiv.org/pdf/2208.05740.pdf)), support of more activation functions and improved performance and scalability. (09/2022) - Our neural network verification tool [α,β-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git) ([alpha-beta-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git)) **won** [VNN-COMP 2021](https://sites.google.com/view/vnn2021) **with the highest total score**, outperforming 11 SOTA verifiers. α,β-CROWN uses the `auto_LiRPA` library as its core bound computation library. (09/2021) - [Optimized CROWN/LiRPA](https://arxiv.org/pdf/2011.13824.pdf) bound (α-CROWN) for ReLU, **sigmoid**, **tanh**, and **maxpool** activation functions, which can significantly outperform regular CROWN bounds.
See [simple_verification.py](examples/vision/simple_verification.py#L59) for an example. (07/31/2021) - Handle split constraints for ReLU neurons ([β-CROWN](https://arxiv.org/pdf/2103.06624.pdf)) for complete verifiers. (07/31/2021) - A memory efficient GPU implementation of backward (CROWN) bounds for convolutional layers. (10/31/2020) - Certified defense models for downscaled ImageNet, TinyImageNet, CIFAR-10, LSTM/Transformer. (08/20/2020) - Adding support to **complex vision models** including DenseNet, ResNeXt and WideResNet. (06/30/2020) - **Loss fusion**, a technique that reduces training cost of tight LiRPA bounds (e.g. CROWN-IBP) to the same asymptotic complexity of IBP, making LiRPA based certified defense scalable to large datasets (e.g., TinyImageNet, downscaled ImageNet). (06/30/2020) - **Multi-GPU** support to scale LiRPA based training to large models and datasets. (06/30/2020) - Initial release. (02/28/2020) ## Introduction `auto_LiRPA` is a library for automatically deriving and computing bounds with linear relaxation based perturbation analysis (LiRPA) (e.g. [CROWN](https://arxiv.org/pdf/1811.00866.pdf) and [DeepPoly](https://files.sri.inf.ethz.ch/website/papers/DeepPoly.pdf)) for neural networks, which is a useful tool for formal robustness verification. We generalize existing LiRPA algorithms for feed-forward neural networks to a graph algorithm on general computational graphs, defined by PyTorch. Additionally, our implementation is also automatically **differentiable**, allowing optimizing network parameters to shape the bounds into certain specifications (e.g., certified defense). You can find [a video ▶️ introduction here](http://PaperCode.cc/AutoLiRPA-Video). Our library supports the following algorithms: * Backward mode LiRPA bound propagation ([CROWN](https://arxiv.org/pdf/1811.00866.pdf)/[DeepPoly](https://files.sri.inf.ethz.ch/website/papers/DeepPoly.pdf)) * Backward mode LiRPA bound propagation with optimized bounds ([α-CROWN](https://arxiv.org/pdf/2011.13824.pdf)) * Backward mode LiRPA bound propagation with split constraints ([β-CROWN](https://arxiv.org/pdf/2103.06624.pdf) for ReLU, and [GenBaB](https://arxiv.org/pdf/2405.21063) for general nonlinear functions) * Generalized backward mode LiRPA bound propagation with general cutting plane constraints ([GCP-CROWN](https://arxiv.org/pdf/2208.05740.pdf)) * Backward mode LiRPA bound propagation with bounds tightened using output constraints ([INVPROP](https://arxiv.org/pdf/2302.01404.pdf)) * Generalized backward mode LiRPA bound propagation for higher-order computational graphs ([Shi et al., 2022](https://arxiv.org/abs/2210.07394)) * Forward mode LiRPA bound propagation ([Xu et al., 2020](https://arxiv.org/pdf/2002.12920)) * Forward mode LiRPA bound propagation with optimized bounds (similar to [α-CROWN](https://arxiv.org/pdf/2011.13824.pdf)) * Interval bound propagation ([IBP](https://arxiv.org/pdf/1810.12715.pdf)) * Hybrid approaches, e.g., Forward+Backward, IBP+Backward ([CROWN-IBP](https://arxiv.org/pdf/1906.06316.pdf)), [α,β-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git) ([alpha-beta-CROWN](https://github.com/Verified-Intelligence/alpha-beta-CROWN.git)) * MIP/LP formulation of neural networks Our library allows automatic bound derivation and computation for general computational graphs, in a similar manner that gradients are obtained in modern deep learning frameworks -- users only define the computation in a forward pass, and `auto_LiRPA` traverses through the computational graph and 
derives bounds for any node on the graph. With `auto_LiRPA`, we free users from deriving and implementing LiRPA for most common tasks, and they can simply apply LiRPA as a tool for their own applications. This is especially useful for users who are not experts in LiRPA and cannot derive these bounds manually (LiRPA is significantly more complicated than backpropagation). ## Technical Background in 1 Minute Deep learning frameworks such as PyTorch represent neural networks (NN) as a computational graph, where each mathematical operation is a node and edges define the flow of computation:

Normally, the inputs of a computation graph (which defines a NN) are data and model weights, and PyTorch goes through the graph and produces model prediction (a bunch of numbers):

Our `auto_LiRPA` library conducts perturbation analysis on a computational graph, where the input data and model weights are defined within some user-defined ranges. We get guaranteed output ranges (bounds):

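For a concrete sense of what a guaranteed output range means, here is a minimal hand-worked sketch (plain PyTorch, independent of `auto_LiRPA`; the layer weights and input interval are arbitrary choices for illustration) that propagates an input interval through one linear layer followed by a ReLU. `auto_LiRPA` automates this kind of analysis on arbitrary computational graphs, and its linear-relaxation-based methods give much tighter bounds than this simple interval arithmetic:

```python
import torch

# Toy layer y = ReLU(W x + b); the input x is only known to lie in [x_L, x_U].
W = torch.tensor([[1.0, -2.0], [0.5, 1.0]])
b = torch.tensor([0.1, -0.3])
x_L = torch.tensor([-1.0, 0.0])  # elementwise lower bound of the input
x_U = torch.tensor([1.0, 0.5])   # elementwise upper bound of the input

# Interval arithmetic: positive weights take the input bound from the same side,
# negative weights take the bound from the opposite side.
W_pos, W_neg = W.clamp(min=0), W.clamp(max=0)
pre_L = W_pos @ x_L + W_neg @ x_U + b
pre_U = W_pos @ x_U + W_neg @ x_L + b

# ReLU is monotone, so the bounds pass through it directly.
y_L, y_U = pre_L.clamp(min=0), pre_U.clamp(min=0)
print(y_L, y_U)  # every x in [x_L, x_U] is guaranteed to give y in [y_L, y_U]
```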
## Installation Python 3.11+ and PyTorch 2.0+ are required. It is highly recommended to have a pre-installed PyTorch that matches your system and our version requirement (see [PyTorch Get Started](https://pytorch.org/get-started)). Then you can install `auto_LiRPA` via:
```bash
git clone https://github.com/Verified-Intelligence/auto_LiRPA
cd auto_LiRPA
pip install .
```
If you intend to modify this library, use `pip install -e .` instead. ## Quick Start First define your computation as a `nn.Module` and wrap it using `auto_LiRPA.BoundedModule()`. Then, you can call the `compute_bounds` function to obtain certified lower and upper bounds under input perturbations:
```python
from auto_LiRPA import BoundedModule, BoundedTensor, PerturbationLpNorm

# Define computation as a nn.Module.
class MyModel(nn.Module):
    def forward(self, x):
        # Define your computation here.

model = MyModel()
my_input = load_a_batch_of_data()
# Wrap the model with auto_LiRPA.
model = BoundedModule(model, my_input)
# Define perturbation. Here we add Linf perturbation to input data.
ptb = PerturbationLpNorm(norm=np.inf, eps=0.1)
# Make the input a BoundedTensor with the pre-defined perturbation.
my_input = BoundedTensor(my_input, ptb)
# Regular forward propagation using BoundedTensor works as usual.
prediction = model(my_input)
# Compute LiRPA bounds using the backward mode bound propagation (CROWN).
lb, ub = model.compute_bounds(x=(my_input,), method="backward")
```
Check out [examples/vision/simple_verification.py](examples/vision/simple_verification.py) for a complete but very basic example. We also provide a [Google Colab Demo](http://PaperCode.cc/AutoLiRPA-Demo) including an example of computing verification bounds for an 18-layer ResNet model on the CIFAR-10 dataset. Once the ResNet model is defined as usual in PyTorch, obtaining provable output bounds is as easy as obtaining gradients through autodiff. Bounds are efficiently computed on GPUs.
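The snippet above is only a template (`MyModel.forward` and `load_a_batch_of_data()` are placeholders). One way to expand it into a self-contained toy example is sketched below; the two-layer network, the random batch, and the `eps=0.1` radius are arbitrary illustrative choices, not part of the original example:

```python
import numpy as np
import torch
import torch.nn as nn
from auto_LiRPA import BoundedModule, BoundedTensor, PerturbationLpNorm

# A small fully-connected network, chosen only for illustration.
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 2))

    def forward(self, x):
        return self.net(x)

model = MyModel()
my_input = torch.randn(8, 4)  # a batch of 8 random inputs stands in for real data
# Wrap the model with auto_LiRPA, tracing it with a concrete input.
model = BoundedModule(model, my_input)
# Linf perturbation of radius 0.1 around each input element.
ptb = PerturbationLpNorm(norm=np.inf, eps=0.1)
my_input = BoundedTensor(my_input, ptb)
prediction = model(my_input)  # regular forward propagation still works
# Backward mode bound propagation (CROWN).
lb, ub = model.compute_bounds(x=(my_input,), method="backward")
print(lb.shape, ub.shape)  # both (8, 2): per-example lower/upper bounds on each output
```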
## More Working Examples We provide [a wide range of examples](doc/src/examples.md) of using `auto_LiRPA`: * [Basic Bound Computation on a Toy Neural Network (simplest example)](examples/simple/toy.py) * [Basic Bound Computation with **Robustness Verification** of Neural Networks as an example](doc/src/examples.md#basic-bound-computation-and-robustness-verification-of-neural-networks) * [MIP/LP Formulation of Neural Networks](examples/simple/mip_lp_solver.py) * [Basic **Certified Adversarial Defense** Training](doc/src/examples.md#basic-certified-adversarial-defense-training) * [Large-scale Certified Defense Training on **ImageNet**](doc/src/examples.md#certified-adversarial-defense-on-downscaled-imagenet-and-tinyimagenet-with-loss-fusion) * [Certified Adversarial Defense Training on Sequence Data with **LSTM**](doc/src/examples.md#certified-adversarial-defense-training-for-lstm-on-mnist) * [Certifiably Robust Language Classifier using **Transformers**](doc/src/examples.md#certifiably-robust-language-classifier-with-transformer-and-lstm) * [Certified Robustness against **Model Weight Perturbations**](doc/src/examples.md#certified-robustness-against-model-weight-perturbations-and-certified-defense) * [Bounding **Jacobian** and **local Lipschitz constants**](examples/vision/jacobian.py) * [Compute an Overapproximation of a Neural Network **Preimage**](examples/simple/invprop.py) `auto_LiRPA` has also been used in the following works: * [**α,β-CROWN for complete neural network verification**](https://github.com/Verified-Intelligence/alpha-beta-CROWN) * [**Fast certified robust training**](https://github.com/shizhouxing/Fast-Certified-Robust-Training) * [**Computing local Lipschitz constants**](https://github.com/shizhouxing/Local-Lipschitz-Constants) ## Full Documentation For more documentation, please refer to: * [Documentation homepage](https://auto-lirpa.readthedocs.io) * [API documentation](https://auto-lirpa.readthedocs.io/en/latest/api.html) * [Adding custom operators](https://auto-lirpa.readthedocs.io/en/latest/custom_op.html) * [Guide](https://auto-lirpa.readthedocs.io/en/latest/paper.html) for reproducing [our NeurIPS 2020 paper](https://arxiv.org/abs/2002.12920) ## Publications Please kindly cite our papers if you use the `auto_LiRPA` library. Full [BibTeX entries](doc/src/examples.md#bibtex-entries) can be found [here](doc/src/examples.md#bibtex-entries). The general LiRPA based bound propagation algorithm was originally proposed in our paper: * [Automatic Perturbation Analysis for Scalable Certified Robustness and Beyond](https://arxiv.org/pdf/2002.12920). NeurIPS 2020. Kaidi Xu\*, Zhouxing Shi\*, Huan Zhang\*, Yihan Wang, Kai-Wei Chang, Minlie Huang, Bhavya Kailkhura, Xue Lin, Cho-Jui Hsieh (\* Equal contribution) The `auto_LiRPA` library is further extended to support: * Optimized bounds (α-CROWN): [Fast and Complete: Enabling Complete Neural Network Verification with Rapid and Massively Parallel Incomplete Verifiers](https://arxiv.org/pdf/2011.13824.pdf). ICLR 2021. Kaidi Xu\*, Huan Zhang\*, Shiqi Wang, Yihan Wang, Suman Jana, Xue Lin and Cho-Jui Hsieh (\* Equal contribution). * Split constraints (β-CROWN): [Beta-CROWN: Efficient Bound Propagation with Per-neuron Split Constraints for Complete and Incomplete Neural Network Verification](https://arxiv.org/pdf/2103.06624.pdf). NeurIPS 2021. Shiqi Wang\*, Huan Zhang\*, Kaidi Xu\*, Suman Jana, Xue Lin, Cho-Jui Hsieh and Zico Kolter (\* Equal contribution).
* General constraints (GCP-CROWN): [GCP-CROWN: General Cutting Planes for Bound-Propagation-Based Neural Network Verification](https://arxiv.org/abs/2208.05740). Huan Zhang\*, Shiqi Wang\*, Kaidi Xu\*, Linyi Li, Bo Li, Suman Jana, Cho-Jui Hsieh and Zico Kolter (\* Equal contribution). * Higher-order computational graphs (Lipschitz constants and Jacobian): [Efficiently Computing Local Lipschitz Constants of Neural Networks via Bound Propagation](https://arxiv.org/abs/2210.07394). NeurIPS 2022. Zhouxing Shi, Yihan Wang, Huan Zhang, Zico Kolter, Cho-Jui Hsieh. * Branch-and-bound for non-ReLU and general nonlinear functions (GenBaB): [Neural Network Verification with Branch-and-Bound for General Nonlinearities](https://arxiv.org/pdf/2405.21063). TACAS 2025. Zhouxing Shi\*, Qirui Jin\*, Zico Kolter, Suman Jana, Cho-Jui Hsieh, Huan Zhang (\* Equal contribution). * Tightening of bounds and preimage computation using the INVPROP algorithm: [Provably Bounding Neural Network Preimages](https://arxiv.org/pdf/2302.01404.pdf). NeurIPS 2023. Suhas Kotha\*, Christopher Brix\*, Zico Kolter, Krishnamurthy (Dj) Dvijotham\*\*, Huan Zhang\*\* (\* Equal contribution; \*\* Equal advising). Certified training (verification-aware training by optimizing bounds) using `auto_LiRPA` is improved with: * Much shorter warmup schedule and faster training: [Fast Certified Robust Training with Short Warmup](https://arxiv.org/pdf/2103.17268.pdf). NeurIPS 2021. Zhouxing Shi\*, Yihan Wang\*, Huan Zhang, Jinfeng Yi and Cho-Jui Hsieh (\* Equal contribution). * Training-time branch-and-bound: [Certified Training with Branch-and-Bound: A Case Study on Lyapunov-stable Neural Control](https://arxiv.org/abs/2411.18235). Zhouxing Shi, Cho-Jui Hsieh, and Huan Zhang. ## Developers and Copyright Team leaders: * Faculty: Huan Zhang (huan@huan-zhang.com), UIUC * Student: Xiangru Zhong (xiangru4@illinois.edu), UIUC Current developers (* indicates members of VNN-COMP 2025 team): * \*Duo Zhou (duozhou2@illinois.edu), UIUC * \*Keyi Shen (keyis2@illinois.edu), UIUC (graduated, now at Georgia Tech) * \*Hesun Chen (hesunc2@illinois.edu), UIUC * \*Haoyu Li (haoyuli5@illinois.edu), UIUC * \*Ruize Gao (ruizeg2@illinois.edu), UIUC * \*Hao Cheng (haoc539@illinois.edu), UIUC * Zhouxing Shi (zhouxingshichn@gmail.com), UCLA/UC Riverside * Lei Huang (leih5@illinois.edu), UIUC * Taobo Liao (taobol2@illinois.edu), UIUC * Jorge Chavez (jorgejc2@illinois.edu), UIUC Past developers: * Hongji Xu (hx84@duke.edu), Duke University (intern with Prof. Huan Zhang) * Christopher Brix (brix@cs.rwth-aachen.de), RWTH Aachen University * Hao Chen (haoc8@illinois.edu), UIUC * Keyu Lu (keyulu2@illinois.edu), UIUC * Kaidi Xu (kx46@drexel.edu), Drexel University * Sanil Chawla (schawla7@illinois.edu), UIUC * Linyi Li (linyi2@illinois.edu), UIUC * Zhuolin Yang (zhuolin5@illinois.edu), UIUC * Zhuowen Yuan (realzhuowen@gmail.com), UIUC * Qirui Jin (qiruijin@umich.edu), University of Michigan * Shiqi Wang (sw3215@columbia.edu), Columbia University * Yihan Wang (yihanwang@ucla.edu), UCLA * Jinqi (Kathryn) Chen (jinqic@cs.cmu.edu), CMU `auto_LiRPA` is currently supported in part by the National Science Foundation (NSF; award 2331967, 2525287), the AI2050 program at Schmidt Science, the Virtual Institute for Scientific Software (VISS) at Georgia Tech, the University Research Program at Toyota Research Institute (TRI), and a Mathworks research award. 
We thank the [commits](https://github.com/Verified-Intelligence/auto_LiRPA/commits) and [pull requests](https://github.com/Verified-Intelligence/auto_LiRPA/pulls) from community contributors. Our library is released under the BSD 3-Clause license. ================================================ FILE: auto_LiRPA/__init__.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .bound_general import BoundedModule from .bound_multi_gpu import BoundDataParallel from .bounded_tensor import BoundedTensor, BoundedParameter from .perturbations import PerturbationLpNorm, PerturbationSynonym, PerturbationLinear from .wrapper import CrossEntropyWrapper, CrossEntropyWrapperMultiInput from .bound_op_map import register_custom_op, unregister_custom_op __version__ = '0.7.0' ================================================ FILE: auto_LiRPA/backward_bound.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import os import torch from torch import Tensor from collections import deque from tqdm import tqdm from .patches import Patches from .utils import * from .bound_ops import * import warnings from typing import TYPE_CHECKING, List if TYPE_CHECKING: from .bound_general import BoundedModule def batched_backward(self: 'BoundedModule', node, C, unstable_idx, batch_size, bound_lower=True, bound_upper=True, return_A=None): if return_A is None: return_A = self.return_A output_shape = node.output_shape[1:] dim = int(prod(output_shape)) if unstable_idx is None: unstable_idx = torch.arange(dim, device=self.device) dense = True else: dense = False unstable_size = get_unstable_size(unstable_idx) print(f'Batched CROWN: node {node}, unstable size {unstable_size}') crown_batch_size = self.bound_opts['crown_batch_size'] auto_batch_size = AutoBatchSize(self.bound_opts['crown_batch_size'], self.device, vram_ratio=self.bound_opts['batched_crown_max_vram_ratio']) ret = [] ret_A = {} # if return_A, we will store A here i = 0 torch.cuda.empty_cache() with tqdm(total=unstable_size) as pbar: while i < unstable_size: crown_batch_size = auto_batch_size.batch_size if isinstance(unstable_idx, tuple): unstable_idx_batch = tuple( u[i : i + crown_batch_size] for u in unstable_idx ) unstable_size_batch = len(unstable_idx_batch[0]) else: unstable_idx_batch = unstable_idx[i : i + crown_batch_size] unstable_size_batch = len(unstable_idx_batch) auto_batch_size.record_actual_batch_size(unstable_size_batch) if node.patches_start and node.mode == "patches": assert C is None or C.type == 'Patches' C_batch = Patches(shape=[ unstable_size_batch, batch_size, *node.output_shape[1:-2], 1, 1], identity=1, unstable_idx=unstable_idx_batch, output_shape=[batch_size, *node.output_shape[1:]]) elif C.type == 'OneHot': assert isinstance(node, (BoundLinear, BoundMatMul)) C_batch = OneHotC( [batch_size, unstable_size_batch, *node.output_shape[1:]], self.device, unstable_idx_batch, None) else: assert C is None or C.type == 'eye' C_batch = torch.zeros([1, unstable_size_batch, dim], device=self.device) C_batch[0, torch.arange(unstable_size_batch), unstable_idx_batch] = 1.0 C_batch = C_batch.expand(batch_size, -1, -1).view( batch_size, unstable_size_batch, *output_shape) # overwrite return_A options to run backward general ori_return_A_option = self.return_A self.return_A = return_A batch_ret = self.backward_general( node, C_batch, bound_lower=bound_lower, bound_upper=bound_upper, average_A=False, need_A_only=False, unstable_idx=unstable_idx_batch) ret.append(batch_ret[:2]) if len(batch_ret) > 2: # A found, we merge A batch_A = batch_ret[2] ret_A = merge_A(node, batch_A, ret_A) # restore return_A options self.return_A = ori_return_A_option pbar.update(unstable_size_batch) i += unstable_size_batch auto_batch_size.update() if bound_lower: lb = torch.cat([item[0].view(batch_size, -1) for item in ret], dim=1) if dense: # In this case, restore_sparse_bounds will not be called. # And thus we restore the shape here. lb = lb.reshape(batch_size, *output_shape) else: lb = None if bound_upper: ub = torch.cat([item[1].view(batch_size, -1) for item in ret], dim=1) if dense: # In this case, restore_sparse_bounds will not be called. # And thus we restore the shape here. 
ub = ub.reshape(batch_size, *output_shape) else: ub = None if return_A: return lb, ub, ret_A else: return lb, ub def backward_general( self: 'BoundedModule', bound_node, C, start_backpropagation_at_node = None, bound_lower=True, bound_upper=True, average_A=False, need_A_only=False, unstable_idx=None, update_mask=None, apply_output_constraints_to: Optional[List[str]] = None, initial_As: Optional[dict] = None, initial_lb: Optional[torch.tensor] = None, initial_ub: Optional[torch.tensor] = None, ): use_beta_crown = self.bound_opts['optimize_bound_args']['enable_beta_crown'] tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) if self.invprop_enabled(): self.invprop_init_infeasible_bounds(bound_node, C) if bound_node.are_output_constraints_activated_for_layer(apply_output_constraints_to): return self.backward_general_invprop( initial_As=initial_As, initial_lb=initial_lb, initial_ub=initial_ub, bound_node=bound_node, C=C, start_backpropagation_at_node=start_backpropagation_at_node, bound_lower=bound_lower, bound_upper=bound_upper, average_A=average_A, need_A_only=need_A_only, unstable_idx=unstable_idx, update_mask=update_mask ) roots = self.roots() if start_backpropagation_at_node is None: # When output constraints are used, backward_general_with_output_constraint() # adds additional layers at the end, performs the backpropagation through these, # and then calls backward_general() on the output layer. # In this case, the layer we start from (start_backpropagation_at_node) differs # from the layer that should be bounded (bound_node) # When output constraints are not used, the bounded node is the one where # backpropagation starts. start_backpropagation_at_node = bound_node if self.verbose: logger.debug(f'Bound backward from {start_backpropagation_at_node.__class__.__name__}({start_backpropagation_at_node.name}) ' f'to bound {bound_node.__class__.__name__}({bound_node.name})') if isinstance(C, BatchedCrownC): logger.debug(f' C: {C}') elif C is not None: logger.debug(f' C: shape {C.shape}, type {type(C)}') _print_time = bool(os.environ.get('AUTOLIRPA_PRINT_TIME', 0)) if isinstance(C, BatchedCrownC): # If C is a str, use batched CROWN. If batched CROWN is not intended to # be enabled, C must be a explicitly provided non-str object for this function. if need_A_only or average_A: raise ValueError( 'Batched CROWN is not compatible with ' f'need_A_only={need_A_only}, average_A={average_A}') ret = self.batched_backward( bound_node, C, unstable_idx, batch_size=roots[0].value.shape[0], bound_lower=bound_lower, bound_upper=bound_upper, ) bound_node.lower, bound_node.upper = ret[:2] return ret for n in self.nodes(): n.lA = n.uA = None degree_out = get_degrees(start_backpropagation_at_node) C, batch_size, output_dim, output_shape = self._preprocess_C(C, bound_node) if initial_As is None: start_backpropagation_at_node.lA = C if bound_lower else None start_backpropagation_at_node.uA = C if bound_upper else None else: for layer_name, (lA, uA) in initial_As.items(): self[layer_name].lA = lA self[layer_name].uA = uA assert start_backpropagation_at_node.lA is not None or start_backpropagation_at_node.uA is not None if initial_lb is None: lb = torch.tensor(0., device=self.device) else: lb = initial_lb if initial_ub is None: ub = torch.tensor(0., device=self.device) else: ub = initial_ub # Save intermediate layer A matrices when required. 
A_record = {} queue = deque([start_backpropagation_at_node]) while len(queue) > 0: l = queue.popleft() # backward from l if l.name in self.root_names: continue # if all the succeeds are done, then we can turn to this node in the # next iteration. for l_pre in l.inputs: degree_out[l_pre.name] -= 1 if degree_out[l_pre.name] == 0: queue.append(l_pre) # Initially, l.lA or l.uA will be set to C for this node. if l.lA is not None or l.uA is not None: if self.verbose: logger.debug(f' Bound backward to {l} (out shape {l.output_shape})') if l.lA is not None: logger.debug(' lA type %s shape %s', type(l.lA), list(l.lA.shape)) if l.uA is not None: logger.debug(' uA type %s shape %s', type(l.uA), list(l.uA.shape)) if _print_time: start_time = time.time() self.backward_from[l.name].append(bound_node) if not l.perturbed: if not hasattr(l, 'forward_value'): self.get_forward_value(l) lb, ub = add_constant_node(lb, ub, l) continue if l.zero_uA_mtx and l.zero_lA_mtx: # A matrices are all zero, no need to propagate. continue lA, uA = l.lA, l.uA if (l.name != start_backpropagation_at_node.name and use_beta_crown and getattr(l, 'sparse_betas', None)): lA, uA, lbias, ubias = self.beta_crown_backward_bound( l, lA, uA, start_node=start_backpropagation_at_node) lb = lb + lbias ub = ub + ubias if isinstance(l, BoundOptimizableActivation): # For other optimizable activation functions (TODO: unify with ReLU). if bound_node.name != self.final_node_name: start_shape = bound_node.output_shape[1:] else: start_shape = C.shape[0] l.preserve_mask = update_mask else: start_shape = None A, lower_b, upper_b = l.bound_backward( lA, uA, *l.inputs, start_node=bound_node, unstable_idx=unstable_idx, start_shape=start_shape) # After propagation through this node, we delete its lA, uA variables. if bound_node.name != self.final_name: del l.lA, l.uA if _print_time: torch.cuda.synchronize() time_elapsed = time.time() - start_time if time_elapsed > 5e-3: print(l, time_elapsed) if lb.ndim > 0 and type(lower_b) == Tensor and self.conv_mode == 'patches': lb, ub, lower_b, upper_b = check_patch_biases(lb, ub, lower_b, upper_b) lb = lb + lower_b ub = ub + upper_b if self.return_A and self.needed_A_dict and bound_node.name in self.needed_A_dict: # FIXME remove [0][0] and [0][1]? if len(self.needed_A_dict[bound_node.name]) == 0 or l.name in self.needed_A_dict[bound_node.name]: # A could be either patches (in this case we cannot transpose so directly return) # or matrix (in this case we transpose) A_record.update({ l.name: { "lA": ( A[0][0].detach() if isinstance(A[0][0], Patches) else A[0][0].transpose(0, 1).detach() ) if A[0][0] is not None else None, "uA": ( A[0][1].detach() if isinstance(A[0][1], Patches) else A[0][1].transpose(0, 1).detach() ) if A[0][1] is not None else None, # When not used, lb or ub is tensor(0). "lbias": lb.transpose(0, 1).detach() if lb.ndim > 1 else None, "ubias": ub.transpose(0, 1).detach() if ub.ndim > 1 else None, "unstable_idx": unstable_idx }}) # FIXME: solve conflict with the following case self.A_dict.update({bound_node.name: A_record}) if need_A_only and set(self.needed_A_dict[bound_node.name]) == set(A_record.keys()): # We have collected all A matrices we need. We can return now! self.A_dict.update({bound_node.name: A_record}) # Do not concretize to save time. We just need the A matrices. 
# return A matrix as a dict: {node_start.name: [A_lower, A_upper]} return None, None, self.A_dict for i, l_pre in enumerate(l.inputs): add_bound(l, l_pre, lA=A[i][0], uA=A[i][1]) if lb.ndim >= 2: lb = lb.transpose(0, 1) if ub.ndim >= 2: ub = ub.transpose(0, 1) # TODO merge into `concretize` if (self.cut_used and getattr(self, 'cut_module', None) is not None and self.cut_module.x_coeffs is not None): # propagate input neuron in cut constraints roots[0].lA, roots[0].uA = self.cut_module.input_cut( bound_node, roots[0].lA, roots[0].uA, roots[0].lower.size()[1:], unstable_idx, batch_mask=update_mask) lb, ub = self.concretize_bounds( node=bound_node, lower=lb, upper=ub, concretize_mode='backward', batch_size=batch_size, output_dim=output_dim, average_A=average_A, clip_neuron_selection_value=self.clip_neuron_selection_value, clip_neuron_selection_type=self.clip_neuron_selection_type ) if self.return_A and self.needed_A_dict and bound_node.name in self.needed_A_dict: save_root_A( bound_node, A_record, self.A_dict, roots, self.needed_A_dict[bound_node.name], lb=lb, ub=ub, unstable_idx=unstable_idx) for root in self.roots(): # These are saved for `save_root_A`. We do not need them afterwards. root.lb = root.ub = None if tighten_input_bounds and isinstance(bound_node, BoundInput): shape = bound_node.perturbation.x_L.shape lb_reshaped = lb.reshape(shape) bound_node.perturbation.x_L = lb_reshaped - lb_reshaped.detach() + torch.max(bound_node.perturbation.x_L.detach(), lb_reshaped.detach()) ub_reshaped = ub.reshape(shape) bound_node.perturbation.x_U = ub_reshaped - ub_reshaped.detach() + torch.min(bound_node.perturbation.x_U.detach(), ub_reshaped.detach()) lb = lb.view(batch_size, *output_shape) if bound_lower else None ub = ub.view(batch_size, *output_shape) if bound_upper else None # TODO merge into `concretize` if (self.cut_used and getattr(self, "cut_module", None) is not None and self.cut_module.cut_bias is not None): # propagate cut bias in cut constraints lb, ub = self.cut_module.bias_cut(bound_node, lb, ub, unstable_idx, batch_mask=update_mask) if lb is not None and ub is not None and ((lb-ub)>0).sum().item() > 0: # make sure there is no bug for cut constraints propagation print(f"Warning: lb is larger than ub with diff: {(lb-ub)[(lb-ub)>0].max().item()}") if self.verbose: logger.debug('') if self.invprop_enabled(): lb, ub = self.invprop_check_infeasible_bounds(lb, ub) if self.return_A: if self.bound_opts['clip_in_alpha_crown'] and self.final_name in self.A_dict.keys(): for v in self.A_dict[self.final_name].values(): if v["lA"] is not None: self.constraints_optimized = (v["lA"], v["lbias"]) return lb, ub, self.A_dict else: return lb, ub def get_unstable_size(unstable_idx): if isinstance(unstable_idx, tuple): return unstable_idx[0].numel() else: return unstable_idx.numel() def check_optimized_variable_sparsity(self: 'BoundedModule', node): alpha_sparsity = None # unknown, optimizable variables are not created for this node. for relu in self.relus: # FIXME: this hardcoded for ReLUs. Need to support other optimized nonlinear functions. # alpha_lookup_idx is only created for sparse-spec alphas. if relu.alpha_lookup_idx is not None and node.name in relu.alpha_lookup_idx: if relu.alpha_lookup_idx[node.name] is not None: # This node was created with sparse alpha alpha_sparsity = True elif self.bound_opts['optimize_bound_args']['use_shared_alpha']: # Shared alpha, the spec dimension is 1, and sparsity can be supported. 
alpha_sparsity = True else: alpha_sparsity = False break return alpha_sparsity def get_sparse_C(self: 'BoundedModule', node, ref_intermediate): (sparse_intermediate_bounds, ref_intermediate_lb, ref_intermediate_ub) = ref_intermediate sparse_conv_intermediate_bounds = self.bound_opts.get('sparse_conv_intermediate_bounds', False) minimum_sparsity = self.bound_opts.get('minimum_sparsity', 0.9) crown_batch_size = self.bound_opts.get('crown_batch_size', 1e9) dim = int(prod(node.output_shape[1:])) batch_size = self.batch_size reduced_dim = False # Only partial neurons (unstable neurons) are bounded. unstable_idx = None unstable_size = np.inf newC = None alpha_is_sparse = self.check_optimized_variable_sparsity(node) # NOTE: batched CROWN is so far only supported for some of the cases below # FIXME: C matrix shape incorrect for BoundParams. if (isinstance(node, BoundLinear) or isinstance(node, BoundMatMul)) and int( os.environ.get('AUTOLIRPA_USE_FULL_C', 0)) == 0: if sparse_intermediate_bounds: # If we are doing bound refinement and reference bounds are given, # we only refine unstable neurons. # Also, if we are checking against LP solver we will refine all # neurons and do not use this optimization. # For each batch element, we find the unstable neurons. unstable_idx, unstable_size = self.get_unstable_locations( ref_intermediate_lb, ref_intermediate_ub) if unstable_size == 0: # Do nothing, no bounds will be computed. reduced_dim = True unstable_idx = [] elif unstable_size > crown_batch_size: # Create C in batched CROWN newC = BatchedCrownC('OneHot') reduced_dim = True elif (((0 < unstable_size <= minimum_sparsity * dim and alpha_is_sparse is None) or alpha_is_sparse) and len(node.output_shape) <= 2): # When we already have sparse alpha for this layer, we always # use sparse C. Otherwise we determine it by sparsity. # Create an abstract C matrix, the unstable_idx are the non-zero # elements in specifications for all batches. # Shouldn't use OneHotC if the output is not a 1-d tensor. newC = OneHotC( [batch_size, unstable_size, *node.output_shape[1:]], self.device, unstable_idx, None) reduced_dim = True else: unstable_idx = None del ref_intermediate_lb, ref_intermediate_ub if not reduced_dim: if dim > crown_batch_size: newC = BatchedCrownC('eye') else: newC = eyeC([batch_size, dim, *node.output_shape[1:]], self.device) elif node.patches_start and node.mode == "patches": if sparse_intermediate_bounds: unstable_idx, unstable_size = self.get_unstable_locations( ref_intermediate_lb, ref_intermediate_ub, conv=True) if unstable_size == 0: # Do nothing, no bounds will be computed. reduced_dim = True unstable_idx = [] elif unstable_size > crown_batch_size: # Create C in batched CROWN newC = BatchedCrownC('Patches') reduced_dim = True # We sum over the channel direction, so need to multiply that. elif (sparse_conv_intermediate_bounds and unstable_size <= minimum_sparsity * dim and alpha_is_sparse is None) or alpha_is_sparse: # When we already have sparse alpha for this layer, we always # use sparse C. Otherwise we determine it by sparsity. # Create an abstract C matrix, the unstable_idx are the non-zero # elements in specifications for all batches. # The shape of patches is [unstable_size, batch, C, H, W]. 
newC = Patches( shape=[unstable_size, batch_size, *node.output_shape[1:-2], 1, 1], identity=1, unstable_idx=unstable_idx, output_shape=[batch_size, *node.output_shape[1:]]) reduced_dim = True else: unstable_idx = None del ref_intermediate_lb, ref_intermediate_ub # Here we create an Identity Patches object if not reduced_dim: newC = Patches( None, 1, 0, [node.output_shape[1], batch_size, *node.output_shape[2:], *node.output_shape[1:-2], 1, 1], 1, output_shape=[batch_size, *node.output_shape[1:]]) elif (isinstance(node, (BoundAdd, BoundSub)) and node.mode == "patches" and len(node.output_shape) >= 4): # FIXME: BoundAdd does not always have patches. Need to use a better way # to determine patches mode. # FIXME: We should not hardcode BoundAdd here! if sparse_intermediate_bounds: if crown_batch_size < 1e9: warnings.warn('Batched CROWN is not supported in this case') unstable_idx, unstable_size = self.get_unstable_locations( ref_intermediate_lb, ref_intermediate_ub, conv=True) if unstable_size == 0: # Do nothing, no bounds will be computed. reduced_dim = True unstable_idx = [] elif (sparse_conv_intermediate_bounds and unstable_size <= minimum_sparsity * dim and alpha_is_sparse is None) or alpha_is_sparse: # When we already have sparse alpha for this layer, we always # use sparse C. Otherwise we determine it by sparsity. num_channel = node.output_shape[-3] # Identity patch size: (ouc_c, 1, 1, 1, out_c, 1, 1). patches = ( torch.eye(num_channel, device=self.device, dtype=list(self.parameters())[0].dtype)).view( num_channel, 1, 1, 1, num_channel, 1, 1) # Expand to (out_c, 1, unstable_size, out_c, 1, 1). patches = patches.expand(-1, 1, node.output_shape[-2], node.output_shape[-1], -1, 1, 1) patches = patches[unstable_idx[0], :, unstable_idx[1], unstable_idx[2]] # Expand with the batch dimension. Final shape # (unstable_size, batch_size, out_c, 1, 1). patches = patches.expand(-1, batch_size, -1, -1, -1) newC = Patches( patches, 1, 0, patches.shape, unstable_idx=unstable_idx, output_shape=[batch_size, *node.output_shape[1:]]) reduced_dim = True else: unstable_idx = None del ref_intermediate_lb, ref_intermediate_ub if not reduced_dim: num_channel = node.output_shape[-3] # Identity patch size: (ouc_c, 1, 1, 1, out_c, 1, 1). patches = ( torch.eye(num_channel, device=self.device, dtype=list(self.parameters())[0].dtype)).view( num_channel, 1, 1, 1, num_channel, 1, 1) # Expand to (out_c, batch, out_h, out_w, out_c, 1, 1). patches = patches.expand(-1, batch_size, node.output_shape[-2], node.output_shape[-1], -1, 1, 1) newC = Patches(patches, 1, 0, patches.shape, output_shape=[ batch_size, *node.output_shape[1:]]) else: if sparse_intermediate_bounds: unstable_idx, unstable_size = self.get_unstable_locations( ref_intermediate_lb, ref_intermediate_ub) if unstable_size == 0: # Do nothing, no bounds will be computed. 
reduced_dim = True unstable_idx = [] elif unstable_size > crown_batch_size: # Create in C in batched CROWN newC = BatchedCrownC('eye') reduced_dim = True elif (unstable_size <= minimum_sparsity * dim and alpha_is_sparse is None) or alpha_is_sparse: newC = torch.zeros([1, unstable_size, dim], device=self.device) # Fill the corresponding elements to 1.0 newC[0, torch.arange(unstable_size), unstable_idx] = 1.0 newC = newC.expand(batch_size, -1, -1).view( batch_size, unstable_size, *node.output_shape[1:]) reduced_dim = True else: unstable_idx = None del ref_intermediate_lb, ref_intermediate_ub if not reduced_dim: if dim > 1000: warnings.warn( f"Creating an identity matrix with size {dim}x{dim} for node {node}. " "This may indicate poor performance for bound computation. " "If you see this message on a small network please submit " "a bug report.", stacklevel=2) if dim > crown_batch_size: newC = BatchedCrownC('eye') else: newC = torch.eye(dim, device=self.device).unsqueeze(0).expand( batch_size, -1, -1 ).view(batch_size, dim, *node.output_shape[1:]) return newC, reduced_dim, unstable_idx, unstable_size def restore_sparse_bounds(self: 'BoundedModule', node, unstable_idx, unstable_size, ref_intermediate, new_lower=None, new_upper=None): ref_intermediate_lb, ref_intermediate_ub = ref_intermediate[1:] batch_size = self.batch_size if unstable_size == 0: # No unstable neurons. Skip the update. node.lower = ref_intermediate_lb.detach().clone() node.upper = ref_intermediate_ub.detach().clone() else: if new_lower is None: new_lower = node.lower if new_upper is None: new_upper = node.upper # If we only calculated unstable neurons, we need to scatter the results back based on reference bounds. if isinstance(unstable_idx, tuple): lower = ref_intermediate_lb.detach().clone() upper = ref_intermediate_ub.detach().clone() # Conv layer with patches, the unstable_idx is a 3-element tuple for 3 indices (C, H,W) of unstable neurons. if len(unstable_idx) == 3: lower[:, unstable_idx[0], unstable_idx[1], unstable_idx[2]] = new_lower upper[:, unstable_idx[0], unstable_idx[1], unstable_idx[2]] = new_upper elif len(unstable_idx) == 4: lower[:, unstable_idx[0], unstable_idx[1], unstable_idx[2], unstable_idx[3]] = new_lower upper[:, unstable_idx[0], unstable_idx[1], unstable_idx[2], unstable_idx[3]] = new_upper else: # Other layers. lower = ref_intermediate_lb.detach().clone().reshape(batch_size, -1) upper = ref_intermediate_ub.detach().clone().reshape(batch_size, -1) lower[:, unstable_idx] = new_lower.view(batch_size, -1) upper[:, unstable_idx] = new_upper.view(batch_size, -1) node.lower = lower.view(batch_size, *node.output_shape[1:]) node.upper = upper.view(batch_size, *node.output_shape[1:]) def get_degrees(node_start): if not isinstance(node_start, list): node_start = [node_start] degrees = {} added = {} queue = deque() for node in node_start: queue.append(node) added[node.name] = True while len(queue) > 0: l = queue.popleft() for l_pre in l.inputs: degrees[l_pre.name] = degrees.get(l_pre.name, 0) + 1 if not added.get(l_pre.name, False): queue.append(l_pre) added[l_pre.name] = True return degrees def _preprocess_C(self: 'BoundedModule', C, node): if isinstance(C, Patches): if C.unstable_idx is None: # Patches have size (out_c, batch, out_h, out_w, c, h, w). 
if len(C.shape) == 7: out_c, batch_size, out_h, out_w = C.shape[:4] output_dim = out_c * out_h * out_w else: out_dim, batch_size, out_c, out_h, out_w = C.shape[:5] output_dim = out_dim * out_c * out_h * out_w else: # Patches have size (unstable_size, batch, c, h, w). output_dim, batch_size = C.shape[:2] else: batch_size, output_dim = C.shape[:2] # The C matrix specified by the user has shape (batch, spec) # but internally we have (spec, batch) format. if not isinstance(C, (eyeC, Patches, OneHotC)): C = C.transpose(0, 1).reshape( output_dim, batch_size, *node.output_shape[1:]) elif isinstance(C, eyeC): C = C._replace(shape=(C.shape[1], C.shape[0], *C.shape[2:])) elif isinstance(C, OneHotC): C = C._replace( shape=(C.shape[1], C.shape[0], *C.shape[2:]), index=C.index.transpose(0,-1), coeffs=None if C.coeffs is None else C.coeffs.transpose(0,-1)) if isinstance(C, Patches) and C.unstable_idx is not None: # Sparse patches; the output shape is (unstable_size, ). output_shape = [C.shape[0]] elif prod(node.output_shape[1:]) != output_dim and not isinstance(C, Patches): # For the output node, the shape of the bound follows C # instead of the original output shape # # TODO Maybe don't set node.lower and node.upper in this case? # Currently some codes still depend on node.lower and node.upper output_shape = [-1] else: # Generally, the shape of the bounds match the output shape of the node output_shape = node.output_shape[1:] return C, batch_size, output_dim, output_shape def addA(A1, A2): """ Add two A (each of them is either Tensor or Patches) """ if type(A1) == type(A2): return A1 + A2 elif type(A1) == Patches: return A1 + A2 elif type(A2) == Patches: return A2 + A1 else: raise NotImplementedError(f'Unsupported types for A1 ({type(A1)}) and A2 ({type(A2)}') def add_bound(node, node_pre, lA=None, uA=None): """ Propagate lA and uA to a preceding node. @param node: The current bounded node @param node_pre: An input of the current bounded node that needs lA, lbias ,etc. back propagated to it @param lA: lA matrix associated with the current bounded node @param uA: uA matrix associated with the current bounded node @return: """ if lA is not None: if node_pre.lA is None: # First A added to this node. node_pre.zero_lA_mtx = node.zero_backward_coeffs_l node_pre.lA = lA else: node_pre.zero_lA_mtx = node_pre.zero_lA_mtx and node.zero_backward_coeffs_l new_node_lA = addA(node_pre.lA, lA) node_pre.lA = new_node_lA if uA is not None: if node_pre.uA is None: # First A added to this node. node_pre.zero_uA_mtx = node_pre.zero_backward_coeffs_u node_pre.uA = uA else: node_pre.zero_uA_mtx = node_pre.zero_uA_mtx and node.zero_backward_coeffs_u node_pre.uA = addA(node_pre.uA, uA) def add_constant_node(lb, ub, node): new_lb = node.get_bias(node.lA, node.forward_value) new_ub = node.get_bias(node.uA, node.forward_value) if isinstance(lb, Tensor) and isinstance(new_lb, Tensor) and lb.ndim > 0 and lb.ndim != new_lb.ndim: new_lb = new_lb.reshape(lb.shape) if isinstance(ub, Tensor) and isinstance(new_ub, Tensor) and ub.ndim > 0 and ub.ndim != new_ub.ndim: new_ub = new_ub.reshape(ub.shape) lb = lb + new_lb # FIXME (09/16): shape for the bias of BoundConstant. 
ub = ub + new_ub return lb, ub def save_root_A(node, A_record, A_dict, roots, needed_A_dict, lb, ub, unstable_idx): root_A_record = {} for i in range(len(roots)): if roots[i].lA is None and roots[i].uA is None: continue if roots[i].name in needed_A_dict: if roots[i].lA is not None: if isinstance(roots[i].lA, Patches): _lA = roots[i].lA.detach() else: _lA = roots[i].lA.transpose(0, 1).detach() else: _lA = None if roots[i].uA is not None: if isinstance(roots[i].uA, Patches): _uA = roots[i].uA.detach() else: _uA = roots[i].uA.transpose(0, 1).detach() else: _uA = None # Include all the bias terms except the one concretized from the # current root node. lb_ = lb - roots[i].lb if (roots[i].lb is not None) else lb ub_ = ub - roots[i].ub if (roots[i].ub is not None) else ub root_A_record.update({roots[i].name: { "lA": _lA, "uA": _uA, # When not used, lb or ub is tensor(0). They have been transposed above. "lbias": lb_.detach() if lb_.ndim > 1 else None, "ubias": ub_.detach() if ub_.ndim > 1 else None, "unstable_idx": unstable_idx }}) root_A_record.update(A_record) # merge to existing A_record A_dict.update({node.name: root_A_record}) def select_unstable_idx(ref_intermediate_lb, ref_intermediate_ub, unstable_locs, max_crown_size): """When there are too many unstable neurons, only bound those with the loosest reference bounds.""" gap = ( ref_intermediate_ub[:, unstable_locs] - ref_intermediate_lb[:, unstable_locs]).sum(dim=0) indices = torch.argsort(gap, descending=True) indices_selected = indices[:max_crown_size] indices_selected, _ = torch.sort(indices_selected) print(f'{len(indices_selected)}/{len(indices)} unstable neurons selected for CROWN') return indices_selected def get_unstable_locations(self: 'BoundedModule', ref_intermediate_lb, ref_intermediate_ub, conv=False, channel_only=False): # FIXME (2023): This function should be a member class of the Bound object, since the # definition of unstable neurons depends on the activation function. max_crown_size = self.bound_opts.get('max_crown_size', int(1e9)) # For conv layer we only check the case where all neurons are active/inactive. unstable_masks = torch.logical_and(ref_intermediate_lb < 0, ref_intermediate_ub > 0) # For simplicity, merge unstable locations for all elements in this batch. TODO: use individual unstable mask. # It has shape (H, W) indicating if a neuron is unstable/stable. # TODO: so far we merge over the batch dimension to allow easier implementation. if channel_only: # Only keep channels with unstable neurons. Used for initializing alpha. unstable_locs = unstable_masks.sum(dim=(0,2,3)).bool() # Shape is consistent with linear layers: a list of unstable neuron channels (no batch dim). unstable_idx = unstable_locs.nonzero().squeeze(1) else: if not conv and unstable_masks.ndim > 2: # Flatten the conv layer shape. unstable_masks = unstable_masks.reshape(unstable_masks.size(0), -1) ref_intermediate_lb = ref_intermediate_lb.reshape(ref_intermediate_lb.size(0), -1) ref_intermediate_ub = ref_intermediate_ub.reshape(ref_intermediate_ub.size(0), -1) unstable_locs = unstable_masks.sum(dim=0).bool() if conv: # Now converting it to indices for these unstable nuerons. # These are locations (i,j) of unstable neurons. 
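# --- Illustrative sketch (not part of the library source) ---
# select_unstable_idx above keeps only the unstable neurons with the widest
# (loosest) reference bounds when their total count exceeds max_crown_size.
# Toy values with max_crown_size = 2:
import torch

lb = torch.tensor([[-1.0, -0.5, 0.2, -2.0]])
ub = torch.tensor([[0.5, 2.0, 0.8, 0.1]])
unstable_locs = torch.logical_and(lb < 0, ub > 0).sum(dim=0).bool()
gap = (ub[:, unstable_locs] - lb[:, unstable_locs]).sum(dim=0)
order = torch.argsort(gap, descending=True)
selected = torch.sort(order[:2]).values  # indices into the unstable subset
# --- End of sketch ---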
unstable_idx = unstable_locs.nonzero(as_tuple=True) else: unstable_idx = unstable_locs.nonzero().squeeze(1) unstable_size = get_unstable_size(unstable_idx) if unstable_size > max_crown_size: indices_seleted = select_unstable_idx( ref_intermediate_lb, ref_intermediate_ub, unstable_locs, max_crown_size) if isinstance(unstable_idx, tuple): unstable_idx = tuple(u[indices_seleted] for u in unstable_idx) else: unstable_idx = unstable_idx[indices_seleted] unstable_size = get_unstable_size(unstable_idx) return unstable_idx, unstable_size def get_alpha_crown_start_nodes( self: 'BoundedModule', node, c=None, share_alphas=False, final_node_name=None, ): """ Given a layer "node", return a list of following nodes after this node whose bounds will propagate through this node. Each element in the list is a tuple with 3 elements: (following_node_name, following_node_shape, unstable_idx) """ # When use_full_conv_alpha is True, conv layers do not share alpha. sparse_intermediate_bounds = self.bound_opts.get('sparse_intermediate_bounds', False) use_full_conv_alpha_thresh = self.bound_opts.get('use_full_conv_alpha_thresh', 512) start_nodes = [] for nj in self.backward_from[node.name]: # Pre-activation layers. unstable_idx = None use_sparse_conv = None # Whether a sparse-spec alpha is used for a conv output node. None for non-conv output node. use_full_conv_alpha = self.bound_opts.get('use_full_conv_alpha', False) # Find the indices of unstable neuron, used for create sparse-feature alpha. if (sparse_intermediate_bounds and isinstance(node, BoundOptimizableActivation) and nj.name != final_node_name and not share_alphas): # Create sparse optimization variables for intermediate neurons. # These are called "sparse-spec" alpha because we only create alpha only for # the intermediate of final output nodes whose bounds are needed. # "sparse-spec" alpha makes sense only for piece-wise linear functions. # For other intermediate nodes, there is no "unstable" or "stable" neuron. # FIXME: whether an layer has unstable/stable neurons should be in Bound obj. # FIXME: get_unstable_locations should be a member class of ReLU. if len(nj.output_name) == 1 and isinstance(self[nj.output_name[0]], (BoundRelu, BoundSignMerge, BoundMaxPool)): if ((isinstance(nj, (BoundLinear, BoundMatMul))) and int(os.environ.get('AUTOLIRPA_USE_FULL_C', 0)) == 0): # unstable_idx has shape [neuron_size_of_nj]. Batch dimension is reduced. unstable_idx, _ = self.get_unstable_locations(nj.lower, nj.upper) elif isinstance(nj, (BoundConv, BoundAdd, BoundSub, BoundBatchNormalization)) and nj.mode == 'patches': if nj.name in node.patch_size: # unstable_idx has shape [channel_size_of_nj]. Batch and spatial dimensions are reduced. unstable_idx, _ = self.get_unstable_locations( nj.lower, nj.upper, channel_only=not use_full_conv_alpha, conv=True) use_sparse_conv = False # alpha is shared among channels. Sparse-spec alpha in hw dimension not used. if use_full_conv_alpha and unstable_idx[0].size(0) > use_full_conv_alpha_thresh: # Too many unstable neurons. Using shared alpha per channel. unstable_idx, _ = self.get_unstable_locations( nj.lower, nj.upper, channel_only=True, conv=True) use_full_conv_alpha = False else: # Matrix mode for conv layers. Although the bound propagation started with patches mode, # when A matrix is propagated to this layer, it might become a dense matrix since patches # can be come very large after many layers. In this case, # unstable_idx has shape [c_out * h_out * w_out]. Batch dimension is reduced. 
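# --- Illustrative sketch (not part of the library source) ---
# For conv layers, get_unstable_locations above returns either per-neuron
# indices as a tuple of (C, H, W) index tensors, or, with channel_only=True,
# just the channels containing any unstable neuron (used when alpha is shared
# per channel). Toy shapes:
import torch

lb = -torch.rand(2, 3, 4, 4)            # (batch, C, H, W) reference lower bounds
ub = lb + 2 * torch.rand_like(lb)       # reference upper bounds
masks = torch.logical_and(lb < 0, ub > 0)
per_neuron = masks.sum(dim=0).bool().nonzero(as_tuple=True)    # (C_idx, H_idx, W_idx)
per_channel = masks.sum(dim=(0, 2, 3)).bool().nonzero().squeeze(1)
# --- End of sketch ---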
unstable_idx, _ = self.get_unstable_locations(nj.lower, nj.upper) use_sparse_conv = True # alpha is not shared among channels, and is sparse in spec dimension. else: # FIXME: we should not check for fixed names here. Need to enable patches mode more generally. if isinstance(nj, (BoundConv, BoundAdd, BoundSub, BoundBatchNormalization)) and nj.mode == 'patches': use_sparse_conv = False # Sparse-spec alpha can never be used, because it is not a ReLU activation. if nj.name == final_node_name: # Final layer, always the number of specs as the shape. size_final = self[final_node_name].output_shape[1:] if c is None else c.size(1) # The 4-th element indicates that this start node is the final node, # which may be utilized by operators that do not know the name of # the final node. start_nodes.append((final_node_name, size_final, None, True)) continue if share_alphas: # all intermediate neurons from the same layer share the same set of alphas. output_shape = 1 elif isinstance(node, BoundOptimizableActivation) and node.patch_size and nj.name in node.patch_size: # Patches mode. Use output channel size as the spec size. This still shares some alpha, but better than no sharing. if use_full_conv_alpha: # alphas not shared among channels, so the spec dim shape is c,h,w # The patch size is [out_ch, batch, out_h, out_w, in_ch, H, W]. We use out_ch as the output shape. output_shape = node.patch_size[nj.name][0], node.patch_size[nj.name][2], node.patch_size[nj.name][3] else: # The spec dim is c only, and is shared among h, w. output_shape = node.patch_size[nj.name][0] assert not sparse_intermediate_bounds or use_sparse_conv is False # Double check our assumption holds. If this fails, then we created wrong shapes for alpha. else: # Output is linear layer (use_sparse_conv = None), or patch converted to matrix (use_sparse_conv = True). assert not sparse_intermediate_bounds or use_sparse_conv is not False # Double check our assumption holds. If this fails, then we created wrong shapes for alpha. output_shape = nj.lower.shape[1:] # FIXME: for non-relu activations it's still expecting a prod. 
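# --- Illustrative sketch (not part of the library source) ---
# Each entry appended just below is a 4-tuple (node_name, spec_shape,
# unstable_idx, is_final_node); the 4th element is the final-node flag
# mentioned in the comments above. The names, shapes and indices here are
# purely hypothetical:
import torch

start_nodes_example = [
    ('/input.4', torch.Size([64]), torch.tensor([3, 10, 17]), False),  # sparse-spec alpha
    ('/output', 10, None, True),                                       # final node: spec count
]
# --- End of sketch ---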
start_nodes.append((nj.name, output_shape, unstable_idx, False)) return start_nodes def merge_A(node, batch_A, ret_A): for key0 in batch_A: if key0 not in ret_A: ret_A[key0] = {} for key1 in batch_A[key0]: value = batch_A[key0][key1] if key1 not in ret_A[key0]: # create: ret_A[key0].update({ key1: { "lA": value["lA"], "uA": value["uA"], "lbias": value["lbias"], "ubias": value["ubias"], "unstable_idx": value["unstable_idx"] } }) elif key0 == node.name: # merge: # the batch splitting only happens for current node, i.e., # for other nodes the returned lA should be the same across different batches # so no need to repeatly merge them exist = ret_A[key0][key1] if exist["unstable_idx"] is not None: if isinstance(exist["unstable_idx"], torch.Tensor): merged_unstable = torch.cat([ exist["unstable_idx"], value['unstable_idx']], dim=0) elif isinstance(exist["unstable_idx"], tuple): if exist["unstable_idx"]: merged_unstable = tuple([ torch.cat([exist["unstable_idx"][idx], value['unstable_idx'][idx]], dim=0) for idx in range(len(exist['unstable_idx']))] ) else: merged_unstable = None else: raise NotImplementedError( f'Unsupported type {type(exist["unstable_idx"])}') else: merged_unstable = None merge_dict = {"unstable_idx": merged_unstable} for name in ["lA", "uA"]: if exist[name] is not None: if isinstance(exist[name], torch.Tensor): # for matrix the spec dim is 1 merge_dict[name] = torch.cat([exist[name], value[name]], dim=1) else: assert isinstance(exist[name], Patches) # for patches the spec dim`is 0 merge_dict[name] = exist[name].create_similar( torch.cat([exist[name].patches, value[name].patches], dim=0), unstable_idx=merged_unstable ) else: merge_dict[name] = None for name in ["lbias", "ubias"]: if exist[name] is not None: # for bias the spec dim in 1 merge_dict[name] = torch.cat([exist[name], value[name]], dim=1) else: merge_dict[name] = None ret_A[key0][key1] = merge_dict return ret_A ================================================ FILE: auto_LiRPA/beta_crown.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### from collections import OrderedDict import numpy as np import torch from torch import Tensor from .patches import Patches, inplace_unfold from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule class SparseBeta: def __init__(self, shape, bias=False, betas=None, device='cpu'): self.device = device self.val = torch.zeros(shape) self.loc = torch.zeros(shape, dtype=torch.long, device=device) self.sign = torch.zeros(shape, device=device) self.bias = torch.zeros(shape, device=device) if bias else None if betas: for bi in range(len(betas)): if betas[bi] is not None: self.val[bi, :len(betas[bi])] = betas[bi] self.val = self.val.detach().to( device, non_blocking=True).requires_grad_() def apply_splits(self, history, key): loc_numpy = np.zeros(self.loc.shape, dtype=np.int32) sign_numpy = np.zeros(self.sign.shape) if self.bias is not None: bias_numpy = np.zeros(self.bias.shape) for bi in range(len(history)): # Add history splits. (layer, neuron) is the current decision. split_locs, split_coeffs = history[bi][key][:2] split_len = len(split_locs) if split_len > 0: sign_numpy[bi, :split_len] = split_coeffs loc_numpy[bi, :split_len] = split_locs if self.bias is not None: split_bias = history[bi][key][2] bias_numpy[bi, :split_len] = split_bias self.loc.copy_(torch.from_numpy(loc_numpy), non_blocking=True) self.sign.copy_(torch.from_numpy(sign_numpy), non_blocking=True) if self.bias is not None: self.bias.copy_(torch.from_numpy(bias_numpy), non_blocking=True) def get_split_nodes(self: 'BoundedModule'): self.split_nodes = [] self.split_activations = {} splittable_activations = self.get_splittable_activations() self._set_used_nodes(self[self.final_name]) for layer in self.layers_requiring_bounds: split_activations_ = [] for activation_name in layer.output_name: activation = self[activation_name] if activation in splittable_activations: split_activations_.append( (activation, activation.inputs.index(layer))) if split_activations_: if layer.lower is None and layer.upper is None: continue self.split_nodes.append(layer) self.split_activations[layer.name] = split_activations_ return self.split_nodes, self.split_activations def set_beta(self: 'BoundedModule', enable_opt_interm_bounds, parameters, lr_beta, lr_cut_beta, cutter, dense_coeffs_mask): """ Set betas, best_betas, coeffs, dense_coeffs_mask, best_coeffs, biases and best_biases. 
""" coeffs = None betas = [] best_betas = OrderedDict() # TODO compute only once self.nodes_with_beta = [] for node in self.split_nodes: if not hasattr(node, 'sparse_betas'): continue self.nodes_with_beta.append(node) if enable_opt_interm_bounds: for sparse_beta in node.sparse_betas.values(): if sparse_beta is not None: betas.append(sparse_beta.val) best_betas[node.name] = { beta_m: sparse_beta.val.detach().clone() for beta_m, sparse_beta in node.sparse_betas.items() } else: betas.append(node.sparse_betas[0].val) best_betas[node.name] = node.sparse_betas[0].val.detach().clone() # Beta has shape (batch, max_splits_per_layer) parameters.append({ 'params': [item for item in betas if item.numel() > 0], 'lr': lr_beta, 'batch_dim': 0}) if self.cut_used: self.set_beta_cuts(parameters, lr_cut_beta, betas, best_betas, cutter) return betas, best_betas, coeffs, dense_coeffs_mask def set_beta_cuts(self: 'BoundedModule', parameters, lr_cut_beta, betas, best_betas, cutter): # also need to optimize cut betas parameters.append({'params': self.cut_beta_params, 'lr': lr_cut_beta, 'batch_dim': 0}) betas += self.cut_beta_params best_betas['cut'] = [beta.detach().clone() for beta in self.cut_beta_params] if getattr(cutter, 'opt', False): parameters.append(cutter.get_parameters()) def reset_beta(self: 'BoundedModule', node, shape, betas, bias=False, start_nodes=None): # Create only the non-zero beta. For each layer, it is padded to maximal length. # We create tensors on CPU first, and they will be transferred to GPU after initialized. if self.bound_opts.get('enable_opt_interm_bounds', False): node.sparse_betas = { key: SparseBeta( shape, betas=[(betas[j][i] if betas[j] is not None else None) for j in range(len(betas))], device=self.device, bias=bias, ) for i, key in enumerate(start_nodes) } else: node.sparse_betas = [SparseBeta( shape, betas=betas, device=self.device, bias=bias)] def beta_crown_backward_bound(self: 'BoundedModule', node, lA, uA, start_node=None): """Update A and bias with Beta-CROWN. Must be explicitly called at the end of "bound_backward". """ # Regular Beta CROWN with single neuron split # Each split constraint only has single neuron (e.g., second ReLU neuron > 0). A = lA if lA is not None else uA lbias = ubias = 0 def _bias_unsupported(): raise NotImplementedError('Bias for beta not supported in this case.') if type(A) is Patches: if not self.bound_opts.get('enable_opt_interm_bounds', False): raise NotImplementedError('Sparse beta not supported in the patches mode') if node.sparse_betas[start_node.name].bias is not None: _bias_unsupported() # expand sparse_beta to full beta beta_values = (node.sparse_betas[start_node.name].val * node.sparse_betas[start_node.name].sign) beta_indices = node.sparse_betas[start_node.name].loc node.masked_beta = torch.zeros(2, *node.shape).reshape(2, -1).to(A.patches.dtype) node.non_deter_scatter_add( node.masked_beta, dim=1, index=beta_indices, src=beta_values.to(node.masked_beta.dtype)) node.masked_beta = node.masked_beta.reshape(2, *node.shape) # unfold the beta as patches, size (batch, out_h, out_w, in_c, H, W) A_patches = A.patches masked_beta_unfolded = inplace_unfold( node.masked_beta, kernel_size=A_patches.shape[-2:], padding=A.padding, stride=A.stride, inserted_zeros=A.inserted_zeros, output_padding=A.output_padding) if A.unstable_idx is not None: masked_beta_unfolded = masked_beta_unfolded.permute(1, 2, 0, 3, 4, 5) # After selection, the shape is (unstable_size, batch, in_c, H, W). 
masked_beta_unfolded = masked_beta_unfolded[A.unstable_idx[1], A.unstable_idx[2]] else: # Add the spec (out_c) dimension. masked_beta_unfolded = masked_beta_unfolded.unsqueeze(0) if node.alpha_beta_update_mask is not None: masked_beta_unfolded = masked_beta_unfolded[node.alpha_beta_update_mask] if uA is not None: uA = uA.create_similar(uA.patches + masked_beta_unfolded) if lA is not None: lA = lA.create_similar(lA.patches - masked_beta_unfolded) elif type(A) is Tensor: if self.bound_opts.get('enable_opt_interm_bounds', False): if node.sparse_betas[start_node.name].bias is not None: _bias_unsupported() # For matrix mode, beta is sparse. beta_values = ( node.sparse_betas[start_node.name].val * node.sparse_betas[start_node.name].sign ).expand(A.size(0), -1, -1) # node.single_beta_loc has shape [batch, max_single_split]. # Need to expand at the specs dimension. beta_indices = (node.sparse_betas[start_node.name].loc .unsqueeze(0).expand(A.size(0), -1, -1)) beta_bias = node.sparse_betas[start_node.name].bias else: # For matrix mode, beta is sparse. beta_values = ( node.sparse_betas[0].val * node.sparse_betas[0].sign ).expand(A.size(0), -1, -1) # self.single_beta_loc has shape [batch, max_single_split]. # Need to expand at the specs dimension. beta_indices = node.sparse_betas[0].loc.unsqueeze(0).expand(A.size(0), -1, -1) beta_bias = node.sparse_betas[0].bias # For conv layer, the last dimension is flattened in indices. beta_values = beta_values.to(A.dtype) if beta_bias is not None: beta_bias = beta_bias.expand(A.size(0), -1, -1) if node.alpha_beta_update_mask is not None: beta_indices = beta_indices[:, node.alpha_beta_update_mask] beta_values = beta_values[:, node.alpha_beta_update_mask] if beta_bias is not None: beta_bias = beta_bias[:, node.alpha_beta_update_mask] if uA is not None: uA = node.non_deter_scatter_add( uA.reshape(uA.size(0), uA.size(1), -1), dim=2, index=beta_indices, src=beta_values).view(uA.size()) if lA is not None: lA = node.non_deter_scatter_add( lA.reshape(lA.size(0), lA.size(1), -1), dim=2, index=beta_indices, src=beta_values.neg()).view(lA.size()) if beta_bias is not None: bias = (beta_values * beta_bias).sum(dim=-1) lbias = bias ubias = -bias else: raise RuntimeError(f"Unknown type {type(A)} for A") return lA, uA, lbias, ubias def print_optimized_beta(acts): masked_betas = [] for model in acts: masked_betas.append(model.masked_beta) if model.history_beta_used: print(f'{model.name} history beta', model.new_history_beta.squeeze()) if model.split_beta_used: print(f'{model.name} split beta:', model.split_beta.view(-1)) print(f'{model.name} bias:', model.split_bias) ================================================ FILE: auto_LiRPA/bound_general.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import copy from typing import List import numpy as np import warnings from collections import OrderedDict, deque import torch from torch.nn import Parameter from .bound_op_map import bound_op_map from .bound_ops import * from .bounded_tensor import BoundedTensor, BoundedParameter from .parse_graph import parse_module from .perturbations import * from .utils import * from .patches import Patches from .optimized_bounds import default_optimize_bound_args warnings.simplefilter('once') class BoundedModule(nn.Module): """Bounded module with support for automatically computing bounds. Args: model (nn.Module): The original model to be wrapped by BoundedModule. global_input (tuple): A dummy input to the original model. The shape of the dummy input should be consistent with the actual input to the model except for the batch dimension. bound_opts (dict): Options for bounds. See `Bound Options `_. device (str or torch.device): Device of the bounded module. If 'auto', the device will be automatically inferred from the device of parameters in the original model or the dummy input. custom_ops (dict): A dictionary of custom operators. The dictionary maps operator names to their corresponding bound classes (subclasses of `Bound`). """ def __init__(self, model, global_input, bound_opts=None, device='auto', verbose=False, custom_ops=None): super().__init__() if isinstance(model, BoundedModule): for key in model.__dict__.keys(): setattr(self, key, getattr(model, key)) return self.ori_training = model.training if bound_opts is None: bound_opts = {} # Default options. default_bound_opts = { 'conv_mode': 'patches', 'sparse_intermediate_bounds': True, 'sparse_conv_intermediate_bounds': True, 'sparse_intermediate_bounds_with_ibp': True, 'sparse_features_alpha': True, 'sparse_spec_alpha': True, 'minimum_sparsity': 0.9, 'enable_opt_interm_bounds': False, 'crown_batch_size': np.inf, 'forward_refinement': False, 'forward_max_dim': int(1e9), # Do not share alpha for conv layers. 'use_full_conv_alpha': True, 'disabled_optimization': [], # Threshold for number of unstable neurons for each layer to disable # use_full_conv_alpha. 'use_full_conv_alpha_thresh': 512, 'verbosity': 1 if verbose else 0, 'optimize_graph': {'optimizer': None}, 'compare_crown_with_ibp': False, # Whether run an additional forward pass before computing bounds. 'forward_before_compute_bounds': False, 'clip_in_alpha_crown': False, # Whether to compute bounds for every node in the graph. # (rather than only the nodes whose intermediate bounds are needed.) 'bound_every_node': False, } default_bound_opts.update(bound_opts) self.bound_opts = default_bound_opts optimize_bound_args = copy.deepcopy(default_optimize_bound_args) optimize_bound_args.update( self.bound_opts.get('optimize_bound_args', {})) self.bound_opts.update({'optimize_bound_args': optimize_bound_args}) self.verbose = verbose self.custom_ops = custom_ops if custom_ops is not None else {} if device == 'auto': try: self.device = next(model.parameters()).device except StopIteration: # Model has no parameters. We use the device of input tensor. if isinstance(global_input, torch.Tensor): self.device = global_input.device elif isinstance(global_input, tuple): self.device = global_input[0].device else: raise NotImplementedError( # pylint: disable=raise-missing-from 'Unable to decide the device. 
Consider providing a ' '`device` argument to `BoundedModule` explicitly.') else: self.device = device self.global_input = tuple(unpack_inputs(global_input, device=self.device)) self.check_incompatible_nodes(model) self.conv_mode = self.bound_opts.get('conv_mode', 'patches') # Cached IBP results which may be reused self.ibp_lower, self.ibp_upper = None, None self.optimizable_activations = [] self.relus = [] # save relu layers for convenience self.layers_with_constraint = [] state_dict_copy = copy.deepcopy(model.state_dict()) object.__setattr__(self, 'ori_state_dict', state_dict_copy) model.to(self.device) output = model(*self.global_input) if not isinstance(output, torch.Tensor): raise TypeError( 'Output of the model is expected to be a single torch.Tensor. ' f'Actual type: {type(output)}') self.final_shape = output.shape self.bound_opts.update({'final_shape': self.final_shape}) self._convert(model, self.global_input) self._optimize_graph() # Compute forward_value and mark perturbed nodes self.forward(*self.global_input) self._expand_jacobian() self._check_patches_mode() self.next_split_hint = [] # Split hints, used in beta optimization. # Beta values for all intermediate bounds. # Set to None (not used) by default. self.best_intermediate_betas = None # Initialization value for intermediate betas. self.init_intermediate_betas = None # whether using cut self.cut_used = False # a placeholder for cut timestamp, which would be a non-positive int self.cut_timestamp = -1 # a placeholder to save the latest samplewise mask for # pruning-in-iteration optimization self.last_update_preserve_mask = None # If output constraints are used, it is possible that none of the possible # inputs satisfy them. In this case, the lower bounds will be set to +inf, # and the upper bounds to -inf. self.infeasible_bounds = None self.solver_model = None # Needed for output constraints - the output layer should not use them self.final_node().is_final_node = True self.dynamic = False # This is the topk ratio for half-naive, half-constrained concretization. # Please check for concretize_bounds.py for more details. self.clip_neuron_selection_type = 'ratio' self.clip_neuron_selection_value = -1.0 # A boolean tensor with shape (batchsize, ). It indicates if a batch is # infeasible when concretizing with constraints. # Always call `init_infeasible_bounds_constraints` function to initialize it. self.infeasible_bounds_constraints = None # This is designed for clipping during alpha-CROWN. # For each alpha-CROWN optimization iteration, the lA and lbias of the final layer # will be set as `constraints_optimized` for the next iteration. # Please check backward_bound.py and optimized_bound for more info. 
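# --- Illustrative sketch (not part of the library source) ---
# Minimal usage of the constructor documented above: wrap an ordinary nn.Module
# with a dummy input whose shape (apart from the batch dimension) matches real
# inputs. The tiny model and the options chosen here are arbitrary.
import torch
import torch.nn as nn
from auto_LiRPA import BoundedModule

model = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 2))
dummy_input = torch.zeros(1, 4)
lirpa_model = BoundedModule(model, dummy_input, bound_opts={'conv_mode': 'matrix'}, device='cpu')
# --- End of sketch ---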
self.constraints_optimized = None def nodes(self) -> List[Bound]: return self._modules.values() def get_enabled_opt_act(self): # Optimizable activations that are actually used and perturbed return [ n for n in self.optimizable_activations if n.used and n.perturbed and not getattr(n, 'is_linear_op', False) ] def get_optimizable_activations(self): for node in self.nodes(): if (isinstance(node, BoundOptimizableActivation) and node.optimizable and len(getattr(node, 'requires_input_bounds', [])) > 0 and node not in self.optimizable_activations): disabled = False for item in self.bound_opts.get('disable_optimization', []): if item.lower() in str(type(node)).lower(): disabled = True if disabled: logging.debug('Disabled optimization for %s', node) continue if node not in self.optimizable_activations: self.optimizable_activations.append(node) def get_perturbed_optimizable_activations(self): return [n for n in self.optimizable_activations if n.perturbed] def get_splittable_activations(self): """Activation functions that can be split during branch and bound.""" return [n for n in self.nodes() if n.perturbed and n.splittable and n.used] def get_layers_requiring_bounds(self): """Layer names whose intermediate layer bounds are required.""" intermediate_layers = [] tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) directly_optimize_layer_names = ( self.bound_opts['optimize_bound_args']['directly_optimize'] ) for node in self.nodes(): if node.name in directly_optimize_layer_names: intermediate_layers.append(node) if not node.used or not node.perturbed: continue for i in getattr(node, 'requires_input_bounds', []): input_node = node.inputs[i] if (input_node not in intermediate_layers and input_node.perturbed): # If not perturbed, it may not have the batch dimension. # So we do not include it, and it is unnecessary. intermediate_layers.append(input_node) if ( node.name in self.layers_with_constraint or (isinstance(node, BoundInput) and tighten_input_bounds) ): if node not in intermediate_layers: intermediate_layers.append(node) return intermediate_layers def check_incompatible_nodes(self, model): """Check whether the model has incompatible nodes that the conversion may be inaccurate""" node_types = [type(m) for m in list(model.modules())] if (torch.nn.Dropout in node_types and torch.nn.BatchNorm1d in node_types and any(input.shape[0] == 1 for input in self.global_input)): # In fact, we just need the input that is involved in the # dropout layer to have batch size larger than 1, but we don't know # which of them is, so we just check all of them. print('We cannot support torch.nn.Dropout and torch.nn.BatchNorm1d ' 'at the same time!') print('Suggest to use another dummy input which has batch size ' 'larger than 1 and set model to train() mode.') return if not self.ori_training and torch.nn.Dropout in node_types: print('Dropout operation CANNOT be parsed during conversion when ' 'the model is in eval() mode!') print('Set model to train() mode!') self.ori_training = True if self.ori_training and torch.nn.BatchNorm1d in node_types: print('BatchNorm1d may raise error during conversion when the model' ' is in train() mode!') print('Set model to eval() mode!') self.ori_training = False def non_deter_wrapper(self, op, *args, **kwargs): """Some operations are non-deterministic and deterministic mode will fail. 
So we temporary disable it.""" if self.bound_opts.get('deterministic', False): torch.use_deterministic_algorithms(False) ret = op(*args, **kwargs) if self.bound_opts.get('deterministic', False): torch.use_deterministic_algorithms(True) return ret def non_deter_scatter_add(self, *args, **kwargs): return self.non_deter_wrapper(torch.scatter_add, *args, **kwargs) def non_deter_index_select(self, *args, **kwargs): return self.non_deter_wrapper(torch.index_select, *args, **kwargs) def set_bound_opts(self, new_opts): for k, v in new_opts.items(): # assert v is not dict, 'only support change optimize_bound_args' if type(v) == dict: self.bound_opts[k].update(v) else: self.bound_opts[k] = v def set_gcp_relu_indicators(self, relu_layer_name, relu_indicators): """ Sets the GCP (Generalized Cutting Plane) relu indicators for the specified ReLU layer by name. Args: relu_layer_name (str): The name of the ReLU layer to update. relu_indicators (torch.Tensor): A tensor containing unstable relu indices or masks. """ # Search for the layer by name for m in self.relus: if m.name == relu_layer_name: # Set the indicators for the found ReLU layer m.gcp_unstable_relu_indicators = relu_indicators return # If not found, raise an error raise ValueError(f'No ReLU layer found with name {relu_layer_name}') @staticmethod def _get_A_norm(A): if not isinstance(A, (list, tuple)): A = (A, ) norms = [] for aa in A: if aa is not None: if isinstance(aa, Patches): aa = aa.patches norms.append(aa.abs().sum().item()) else: norms.append(None) return norms def __call__(self, *input, **kwargs): if 'method_opt' in kwargs: opt = kwargs['method_opt'] kwargs.pop('method_opt') else: opt = 'forward' for kwarg in [ 'disable_multi_gpu', 'no_replicas', 'get_property', 'node_class', 'att_name']: if kwarg in kwargs: kwargs.pop(kwarg) if opt == 'compute_bounds': return self.compute_bounds(**kwargs) else: return self.forward(*input, **kwargs) def register_parameter(self, name, param): r"""Adds a parameter to the module. The parameter can be accessed as an attribute using given name. Args: name (string): name of the parameter. The parameter can be accessed from this module using the given name param (Parameter): parameter to be added to the module. """ if '_parameters' not in self.__dict__: raise AttributeError( 'cannot assign parameter before Module.__init__() call') elif not isinstance(name, str): raise TypeError('parameter name should be a string. ' f'Got {torch.typename(name)}') elif name == '': raise KeyError('parameter name can\'t be empty string') elif hasattr(self, name) and name not in self._parameters: raise KeyError(f'attribute "{name}" already exists') if param is None: self._parameters[name] = None elif not isinstance(param, Parameter): raise TypeError( f'cannot assign "{torch.typename(param)}" object to ' f'parameter "{name}" ' '(torch.nn.Parameter or None required)') elif param.grad_fn: raise ValueError( f'Cannot assign non-leaf Tensor to parameter "{name}". Model ' 'parameters must be created explicitly. 
To express "{name}" ' 'as a function of another Tensor, compute the value in ' 'the forward() method.') else: self._parameters[name] = param def _named_members(self, get_members_fn, prefix='', recurse=True, remove_duplicate: bool = True, **kwargs): # pylint: disable=unused-argument r"""Helper method for yielding various names + members of modules.""" memo = set() modules = self.named_modules(prefix=prefix) if recurse else [ (prefix, self)] for module_prefix, module in modules: members = get_members_fn(module) for k, v in members: if v is None or v in memo: continue if remove_duplicate: memo.add(v) name = module_prefix + ('.' if module_prefix else '') + k # translate name to ori_name if name in self.node_name_map: name = self.node_name_map[name] yield name, v def train(self, mode=True): super().train(mode) for node in self.nodes(): node.train(mode=mode) def eval(self): super().eval() for node in self.nodes(): node.eval() def to(self, *args, **kwargs): # Moves and/or casts some attributes except pytorch will do by default. for node in self.nodes(): for attr in ['lower', 'upper', 'forward_value', 'd', 'lA',]: if hasattr(node, attr): this_attr = getattr(node, attr) if isinstance(this_attr, torch.Tensor): this_attr = this_attr.to(*args, **kwargs) setattr(node, attr, this_attr) if hasattr(node, 'interval'): # construct new interval this_attr = getattr(node, 'interval') setattr(node, 'interval', (this_attr[0].to( *args, **kwargs), this_attr[1].to(*args, **kwargs))) return super().to(*args, **kwargs) def __getitem__(self, name): module = self._modules[name] # We never create modules that are None, the assert fixes type hints assert module is not None return module def roots(self): return [self[name] for name in self.root_names] def final_node(self): return self[self.final_name] def get_forward_value(self, node): """ Recursively get `forward_value` for `node` and its parent nodes""" if getattr(node, 'forward_value', None) is not None: return node.forward_value inputs = [self.get_forward_value(inp) for inp in node.inputs] for inp in node.inputs: node.from_input = node.from_input or inp.from_input node.input_shape = inputs[0].shape if len(inputs) > 0 else None fv = node.forward(*inputs) if isinstance(fv, (torch.Size, tuple)): fv = torch.tensor(fv, device=self.device) node.forward_value = fv node.output_shape = fv.shape # In most cases, the batch dimension is just the first dimension # if the node depends on input. Otherwise if the node doesn't # depend on input, there is no batch dimension (default is -1). node.batch_dim = 0 if node.from_input else node.batch_dim # Unperturbed node but it is not a root node. # Save forward_value to value. (Can be used in forward bounds.) if not node.from_input and len(node.inputs) > 0: node.value = node.forward_value return fv def forward(self, *x, final_node_name=None, interm_bounds=None, clear_forward_only=False, reset_perturbed_nodes=True, cache_bounds=False): r"""Standard forward computation for the network. Args: x (tuple or None): Input to the model. final_node_name (str, optional): The name of the final node in the model. The value on the corresponding node will be returned. clear_forward_only (bool, default `False`): Whether only standard forward values stored on the nodes should be cleared. If `True`, only standard forward values stored on the nodes will be cleared. Otherwise, bound information on the nodes will also be cleared. reset_perturbed_nodes (bool, default `True`): Mark all perturbed nodes with input perturbations. 
When set to `True`, it may accidentally clear all .perturbed properties for intermediate nodes. Returns: output: The output of the model, or if `final_node_name` is not `None`, return the value on the corresponding node instead. """ self.set_input(*x, interm_bounds=interm_bounds, clear_forward_only=clear_forward_only, reset_perturbed_nodes=reset_perturbed_nodes, cache_bounds=cache_bounds) if final_node_name is None: final_node_name = self.output_name[0] return self.get_forward_value(self[final_node_name]) def _mark_perturbed_nodes(self, input): """Mark the graph nodes and determine which nodes need perturbation.""" # Set some of the input as perturbed if they are bounded objects any_perturbed = False for name, index in zip(self.input_name, self.input_index): if index is None: continue if isinstance(input[index], (BoundedTensor, BoundedParameter)): self[name].perturbed = True any_perturbed = True # If none of the inputs is a bounded object, set all of them as perturbed if not any_perturbed: for name, index in zip(self.input_name, self.input_index): if index is not None: self[name].perturbed = True degree_in = {} queue = deque() relus = [] # Initially the queue contains all "root" nodes. for key in self._modules.keys(): l = self[key] degree_in[l.name] = len(l.inputs) if degree_in[l.name] == 0: queue.append(l) # in_degree ==0 -> root node while len(queue) > 0: node = queue.popleft() # We set the relu here to ensure the list is sorted according to topological order. if isinstance(node, BoundRelu): relus.append(node) # Obtain all output node, and add the output nodes to the queue if # all its input nodes have been visited. # The initial "perturbed" property is set in BoundInput or # BoundParams object, depending on ptb. for name_next in node.output_name: node_next = self[name_next] if not node_next.never_perturbed: # The next node is perturbed if it is already perturbed, # or this node is perturbed. node_next.perturbed = node_next.perturbed or node.perturbed degree_in[name_next] -= 1 # all inputs of this node have been visited, # now put it in queue. if degree_in[name_next] == 0: queue.append(node_next) node.update_requires_input_bounds() self.relus = relus self.get_optimizable_activations() self.splittable_activations = self.get_splittable_activations() self.perturbed_optimizable_activations = ( self.get_perturbed_optimizable_activations()) return def _check_patches_mode(self): """Disable patches mode if there is no Conv node. This is a workaround (before a more general patches mode is implemented) to avoid issues relevant to the patches node, for complicated models without any Conv. 
""" has_conv = False for node in self.nodes(): if isinstance(node, (BoundConv, BoundConvTranspose, BoundConv2dGrad)): has_conv = True if not has_conv and self.conv_mode == 'patches': self.conv_mode = 'matrix' for node in self.nodes(): if getattr(node, 'mode', None) == 'patches': node.mode = 'matrix' def _clear_and_set_new( self, interm_bounds, clear_forward_only=False, reset_perturbed_nodes=True, cache_bounds=False, ): for l in self.nodes(): if hasattr(l, 'linear'): if isinstance(l.linear, tuple): for item in l.linear: del item delattr(l, 'linear') if hasattr(l, 'patch_size'): l.patch_size = {} if clear_forward_only: if hasattr(l, 'forward_value'): delattr(l, 'forward_value') else: for attr in ['interval', 'forward_value', 'd', 'lA', 'lower_d', 'upper_k']: if hasattr(l, attr): delattr(l, attr) if cache_bounds: l.move_lower_and_upper_bounds_to_cache() else: l.delete_lower_and_upper_bounds() for attr in ['zero_backward_coeffs_l', 'zero_backward_coeffs_u', 'zero_lA_mtx', 'zero_uA_mtx']: setattr(l, attr, False) # Given an interval here to make IBP/CROWN start from this node if interm_bounds is not None and l.name in interm_bounds.keys(): l.interval = tuple(interm_bounds[l.name][:2]) l.lower = interm_bounds[l.name][0] l.upper = interm_bounds[l.name][1] if l.lower is not None: l.lower = l.lower.detach().requires_grad_(False) if l.upper is not None: l.upper = l.upper.detach().requires_grad_(False) # Mark all nodes as non-perturbed except for weights. if reset_perturbed_nodes: if not hasattr(l, 'perturbation') or l.perturbation is None: l.perturbed = False # Clear operator-specific attributes l.clear() def set_input( self, *x, interm_bounds=None, clear_forward_only=False, reset_perturbed_nodes=True, cache_bounds=False, ): self._clear_and_set_new( interm_bounds=interm_bounds, clear_forward_only=clear_forward_only, reset_perturbed_nodes=reset_perturbed_nodes, cache_bounds=cache_bounds, ) inputs_unpacked = unpack_inputs(x) for name, index in zip(self.input_name, self.input_index): if index is None: continue node = self[name] node.value = inputs_unpacked[index] if isinstance(node.value, (BoundedTensor, BoundedParameter)): node.perturbation = node.value.ptb else: node.perturbation = None # Mark all perturbed nodes. if reset_perturbed_nodes: self._mark_perturbed_nodes(inputs_unpacked) def _get_node_input(self, nodesOP, nodesIn, node): ret = [] for i in range(len(node.inputs)): for op in nodesOP: if op.name == node.inputs[i]: ret.append(op.bound_node) break if len(ret) == i + 1: continue for io in nodesIn: if io.name == node.inputs[i]: ret.append(io.bound_node) break if len(ret) <= i: raise ValueError(f'cannot find inputs of node: {node.name}') return ret def _to(self, obj, dest, inplace=False): """ Move all tensors in the object to a specified dest (device or dtype). 
The inplace=True option is available for dict.""" if obj is None: return obj elif isinstance(obj, torch.Tensor): return obj.to(dest) elif isinstance(obj, Patches): return obj.patches.to(dest) elif isinstance(obj, tuple): return tuple([self._to(item, dest) for item in obj]) elif isinstance(obj, list): return list([self._to(item, dest) for item in obj]) elif isinstance(obj, dict): if inplace: for k, v in obj.items(): obj[k] = self._to(v, dest, inplace=True) return obj else: return {k: self._to(v, dest) for k, v in obj.items()} else: raise NotImplementedError(type(obj)) def _convert_nodes(self, model, global_input): r""" Returns: nodesOP (list): List of operator nodes nodesIn (list): List of input nodes nodesOut (list): List of output nodes template (object): Template to specify the output format """ global_input_cpu = self._to(global_input, 'cpu') if self.ori_training: model.train() else: model.eval() model.to('cpu') nodesOP, nodesIn, nodesOut, template = parse_module( model, global_input_cpu) model.to(self.device) for i in range(0, len(nodesIn)): if nodesIn[i].param is not None: nodesIn[i] = nodesIn[i]._replace( param=nodesIn[i].param.to(self.device)) # Convert input nodes and parameters. attr = {'device': self.device} for i, n in enumerate(nodesIn): if n.input_index is not None: nodesIn[i] = nodesIn[i]._replace(bound_node=BoundInput( ori_name=nodesIn[i].ori_name, value=global_input[nodesIn[i].input_index], perturbation=nodesIn[i].perturbation, input_index=n.input_index, options=self.bound_opts, attr=attr)) else: bound_class = BoundParams if isinstance( nodesIn[i].param, nn.Parameter) else BoundBuffers nodesIn[i] = nodesIn[i]._replace(bound_node=bound_class( ori_name=nodesIn[i].ori_name, value=nodesIn[i].param, perturbation=nodesIn[i].perturbation, options=self.bound_opts, attr=attr)) unsupported_ops = [] # Convert other operation nodes. 
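# --- Illustrative sketch (not part of the library source) ---
# The dispatch loop below resolves each parsed operator: the user-provided
# custom_ops dictionary first, then the built-in bound_op_map, then a
# name-based lookup for aten::/onnx:: ops. The mapping shown here is
# hypothetical and only illustrates its shape (op name string -> Bound subclass).
from auto_LiRPA.bound_ops import BoundRelu

custom_ops_example = {'onnx::Relu': BoundRelu}
# BoundedModule(model, dummy_input, custom_ops=custom_ops_example)
# --- End of sketch ---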
for n in range(len(nodesOP)): attr = nodesOP[n].attr inputs = self._get_node_input(nodesOP, nodesIn, nodesOP[n]) try: if nodesOP[n].op in self.custom_ops: op = self.custom_ops[nodesOP[n].op] elif nodesOP[n].op in bound_op_map: op = bound_op_map[nodesOP[n].op] elif nodesOP[n].op.startswith('aten::ATen'): op = globals()[f'BoundATen{attr["operator"].capitalize()}'] elif nodesOP[n].op.startswith('onnx::'): op = globals()[f'Bound{nodesOP[n].op[6:]}'] else: raise KeyError except (NameError, KeyError): unsupported_ops.append(nodesOP[n]) logger.error('The node has an unsupported operation: %s', nodesOP[n]) continue attr['device'] = self.device # FIXME generalize if (nodesOP[n].op == 'onnx::BatchNormalization' or getattr(op, 'TRAINING_FLAG', False)): # BatchNormalization node needs model.training flag to set # running mean and vars set training=False to avoid wrongly # updating running mean/vars during bound wrapper nodesOP[n] = nodesOP[n]._replace(bound_node=op( attr, inputs, nodesOP[n].output_index, self.bound_opts, False)) else: nodesOP[n] = nodesOP[n]._replace(bound_node=op( attr, inputs, nodesOP[n].output_index, self.bound_opts)) if unsupported_ops: logger.error('Unsupported operations:') for n in unsupported_ops: logger.error(f'Name: {n.op}, Attr: {n.attr}') raise NotImplementedError('There are unsupported operations') for node in nodesIn + nodesOP: node.bound_node.name = node.name nodes_dict = {} for node in nodesOP + nodesIn: nodes_dict[node.name] = node.bound_node nodesOP = [n.bound_node for n in nodesOP] nodesIn = [n.bound_node for n in nodesIn] nodesOut = [nodes_dict[n] for n in nodesOut] return nodesOP, nodesIn, nodesOut, template def _build_graph(self, nodesOP, nodesIn, nodesOut, template): # We were assuming that the original model had only one output node. assert len(nodesOut) == 1 self.final_name = nodesOut[0].name self.input_name, self.input_index, self.root_names = [], [], [] self.output_name = [n.name for n in nodesOut] self.output_template = template self._modules.clear() for node in nodesIn: self.add_input_node(node, index=node.input_index) self.add_nodes(nodesOP) if self.conv_mode == 'patches': self.root_names: List[str] = [node.name for node in nodesIn] def rename_nodes(self, nodesOP, nodesIn, rename_dict): def rename(node): node.name = rename_dict[node.name] return node for i in range(len(nodesOP)): nodesOP[i] = rename(nodesOP[i]) for i in range(len(nodesIn)): nodesIn[i] = rename(nodesIn[i]) def _split_complex(self, nodesOP, nodesIn): finished = True for n in range(len(nodesOP)): if hasattr(nodesOP[n], 'complex') and nodesOP[n].complex: complex_node = nodesOP[n] finished = False _nodesOP, _nodesIn, _nodesOut, _ = self._convert_nodes( nodesOP[n].model, nodesOP[n].input) # assuming each supported complex operation only has one output assert len(_nodesOut) == 1 name_base = nodesOP[n].name + '/split' rename_dict = {} for node in _nodesOP + _nodesIn: rename_dict[node.name] = name_base + node.name num_inputs = len(nodesOP[n].inputs) for i in range(num_inputs): rename_dict[_nodesIn[i].name] = nodesOP[n].input_name[i] rename_dict[_nodesOP[-1].name] = nodesOP[n].name self.rename_nodes(_nodesOP, _nodesIn, rename_dict) output_name = _nodesOP[-1].name # Any input node of some node within the complex node should be # replaced with the complex node's corresponding input node. 
for node in _nodesOP: for i in range(len(node.inputs)): if node.input_name[i] in nodesOP[n].input_name: index = nodesOP[n].input_name.index( node.input_name[i]) node.inputs[i] = nodesOP[n].inputs[index] # For any output node of this complex node, # modify its input node. for node in nodesOP: if output_name in node.input_name: index = node.input_name.index(output_name) node.inputs[index] = _nodesOP[-1] # Mark where the nodes come from for node in _nodesOP: node.from_complex_node = type(complex_node).__name__ nodesOP = nodesOP[:n] + _nodesOP + nodesOP[(n + 1):] nodesIn = nodesIn + _nodesIn[num_inputs:] break return nodesOP, nodesIn, finished def _get_node_name_map(self): """Build a dict with {ori_name: name, name: ori_name}""" self.node_name_map = {} for node in self.nodes(): if isinstance(node, (BoundInput, BoundParams)): for p in list(node.named_parameters()): if node.ori_name not in self.node_name_map: name = f'{node.name}.{p[0]}' self.node_name_map[node.ori_name] = name self.node_name_map[name] = node.ori_name for p in list(node.named_buffers()): if node.ori_name not in self.node_name_map: name = f'{node.name}.{p[0]}' self.node_name_map[node.ori_name] = name self.node_name_map[name] = node.ori_name # convert a Pytorch model to a model with bounds def _convert(self, model, global_input): if self.verbose: logger.info('Converting the model...') self.num_global_inputs = len(global_input) nodesOP, nodesIn, nodesOut, template = self._convert_nodes( model, global_input) global_input = self._to(global_input, self.device) while True: self._build_graph(nodesOP, nodesIn, nodesOut, template) self.forward(*global_input) # running means/vars changed nodesOP, nodesIn, finished = self._split_complex(nodesOP, nodesIn) if finished: break self._get_node_name_map() ori_state_dict_mapped = OrderedDict() for k, v in self.ori_state_dict.items(): if k in self.node_name_map: ori_state_dict_mapped[self.node_name_map[k]] = v self.load_state_dict(ori_state_dict_mapped) if self.ori_training: model.load_state_dict(self.ori_state_dict) delattr(self, 'ori_state_dict') # The name of the final node used in the last call to `compute_bounds` self.last_final_node_name = None if self.verbose: logger.info('Model converted to support bounds') def check_prior_bounds(self, node, C=None): if node.prior_checked or not (node.used and node.perturbed): return if C is not None and isinstance(node, BoundConcat): # If the last node is a BoundConcat, it's possible that only some of # the input nodes of the BoundConcat are needed in the specification. # In this case, we only check the bounds of the input nodes that are # actually used in the specification. All other branches are # considered as not used, and their bounds are not checked. # FIXME: In this case, node.used of some nodes may be incorrect. 
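# --- Illustrative sketch (not part of the library source) ---
# The BoundConcat case handled just below: |C| is reduced over its first two
# dimensions and sliced per concatenated input, so inputs whose coefficients
# are all zero do not need prior bounds. Toy sizes:
import torch

C = torch.zeros(2, 1, 10)      # first two dims are reduced away; last is concat output size
C[..., 7] = 1.0                # only neurons of the second input are referenced
input_sizes = [6, 4]           # flattened sizes of the two concat inputs
reduced = C.abs().sum(dim=[0, 1])
offset, needed = 0, []
for size in input_sizes:
    needed.append(bool(reduced[offset:offset + size].sum() != 0))
    offset += size
# needed == [False, True]
# --- End of sketch ---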
offset = 0 assert isinstance(C, torch.Tensor) and C.ndim == 3 C = C.abs().sum(dim=[0, 1]) for node_input in node.inputs: size = prod(node_input.output_shape[1:]) C_s = C[offset:offset+size].sum() if (C_s != 0).any(): self.check_prior_bounds(node_input) offset += size else: for n in node.inputs: self.check_prior_bounds(n) tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) directly_optimize_layer_names = ( self.bound_opts['optimize_bound_args']['directly_optimize'] ) bound_every_node = ( self.bound_opts['bound_every_node'] ) for i in range(len(node.inputs)): if ( i in node.requires_input_bounds or not node.inputs[i].perturbed or node.inputs[i].name in self.layers_with_constraint # allows to tighten input bounds or (isinstance(node.inputs[i], BoundInput) and tighten_input_bounds) # layers whos optimization is forced # (for consecutive layers introduced as part of invprop) or node.inputs[i].name in directly_optimize_layer_names or bound_every_node ): self.compute_intermediate_bounds( node.inputs[i], prior_checked=True) node.prior_checked = True def compute_intermediate_bounds(self, node: Bound, prior_checked=False): tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) directly_optimize_layer_names = ( self.bound_opts['optimize_bound_args']['directly_optimize'] ) best_of_oc_and_no_oc = ( self.bound_opts['optimize_bound_args']['best_of_oc_and_no_oc'] ) if ( node.is_lower_bound_current() and not ( isinstance(node, BoundInput) and tighten_input_bounds or node.name in directly_optimize_layer_names ) ): if node.name in self.layers_with_constraint: node.clamp_interim_bounds() return logger.debug(f'Getting the bounds of {node}') if not prior_checked: self.check_prior_bounds(node) if not node.perturbed: fv = self.get_forward_value(node) node.interval = node.lower, node.upper = fv, fv return # FIXME check that weight perturbation is not affected # (from_input=True should be set for weights) if not node.from_input and hasattr(node, 'forward_value'): node.lower = node.upper = self.get_forward_value(node) return reference_bounds = self.reference_bounds if self.use_forward: # forward node.lower, node.upper = self.forward_general( node=node, concretize=True) else: # backward if self.check_IBP_intermediate(node): # Intermediate bounds for some operators are directly # computed from their input nodes by IBP # (such as BoundRelu, BoundNeg) logger.debug('IBP propagation for intermediate bounds on %s', node) # For the first linear layer, IBP can give the same tightness as CROWN. 
elif not self.check_IBP_first_linear(node): ref_intermediate = self.get_ref_intermediate_bounds(node) sparse_C = self.get_sparse_C(node, ref_intermediate) newC, reduced_dim, unstable_idx, unstable_size = sparse_C # Special case for BoundRelu when sparse intermediate bounds are disabled # Currently sparse intermediate bounds are restricted to ReLU models only skip = False if unstable_idx is None: if (len(node.output_name) == 1 and isinstance(self[node.output_name[0]], BoundTwoPieceLinear) and node.name in self.reference_bounds): lower, upper = self.reference_bounds[node.name] fully_stable = torch.logical_or(lower>=0, upper<=0).all() if fully_stable: node.lower, node.upper = lower, upper skip = True elif unstable_size == 0: skip = True if not skip: apply_output_constraints_to = self.bound_opts[ 'optimize_bound_args']['apply_output_constraints_to'] if self.return_A: node.lower, node.upper, _ = self.backward_general( node, newC, unstable_idx=unstable_idx, apply_output_constraints_to=apply_output_constraints_to) else: # Compute backward bounds only when there are unstable # neurons, or when we don't know which neurons are unstable. node.lower, node.upper = self.backward_general( node, newC, unstable_idx=unstable_idx, apply_output_constraints_to=apply_output_constraints_to) if torch.any((node.upper - node.lower).abs() > 1e10): if len(apply_output_constraints_to) > 0 and not best_of_oc_and_no_oc: warnings.warn('Very weak bounds detected. This can potentially be ' 'fixed by setting best_of_oc_and_no_oc=True.') if reduced_dim: self.restore_sparse_bounds( node, unstable_idx, unstable_size, ref_intermediate) if self.bound_opts['compare_crown_with_ibp']: node.lower, node.upper = self.compare_with_IBP(node, node.lower, node.upper) # node.lower and node.upper (intermediate bounds) are computed in # the above function. If we have bound references, we set them here # to always obtain a better set of bounds. if node.name in reference_bounds: ref_bounds = reference_bounds[node.name] # Initially, the reference bound and the computed bound can be # exactly the same when intermediate layer beta is 0. This will # prevent gradients flow. So we need a small guard here. # Set the intermediate layer bounds using reference bounds, # always choosing the tighter one. # Assert no NaNs in reference bounds before using them assert not torch.isnan(ref_bounds[0]).any(), ( f'NaN detected in reference lower bound of layer {node.name}') node.lower = (torch.max(ref_bounds[0], node.lower).detach() - node.lower.detach() + node.lower) assert not torch.isnan(ref_bounds[1]).any(), ( f'NaN detected in reference upper bound of layer {node.name}') node.upper = (node.upper - (node.upper.detach() - torch.min(ref_bounds[1], node.upper).detach())) # Also update bounds in node.linear (if exist) if hasattr(node, 'linear'): node.linear.lower = node.lower node.linear.upper = node.upper # Otherwise, we only use reference bounds to check which neurons # are unstable. # prior constraint bounds if node.name in self.layers_with_constraint: node.clamp_interim_bounds() # FIXME (12/28): we should be consistent, and only use # node.interval, do not use node.lower or node.upper! node.interval = (node.lower, node.upper) def get_ref_intermediate_bounds(self, node): sparse_intermediate_bounds_with_ibp = self.bound_opts.get( 'sparse_intermediate_bounds_with_ibp', True) # Sparse intermediate bounds can be enabled # if aux_reference_bounds are given. # (this is enabled for ReLU only, and not for other activations.) 
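# --- Illustrative sketch (not part of the library source) ---
# The reference-bound update in compute_intermediate_bounds above takes the
# elementwise tighter bound while keeping gradients attached to the freshly
# computed bound (the detach trick mentioned in its comments). Standalone check:
import torch

computed = torch.tensor([-1.0, -0.5], requires_grad=True)
reference = torch.tensor([-0.8, -0.9])
tightened = torch.max(reference, computed).detach() - computed.detach() + computed
# Forward value equals max(reference, computed); gradient w.r.t. `computed` is 1.
tightened.sum().backward()
assert torch.allclose(computed.grad, torch.ones(2))
assert torch.allclose(tightened.detach(), torch.tensor([-0.8, -0.5]))
# --- End of sketch ---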
sparse_intermediate_bounds = (self.bound_opts.get( 'sparse_intermediate_bounds', False) and isinstance(self[node.output_name[0]], BoundRelu)) ref_intermediate_lb, ref_intermediate_ub = None, None if sparse_intermediate_bounds: if node.name not in self.aux_reference_bounds: # If aux_reference_bounds are not available, # we can use IBP to compute these bounds. if sparse_intermediate_bounds_with_ibp: with torch.no_grad(): # Get IBP bounds for this layer; # we set delete_bounds_after_use=True which does # not save extra intermediate bound tensors. ret_ibp = self.IBP_general( node=node, delete_bounds_after_use=True) ref_intermediate_lb = ret_ibp[0] ref_intermediate_ub = ret_ibp[1] else: sparse_intermediate_bounds = False else: aux_bounds = self.aux_reference_bounds[node.name] ref_intermediate_lb, ref_intermediate_ub = aux_bounds return sparse_intermediate_bounds, ref_intermediate_lb, ref_intermediate_ub def merge_A_dict(self, lA_dict, uA_dict): merged_A = {} for output_node_name in lA_dict: merged_A[output_node_name] = {} lA_dict_ = lA_dict[output_node_name] uA_dict_ = uA_dict[output_node_name] for input_node_name in lA_dict_: merged_A[output_node_name][input_node_name] = { 'lA': lA_dict_[input_node_name]['lA'], 'uA': uA_dict_[input_node_name]['uA'], 'lbias': lA_dict_[input_node_name]['lbias'], 'ubias': uA_dict_[input_node_name]['ubias'], } return merged_A def compute_bounds( self, x=None, aux=None, C=None, method='backward', IBP=False, forward=False, bound_lower=True, bound_upper=True, reuse_ibp=False, reuse_alpha=False, return_A=False, needed_A_dict=None, final_node_name=None, average_A=False, interm_bounds=None, reference_bounds=None, intermediate_constr=None, alpha_idx=None, aux_reference_bounds=None, need_A_only=False, cutter=None, decision_thresh=None, update_mask=None, ibp_nodes=None, cache_bounds=False): r"""Main function for computing bounds. Args: x (tuple or None): Input to the model. If it is None, the input from the last `forward` or `compute_bounds` call is reused. Otherwise: the number of elements in the tuple should be equal to the number of input nodes in the model, and each element in the tuple corresponds to the value for each input node respectively. It should look similar as the `global_input` argument when used for creating a `BoundedModule`. aux (object, optional): Auxliary information that can be passed to `Perturbation` classes for initializing and concretizing bounds, e.g., additional information for supporting synonym word subsitution perturbaiton. C (Tensor): The specification matrix that can map the output of the model with an additional linear layer. This is usually used for maping the logits output of the model to classification margins. method (str): The main method for bound computation. Choices: * `IBP`: purely use Interval Bound Propagation (IBP) bounds. * `CROWN-IBP`: use IBP to compute intermediate bounds, but use CROWN (backward mode LiRPA) to compute the bounds of the final node. * `CROWN`: purely use CROWN to compute bounds for intermediate nodes and the final node. * `Forward`: purely use forward mode LiRPA. * `Forward+Backward`: use forward mode LiRPA for intermediate nodes, but further use CROWN for the final node. * `CROWN-Optimized` or `alpha-CROWN`: use CROWN, and also optimize the linear relaxation parameters for activations. * `forward-optimized`: use forward bounds with optimized linear relaxation. * `dynamic-forward`: use dynamic forward bound propagation where new input variables may be dynamically introduced for nonlinearities. 
* `dynamic-forward+backward`: use dynamic forward mode for intermediate nodes, but use CROWN for the final node. IBP (bool, optional): If `True`, use IBP to compute the bounds of intermediate nodes. It can be automatically set according to `method`. forward (bool, optional): If `True`, use the forward mode bound propagation to compute the bounds of intermediate nodes. It can be automatically set according to `method`. bound_lower (bool, default `True`): If `True`, the lower bounds of the output needs to be computed. bound_upper (bool, default `True`): If `True`, the upper bounds of the output needs to be computed. reuse_ibp (bool, optional): If `True` and `method` is None, reuse the previously saved IBP bounds. final_node_name (str, optional): Set the final node in the computational graph for bound computation. By default, the final node of the originally built computational graph is used. return_A (bool, optional): If `True`, return linear coefficients in bound propagation (`A` tensors) with `needed_A_dict` set. needed_A_dict (dict, optional): A dictionary specifying linear coefficients (`A` tensors) that are needed and should be returned. Each key in the dictionary is the name of a starting node in backward bound propagation, with a list as the value for the key, which specifies the names of the ending nodes in backward bound propagation, and the linear coefficients of the starting node w.r.t. the specified ending nodes are returned. By default, it is empty. reuse_alpha (bool, optional): If `True`, reuse previously saved alpha values when they are not being optimized. decision_thresh (float, optional): In CROWN-optimized mode, we will use this decision_thresh to dynamically optimize those domains that <= the threshold. interm_bounds: A dictionary of 2-element tuple/list containing lower and upper bounds for intermediate layers. The dictionary keys should include the names of the layers whose bounds should be set without recomputation. The layer names can be viewed by setting environment variable AUTOLIRPA_DEBUG=1. The values of each dictionary elements are (lower_bounds, upper_bounds) where "lower_bounds" and "upper_bounds" are two tensors with the same shape as the output shape of this layer. If you only need to set intermediate layer bounds for certain layers, then just include these layers' names in the dictionary. reference_bounds: Format is similar to "interm_bounds". However, these bounds are only used as a reference, and the bounds for intermediate layers will still be computed (e.g., using CROWN, IBP or other specified methods). The computed bounds will be compared to "reference_bounds" and the tighter one between the two will be used. aux_reference_bounds: Format is similar to intermediate layer bounds. However, these bounds are only used for determine which neurons are stable and which neurons are unstable for ReLU networks. Unstable neurons' intermediate layer bounds will be recomputed. cache_bounds: If `True`, the currently set lower and upper bounds will not be deleted, but cached for use by the INVPROP algorithm. This should not be set by the user, but only in `_get_optimized_bounds`. Returns: bound (tuple): When `return_A` is `False`, return a tuple of the computed lower bound and upper bound. When `return_A` is `True`, return a tuple of lower bound, upper bound, and `A` dictionary. """ # This method only prepares everything by setting all required parameters. # The main logic is located in `_compute_bounds_main`. It may be called # repeatedly for CROWN optimizations. 
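        # Illustrative usage (a minimal sketch; `net` and `x` below are placeholder
        # names, not defined in this file, and the exact call pattern may vary):
        #     from auto_LiRPA import BoundedModule, BoundedTensor
        #     from auto_LiRPA.perturbations import PerturbationLpNorm
        #     lirpa_model = BoundedModule(net, torch.empty_like(x))
        #     x_bounded = BoundedTensor(x, PerturbationLpNorm(norm=float('inf'), eps=0.03))
        #     lb, ub = lirpa_model.compute_bounds(x=(x_bounded,), method='CROWN')
        #     # With A matrices returned for the final node w.r.t. the input node:
        #     lb, ub, A_dict = lirpa_model.compute_bounds(
        #         x=(x_bounded,), method='CROWN', return_A=True,
        #         needed_A_dict={lirpa_model.output_name[0]: [lirpa_model.input_name[0]]})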
logger.debug(f'Compute bounds with {method}') if needed_A_dict is None: needed_A_dict = {} if not bound_lower and not bound_upper: raise ValueError( 'At least one of bound_lower and bound_upper must be True') # Several shortcuts. compute_optimized = False method = method.lower() if method is not None else method if method == 'ibp': # Pure IBP bounds. method, IBP = None, True elif method in ['ibp+backward', 'ibp+crown', 'crown-ibp']: method, IBP = 'backward', True elif method == 'crown': method = 'backward' elif method == 'forward': forward = True self.dynamic = False elif method == 'dynamic-forward': forward = True self.dynamic = True elif method == 'forward+backward' or method == 'forward+crown': method, forward = 'backward', True elif method == 'dynamic-forward+backward' or method == 'dynamic-forward+crown': self.dynamic = True method, forward = 'backward', True elif method in ['crown-optimized', 'alpha-crown', 'forward-optimized']: # Lower and upper bounds need two separate rounds of optimization. if method == 'forward-optimized': method = 'forward' else: method = 'backward' compute_optimized = True if reference_bounds is None: reference_bounds = {} if aux_reference_bounds is None: aux_reference_bounds = {} # If y in self.backward_node_pairs[x], then node y is visited when # doing backward bound propagation starting from node x. self.backward_from = dict([(node, []) for node in self._modules]) if not bound_lower and not bound_upper: raise ValueError( 'At least one of bound_lower and bound_upper in compute_bounds ' 'should be True') A_dict = {} if return_A else None if x is not None: if isinstance(x, torch.Tensor): x = (x,) if self.bound_opts['forward_before_compute_bounds']: self.forward(*x, interm_bounds=interm_bounds, cache_bounds=cache_bounds) else: self.set_input(*x, interm_bounds=interm_bounds, cache_bounds=cache_bounds) roots = self.roots() batch_size = roots[0].value.shape[0] dim_in = 0 for i in range(len(roots)): value = roots[i].forward() if getattr(roots[i], 'perturbation', None) is not None: ret_init = roots[i].perturbation.init( value, aux=aux, forward=forward) roots[i].linear, roots[i].center, roots[i].aux = ret_init # This input/parameter has perturbation. # Create an interval object. roots[i].interval = Interval( roots[i].linear.lower, roots[i].linear.upper, ptb=roots[i].perturbation) if forward: roots[i].dim = roots[i].linear.lw.shape[1] dim_in += roots[i].dim else: # This input/parameter does not has perturbation. # Use plain tuple defaulting to Linf perturbation. roots[i].interval = (value, value) roots[i].forward_value = roots[i].value = value roots[i].center = roots[i].lower = roots[i].upper = value roots[i].lower, roots[i].upper = roots[i].interval if forward: self.init_forward(roots, dim_in) for n in self.nodes(): if isinstance(n, BoundRelu): for node in n.inputs: if hasattr(node, 'relu_followed'): node.relu_followed = True # Inject update mask inside the activations # update_mask: None or bool tensor([batch_size]) # If set to a tensor, only update the alpha and beta of selected # element (with element=1). 
n.alpha_beta_update_mask = update_mask final = (self.final_node() if final_node_name is None else self[final_node_name]) # BFS to find out whether each node is used given the current final node self._set_used_nodes(final) self.use_forward = forward self.batch_size = batch_size self.dim_in = dim_in self.return_A = return_A self.A_dict = A_dict self.needed_A_dict = needed_A_dict self.intermediate_constr = intermediate_constr self.reference_bounds = reference_bounds self.aux_reference_bounds = aux_reference_bounds self.final_node_name = final.name self.ibp_nodes = ibp_nodes if compute_optimized: kwargs = dict(x=x, C=C, method=method, interm_bounds=interm_bounds, reference_bounds=reference_bounds, return_A=return_A, aux_reference_bounds=aux_reference_bounds, needed_A_dict=needed_A_dict, final_node_name=final_node_name, cutter=cutter, decision_thresh=decision_thresh) if bound_upper: ret2 = self._get_optimized_bounds(bound_side='upper', **kwargs) else: ret2 = None if bound_lower: ret1 = self._get_optimized_bounds(bound_side='lower', **kwargs) else: ret1 = None if bound_lower and bound_upper: if return_A: # Needs to merge the A dictionary. return ret1[0], ret2[1], self.merge_A_dict(ret1[2], ret2[2]) else: return ret1[0], ret2[1] elif bound_lower: return ret1 # ret1[1] is None. elif bound_upper: return ret2 # ret2[0] is None. return self._compute_bounds_main(C=C, method=method, IBP=IBP, bound_lower=bound_lower, bound_upper=bound_upper, reuse_ibp=reuse_ibp, reuse_alpha=reuse_alpha, average_A=average_A, alpha_idx=alpha_idx, need_A_only=need_A_only, update_mask=update_mask) def save_intermediate(self, save_path=None): r"""A function for saving intermediate bounds. Please call this function after `compute_bounds`, or it will output IBP bounds by default. Args: save_path (str, default `None`): If `None`, the intermediate bounds will not be saved, or it will be saved at the designated path. Returns: save_dict (dict): Return a dictionary of lower and upper bounds, with the key being the name of the layer. """ save_dict = OrderedDict() for node in self.nodes(): if node.used and node.perturbed: if not hasattr(node, 'interval'): ibp_lower, ibp_upper = self.IBP_general(node, delete_bounds_after_use=True) dim_output = int(prod(node.output_shape[1:])) C = torch.eye(dim_output, device=self.device).expand( self.batch_size, dim_output, dim_output) crown_lower, crown_upper = self.backward_general(node, C=C) save_dict[node.name] = ( torch.max(crown_lower, ibp_lower), torch.min(crown_upper, ibp_upper)) else: save_dict[node.name] = (node.lower, node.upper) if save_path is not None: torch.save(save_dict, save_path) return save_dict def _compute_bounds_main(self, C=None, method='backward', IBP=False, bound_lower=True, bound_upper=True, reuse_ibp=False, reuse_alpha=False, average_A=False, alpha_idx=None, need_A_only=False, update_mask=None): """The core implementation of compute_bounds. Seperated because compute_bounds may call _get_optimized_bounds which repeatedly calls this method. Otherwise, the preprocessing done in compute_bounds would be executed for each iteration. 
""" final = (self.final_node() if self.final_node_name is None else self[self.final_node_name]) logger.debug(f'Final node {final.__class__.__name__}({final.name})') if IBP and method is None and reuse_ibp: # directly return the previously saved ibp bounds return self.ibp_lower, self.ibp_upper if IBP: self.ibp_lower, self.ibp_upper = self.IBP_general(node=final, C=C) if method is None: return self.ibp_lower, self.ibp_upper # TODO: if compute_bounds is called with a method that causes alphas to be # optimized, C will be allocated in each iteration. We could allocate it once # in compute_bounds, but e.g. `IBP_general` and code in `_get_optimized_bounds` # relies on the fact that it can be None if C is None: # C is an identity matrix by default if final.output_shape is None: raise ValueError( f'C is not missing while node {final} has no default shape') dim_output = int(prod(final.output_shape[1:])) # TODO: use an eyeC object here. C = torch.eye(dim_output, device=self.device).expand( self.batch_size, dim_output, dim_output) # Reuse previously saved alpha values, # even if they are not optimized now # This must be done here instead of `compute_bounds`, as other code might change # it (e.g. `_get_optimized_bounds`) if reuse_alpha: self.opt_reuse() else: self.opt_no_reuse() for node in self.nodes(): # All nodes may need to be recomputed node.prior_checked = False self.check_prior_bounds(final, C=C) if method == 'backward': apply_output_constraints_to = ( self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] ) # This is for the final output bound. # No need to pass in intermediate layer beta constraints. ret = self.backward_general( final, C, bound_lower=bound_lower, bound_upper=bound_upper, average_A=average_A, need_A_only=need_A_only, unstable_idx=alpha_idx, update_mask=update_mask, apply_output_constraints_to=apply_output_constraints_to) if self.bound_opts['compare_crown_with_ibp']: new_lower, new_upper = self.compare_with_IBP(final, lower=ret[0], upper=ret[1], C=C) ret = (new_lower, new_upper) + ret[2:] # FIXME when C is specified, lower and upper should not be saved to # final.lower and final.upper, because they are not the bounds for # the node. final.lower, final.upper = ret[0], ret[1] return ret elif method == 'forward' or method == 'dynamic-forward': return self.forward_general(C=C, node=final, concretize=True) else: raise NotImplementedError def _set_used_nodes(self, final): # By default, all *.used are initialized to False. # We set the used nodes by BFS from the final node. if final.name != self.last_final_node_name: self.last_final_node_name = final.name final.used = True queue = deque([final]) while len(queue) > 0: n = queue.popleft() for n_pre in n.inputs: if not n_pre.used: n_pre.used = True queue.append(n_pre) # Based on "used" and "perturbed" properties, find out which # layer requires intermediate layer bounds. 
            self.layers_requiring_bounds = self.get_layers_requiring_bounds()

    def init_infeasible_bounds_constraints(self, batchsize, device):
        '''Simply initialize the infeasible bound record.'''
        self.infeasible_bounds_constraints = torch.full((batchsize,), False, device=device)

    from .interval_bound import (
        IBP_general, _IBP_loss_fusion, check_IBP_intermediate,
        check_IBP_first_linear, compare_with_IBP)
    from .forward_bound import (
        forward_general, forward_general_dynamic, forward_refinement, init_forward)
    from .backward_bound import (
        backward_general, get_sparse_C, check_optimized_variable_sparsity,
        restore_sparse_bounds, get_alpha_crown_start_nodes,
        get_unstable_locations, batched_backward, _preprocess_C)
    from .output_constraints import (
        backward_general_with_output_constraint, invprop_enabled,
        backward_general_invprop, invprop_init_infeasible_bounds,
        invprop_check_infeasible_bounds)
    from .optimized_bounds import (
        _get_optimized_bounds, init_alpha, update_best_beta, opt_reuse,
        opt_no_reuse, _to_float64, _to_default_dtype)
    from .beta_crown import (beta_crown_backward_bound, reset_beta, set_beta,
                             set_beta_cuts, get_split_nodes)
    from .jacobian import (compute_jacobian_bounds, _expand_jacobian)
    from .optimize_graph import _optimize_graph
    from .edit_graph import add_nodes, add_input_node, delete_node, replace_node
    from .tools import visualize
    from .concretize_bounds import (
        concretize_bounds, concretize_root, backward_concretize, forward_concretize)
    from .solver_module import (
        build_solver_module, _build_solver_input, _build_solver_general,
        _reset_solver_vars, _reset_solver_model)


================================================
FILE: auto_LiRPA/bound_multi_gpu.py
================================================
#########################################################################
## This file is part of the auto_LiRPA library, a core part of the    ##
## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed     ##
## by the α,β-CROWN Team                                               ##
##                                                                     ##
## Copyright (C) 2020-2025 The α,β-CROWN Team                          ##
## Team leaders:                                                       ##
##   Faculty: Huan Zhang (UIUC)                                        ##
##   Student: Xiangru Zhong (UIUC)                                     ##
##                                                                     ##
## See CONTRIBUTORS for all current and past developers in the team.  ##
##                                                                     ##
## This program is licensed under the BSD 3-Clause License,            ##
## contained in the LICENCE file in this directory.
## ## ## ######################################################################### from torch.nn import DataParallel from .perturbations import * from .bounded_tensor import BoundedTensor from itertools import chain class BoundDataParallel(DataParallel): # https://github.com/huanzhang12/CROWN-IBP/blob/master/bound_layers.py # This is a customized DataParallel class for our project def __init__(self, *inputs, **kwargs): super(BoundDataParallel, self).__init__(*inputs, **kwargs) self._replicas = None # Overide the forward method def forward(self, *inputs, **kwargs): disable_multi_gpu = False # forward by single GPU no_replicas = False # forward by multi GPUs but without replicate if "disable_multi_gpu" in kwargs: disable_multi_gpu = kwargs["disable_multi_gpu"] kwargs.pop("disable_multi_gpu") if "no_replicas" in kwargs: no_replicas = kwargs["no_replicas"] kwargs.pop("no_replicas") if not self.device_ids or disable_multi_gpu: if kwargs.pop("get_property", False): return self.get_property(self, *inputs, **kwargs) return self.module(*inputs, **kwargs) if kwargs.pop("get_property", False): if self._replicas is None: assert 0, 'please call IBP/CROWN before get_property' if len(self.device_ids) == 1: return self.get_property(self.module, **kwargs) inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) kwargs = list(kwargs) for i in range(len(kwargs)): kwargs[i]['model'] = self._replicas[i] outputs = self.parallel_apply([self.get_property] * len(kwargs), inputs, kwargs) return self.gather(outputs, self.output_device) # Only replicate during forward/IBP propagation. Not during interval bounds # and CROWN-IBP bounds, since weights have not been updated. This saves 2/3 # of communication cost. if not no_replicas: if self._replicas is None: # first time self._replicas = self.replicate(self.module, self.device_ids) elif kwargs.get("method_opt", "forward") == "forward": self._replicas = self.replicate(self.module, self.device_ids) elif kwargs.get("x") is not None and kwargs.get("IBP") is True: # self._replicas = self.replicate(self.module, self.device_ids) # Update the input nodes to the ones within each replica respectively for bounded_module in self._replicas: for node in bounded_module._modules.values(): node.inputs = [bounded_module[name] for name in node.input_name] for t in chain(self.module.parameters(), self.module.buffers()): if t.device != self.src_device_obj: raise RuntimeError("module must have its parameters and buffers " "on device {} (device_ids[0]) but found one of " "them on device: {}".format(self.src_device_obj, t.device)) # TODO: can be done in parallel, only support same ptb for all inputs per forward/IBP propagation if len(inputs) > 0 and hasattr(inputs[0], 'ptb') and inputs[0].ptb is not None: # compute bounds without x # inputs_scatter is a normal tensor, we need to assign ptb to it if inputs is a BoundedTensor inputs_scatter, kwargs = self.scatter((inputs, inputs[0].ptb.x_L, inputs[0].ptb.x_U), kwargs, self.device_ids) # inputs_scatter = inputs_scatter[0] bounded_inputs = [] for input_s in inputs_scatter: # GPU numbers # FIXME other perturbations are not supported yet assert isinstance(inputs[0].ptb, PerturbationLpNorm) ptb = PerturbationLpNorm(norm=inputs[0].ptb.norm, eps=inputs[0].ptb.eps, x_L=input_s[1], x_U=input_s[2]) input_s = list(input_s[0]) input_s[0] = BoundedTensor(input_s[0], ptb) input_s = tuple(input_s) bounded_inputs.append(input_s) # bounded_inputs = tuple(bounded_inputs) elif kwargs.get("x") is not None and hasattr(kwargs.get("x")[0], 'ptb') and 
kwargs.get("x")[0].ptb is not None: # compute bounds with x # kwargs['x'] is a normal tensor, we need to assign ptb to it x = kwargs.get("x")[0] bounded_inputs = [] inputs_scatter, kwargs = self.scatter((inputs, x.ptb.x_L, x.ptb.x_U), kwargs, self.device_ids) for input_s, kw_s in zip(inputs_scatter, kwargs): # GPU numbers # FIXME other perturbations are not supported yet assert isinstance(x.ptb, PerturbationLpNorm) ptb = PerturbationLpNorm(norm=x.ptb.norm, eps=x.ptb.eps, x_L=input_s[1], x_U=input_s[2]) kw_s['x'] = list(kw_s['x']) kw_s['x'][0] = BoundedTensor(kw_s['x'][0], ptb) kw_s['x'] = (kw_s['x']) bounded_inputs.append(tuple(input_s[0], )) else: # normal forward inputs_scatter, kwargs = self.scatter(inputs, kwargs, self.device_ids) bounded_inputs = inputs_scatter if len(self.device_ids) == 1: return self.module(*bounded_inputs[0], **kwargs[0]) outputs = self.parallel_apply(self._replicas[:len(bounded_inputs)], bounded_inputs, kwargs) return self.gather(outputs, self.output_device) @staticmethod def get_property(model, node_class=None, att_name=None, node_name=None): if node_name: # Find node by name # FIXME If we use `model.named_modules()`, the nodes have the # `BoundedModule` type rather than bound nodes. for node in model._modules.values(): if node.name == node_name: return getattr(node, att_name) else: # Find node by class for _, node in model.named_modules(): # Find the Exp neuron in computational graph if isinstance(node, node_class): return getattr(node, att_name) def state_dict(self, destination=None, prefix='', keep_vars=False): # add 'module.' here before each keys in self.module.state_dict() if needed return self.module.state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) def _named_members(self, get_members_fn, prefix='', recurse=True, remove_duplicate: bool = True): return self.module._named_members(get_members_fn, prefix, recurse, remove_duplicate) def __getitem__(self, name): return self.module[name] ================================================ FILE: auto_LiRPA/bound_op_map.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### from .bound_ops import * bound_op_map = { 'onnx::Gemm': BoundLinear, 'prim::Constant': BoundPrimConstant, 'grad::Concat': BoundConcatGrad, 'grad::Relu': BoundReluGrad, 'grad::Conv2d': BoundConv2dGrad, 'grad::Slice': BoundSliceGrad, 'grad::Sqr': BoundSqr, 'grad::jacobian': BoundJacobianOP, 'grad::Tanh': BoundTanhGrad, 'grad::Sigmoid': BoundSigmoidGrad, 'custom::Gelu': BoundGelu, 'onnx::Clip': BoundHardTanh } def register_custom_op(op_name: str, bound_obj: Bound) -> None: bound_op_map[op_name] = bound_obj def unregister_custom_op(op_name: str) -> None: bound_op_map.pop(op_name) ================================================ FILE: auto_LiRPA/bound_ops.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .operators import * ================================================ FILE: auto_LiRPA/bounded_tensor.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import copy import torch.nn as nn from torch import Tensor import torch._C as _C class BoundedTensor(Tensor): @staticmethod # We need to override the __new__ method since Tensor is a C class def __new__(cls, x, ptb=None, *args, **kwargs): if isinstance(x, Tensor): tensor = super().__new__(cls, [], *args, **kwargs) tensor.data = x.data tensor.requires_grad = x.requires_grad return tensor else: return super().__new__(cls, x, *args, **kwargs) def __init__(self, x, ptb=None): self.ptb = ptb def __repr__(self): if hasattr(self, 'ptb') and self.ptb is not None: return ''.format(super().__repr__(), self.ptb.__repr__()) else: return ''.format(super().__repr__()) def clone(self, *args, **kwargs): tensor = BoundedTensor(super().clone(*args, **kwargs), copy.deepcopy(self.ptb)) return tensor def _func(self, func, *args, **kwargs): temp = func(*args, **kwargs) new_obj = BoundedTensor([], self.ptb) new_obj.data = temp.data new_obj.requires_grad = temp.requires_grad return new_obj # Copy to other devices with perturbation def to(self, *args, **kwargs): # FIXME add a general "to" function in perturbation class, not here. 
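        # Illustrative usage (a minimal sketch, assuming an Linf perturbation with
        # explicit x_L/x_U tensors; names are placeholders, not defined here):
        #     xb = BoundedTensor(x, PerturbationLpNorm(eps=0.1, x_L=x - 0.1, x_U=x + 0.1))
        #     xb = xb.to('cuda')  # moves the tensor data and the ptb's x_L/x_U/eps together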
if hasattr(self.ptb, 'x_L') and isinstance(self.ptb.x_L, Tensor): self.ptb.x_L = self.ptb.x_L.to(*args, **kwargs) if hasattr(self.ptb, 'x_U') and isinstance(self.ptb.x_U, Tensor): self.ptb.x_U = self.ptb.x_U.to(*args, **kwargs) if hasattr(self.ptb, 'eps') and isinstance(self.ptb.eps, Tensor): self.ptb.eps = self.ptb.eps.to(*args, **kwargs) return self._func(super().to, *args, **kwargs) @classmethod def _convert(cls, ret): if cls is Tensor: return ret if isinstance(ret, Tensor): if True: # The current implementation does not seem to need non-leaf BoundedTensor return ret else: # Enable this branch if non-leaf BoundedTensor should be kept ret = ret.as_subclass(cls) if isinstance(ret, tuple): ret = tuple(cls._convert(r) for r in ret) return ret @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): if kwargs is None: kwargs = {} if not all(issubclass(cls, t) for t in types): return NotImplemented with _C.DisableTorchFunction(): ret = func(*args, **kwargs) return cls._convert(ret) class BoundedParameter(nn.Parameter): def __new__(cls, data, ptb, requires_grad=True): return BoundedTensor._make_subclass(cls, data, requires_grad) def __init__(self, data, ptb, requires_grad=True): self.ptb = ptb self.requires_grad = requires_grad def __deepcopy__(self, memo): if id(self) in memo: return memo[id(self)] else: result = type(self)(self.data.clone(), self.ptb, self.requires_grad) memo[id(self)] = result return result def __repr__(self): return 'BoundedParameter containing:\n{}\n{}'.format( self.data.__repr__(), self.ptb.__repr__()) def __reduce_ex__(self, proto): raise NotImplementedError ================================================ FILE: auto_LiRPA/concretize_bounds.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import torch from .utils import eyeC from .bound_ops import * from .patches import Patches from .perturbations import PerturbationLpNorm from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule def concretize_bounds( self: 'BoundedModule', node, lower, upper, concretize_mode='backward', # for `backward_concretize` batch_size=None, output_dim=None, average_A=None, # for `forward_concretize` lw=None, uw=None, # common clip_neuron_selection_value=-1.0, clip_neuron_selection_type="ratio" ): """ If neuron_selection_value >= 0, run an unconstrained/bounds-saving pass then a top-K constrained pass; otherwise just one pass. 
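    Illustrative sketch of the selection size K used in the constrained pass
    (this only restates the logic implemented below, for clarity):
        if clip_neuron_selection_type == "ratio":
            K = max(round(max_unstable_size * clip_neuron_selection_value), 1)
        else (a fixed count):
            K = min(clip_neuron_selection_value, max_unstable_size)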
""" # decide which underlying call to use def _call_concretize(use_constraints, save_bounds=False, heuristic_indices=None): if concretize_mode == 'backward': # backward concretize signature return backward_concretize( self, batch_size, output_dim, lower, upper, average_A=average_A, node_start=node, use_constraints=use_constraints, save_bounds=save_bounds, heuristic_indices=heuristic_indices, ) elif concretize_mode == 'forward': # forward_concretize signature return forward_concretize( self, lower, upper, lw, uw, use_constraints=use_constraints, save_bounds=save_bounds, heuristic_indices=heuristic_indices, ) else: raise ValueError(f"Unknown concretize mode: {concretize_mode}. " "Please use 'backward' or 'forward'.") use_constraints = True save_bounds = False # If clip_neuron_selection_value >= 0, heuristic score-based topk selection is enabled. # And we will only apply constrained concretization on topk neurons based on their heuristics. # In this case, we'll need to 1) concretize all neurons without any constraints to get a looser bound # --> This is for computing the heuristics # 2) concretize topk neurons with constraints. # --> This is for getting tighter bounds for topk neurons. # In conclusion, if neuron_selection_value >= 0, use_consrtaints will be disabled first. # But for the output node in the computational graph we will directly concretize all neurons.. if clip_neuron_selection_value >= 0 and node.name not in self.output_name: use_constraints = False # `output_activations` is the list of output activations from current pre-activation node. # This output_activations is manually assigned outside of auto_lirpa. Please check # complete_verifier/input_split/batch_branch_and_bound.py for more info. # If a node: # a) does not have any output_activation, and # b) heuristic topk selection is enabled, and # c) is not the output node in the computational graph # we will only compute naive bounds on it. # Otherwise, we'll need to do both step 1) and 2). And to accelarate step 2), we will save the bounds in 1). # If 1) this node has at least one output activation node # 2) at least one neuron will be selected # We will need to concretize with constraints, if node.output_activations is not None and clip_neuron_selection_value > 0: save_bounds = True # If heuristic topk selection is enabled, this would be the step 1). new_lower, new_upper, has_constraints = _call_concretize( use_constraints=use_constraints, save_bounds=save_bounds, ) # If heuristic topk selection is enabled, this if-branch would be the step 2). if (has_constraints and node.output_activations is not None and clip_neuron_selection_value > 0 and node.name not in self.output_name): score = 0.0 unstable_masks = False # loop through all the output activations to get a comprehensive unstable mask and heuristic score. # This output_activations is manually assigned outside of auto_lirpa. # Please check complete_verifier/input_split/batch_branch_and_bound.py for o_act_node in node.output_activations: score = score + o_act_node.compute_bound_improvement_heuristics(new_lower, new_upper) unstable_masks = unstable_masks | o_act_node.get_unstable_mask(new_lower, new_upper) score = score.flatten(1) # shape: (Batchsize, Hidden_dim) unstable_masks = unstable_masks.flatten(1) # shape: (Batchsize, Hidden_dim) # Only do second concretize if there exists unstable neurons. if unstable_masks.any(): max_unstable_size = unstable_masks.sum(dim=1).max() heuristic_indices = None # The K value in topk should be at least 1. 
if clip_neuron_selection_type == "ratio": K = max(int(max_unstable_size * clip_neuron_selection_value + 0.5), 1) else: K = min(clip_neuron_selection_value, max_unstable_size) _, heuristic_indices = torch.topk(score, k=K, dim=1, largest=True, sorted=False) new_lower, new_upper, _ = _call_concretize( use_constraints=True, heuristic_indices=heuristic_indices ) else: # Previously we've stored to aux bounds, now it should be cleared to avoid any confusion. for root in self.roots(): if (hasattr(root, 'perturbation') and root.perturbation is not None and isinstance(root.perturbation, PerturbationLpNorm)): root.perturbation.clear_aux_bounds() return new_lower, new_upper def concretize_root(self, root, batch_size, output_dim, average_A=False, node_start=None, input_shape=None, use_constraints=False, heuristic_indices=None, save_bounds=False): # The last three optional argument are designed for heuristic-driven constrained concretization. # use_constraints: A flag controling whether to enable constraints solving or not. # heuristic_indices: A index tensor, it select EUQAL number of hidden neurons from each batch. # Constrained solving will be further applied on these neurons. Shape (batchsize, n_h_neurons) # save_bounds: A flag determining whether to save naive bounds (to avoid redundant computation) if average_A and isinstance(root, BoundParams): lA = root.lA.mean( node_start.batch_dim + 1, keepdim=True ).expand(root.lA.shape) if (root.lA is not None) else None uA = root.uA.mean( node_start.batch_dim + 1, keepdim=True ).expand(root.uA.shape) if (root.uA is not None) else None else: lA, uA = root.lA, root.uA if not isinstance(root.lA, eyeC) and not isinstance(root.lA, Patches): lA = root.lA.reshape(output_dim, batch_size, -1).transpose(0, 1) if (lA is not None) else None if not isinstance(root.uA, eyeC) and not isinstance(root.uA, Patches): uA = root.uA.reshape(output_dim, batch_size, -1).transpose(0, 1) if (uA is not None) else None has_constraints = False if hasattr(root, 'perturbation') and root.perturbation is not None: if isinstance(root.perturbation, PerturbationLpNorm): # Enable / Disable constraints solving according to `use_constraints` root.perturbation.constraints_enable = use_constraints if root.perturbation.constraints is not None: if self.infeasible_bounds_constraints is not None: root.perturbation.add_infeasible_batches(self.infeasible_bounds_constraints) root.perturbation.add_objective_indices(heuristic_indices) has_constraints = True if isinstance(root, BoundParams): # add batch_size dim for weights node lb = root.perturbation.concretize( root.center.unsqueeze(0), lA, sign=-1, aux=root.aux ) if (lA is not None) else None ub = root.perturbation.concretize( root.center.unsqueeze(0), uA, sign=+1, aux=root.aux ) if (uA is not None) else None else: lb = root.perturbation.concretize( root.center, lA, sign=-1, aux=root.aux ) if lA is not None else None ub = root.perturbation.concretize( root.center, uA, sign=+1, aux=root.aux ) if uA is not None else None if (isinstance(root.perturbation, PerturbationLpNorm) and root.perturbation.constraints is not None and root.perturbation.sorted_out_batches["infeasible_batches"] is not None): if self.infeasible_bounds_constraints is not None: self.infeasible_bounds_constraints = self.infeasible_bounds_constraints | root.perturbation.sorted_out_batches["infeasible_batches"] # else: # self.infeasible_bounds_constraints = root.perturbation.sorted_out_batches["infeasible_batches"] # If required, save current (naive) bounds to prevent redundant computation 
next time concretize on the same node if isinstance(root.perturbation, PerturbationLpNorm) and root.perturbation.constraints is not None and save_bounds: root.perturbation.add_aux_bounds(lb, ub) elif isinstance(root.perturbation, PerturbationLpNorm): # Otherwise, always clear_aux_bounds to prevent confusion root.perturbation.clear_aux_bounds() else: fv = root.forward_value if type(root) == BoundInput: # Input node with a batch dimension batch_size_ = batch_size else: # Parameter node without a batch dimension batch_size_ = 1 def concretize_constant(A): if isinstance(A, eyeC): return fv.view(batch_size_, -1) elif isinstance(A, Patches): return A.matmul(fv, input_shape=input_shape) elif type(root) == BoundInput: return A.matmul(fv.view(batch_size_, -1, 1)).squeeze(-1) else: return A.matmul(fv.view(-1, 1)).squeeze(-1) lb = concretize_constant(lA) if (lA is not None) else None ub = concretize_constant(uA) if (uA is not None) else None return lb, ub, has_constraints def backward_concretize(self, batch_size, output_dim, lb=None, ub=None, average_A=False, node_start=None, use_constraints=False, heuristic_indices=None, save_bounds=False): # The last three optional argument are designed for heuristic-driven constrained concretization. # use_constraints: A flag controling whether to enable constraints solving or not. # heuristic_indices: A index tensor, it select EUQAL number of hidden neurons from each batch. # Constrained solving will be further applied on these neurons. Shape (batchsize, n_h_neurons) # save_bounds: A flag determining whether to save naive bounds (to avoid redundant computation) roots = self.roots() if isinstance(lb, torch.Tensor) and lb.ndim > 2: lb = lb.reshape(lb.shape[0], -1) if isinstance(ub, torch.Tensor) and ub.ndim > 2: ub = ub.reshape(ub.shape[0], -1) def add_b(b1, b2): if b2 is None: return b1 elif b1 is None: return b2 # Check if b1 is a tensor and if all its elements are infinity if torch.is_tensor(b1) and torch.isinf(b1).all(): return b1 # Check if b2 is a tensor and if all its elements are infinity if torch.is_tensor(b2) and torch.isinf(b2).all(): return b2 else: return b1 + b2 has_constraints = False for root in roots: root.lb = root.ub = None if root.lA is None and root.uA is None: continue root.lb, root.ub, has_constraints_this_root = self.concretize_root( root, batch_size, output_dim, average_A=average_A, node_start=node_start, input_shape=roots[0].center.shape, use_constraints=use_constraints, heuristic_indices=heuristic_indices, save_bounds=save_bounds) has_constraints = has_constraints | has_constraints_this_root lb = add_b(lb, root.lb) ub = add_b(ub, root.ub) return lb, ub, has_constraints def forward_concretize(self, lower, upper, lw, uw, use_constraints=False, heuristic_indices=None, save_bounds=False): """ Concretize function for forward bound. :param lower: Tensor. Intermediate layer lower bounds. :param upper: Tensor. Intermediate layer upper bounds. :param lw: Tensor. Intermediate layer lower A matrix. :param uw: Tensor. Intermediate layer upper A matrix. :param use_constraints: bool. A flag controling whether to enable constraints solving or not. If heuristic ratio is set, the first concretization run should disbale constraints solving. :param heuristic_indices: Index Tensor. A index tensor, it select **equal** number of hidden neurons from each batch. Constrained solving will be further applied on these neurons. Shape (batchsize, n_h_neurons) :param save_bounds: bool. A flag controling whether to save naive bounds. :return res_lower: Tensor. 
The lower bound tensor. :return res_upper: Tensor. The upper bound tensor. :return has_constraints: bool. Whether constraints has been stored. """ res_lower = 0.0 res_upper = 0.0 prev_dim_in = 0 has_constraints = False roots = self.roots() assert (lw.ndim > 1) lA = lw.reshape(self.batch_size, self.dim_in, -1).transpose(1, 2) uA = uw.reshape(self.batch_size, self.dim_in, -1).transpose(1, 2) for root in roots: if hasattr(root, 'perturbation') and root.perturbation is not None: _lA = lA[:, :, prev_dim_in : (prev_dim_in + root.dim)] _uA = uA[:, :, prev_dim_in : (prev_dim_in + root.dim)] if isinstance(root.perturbation, PerturbationLpNorm): root.perturbation.constraints_enable = use_constraints if root.perturbation.constraints is not None: if self.infeasible_bounds_constraints is not None: root.perturbation.add_infeasible_batches(self.infeasible_bounds_constraints) root.perturbation.add_objective_indices(heuristic_indices) has_constraints = True # Previously added concretized bounds directly to lower/upper. # Now extract them first for reuse (e.g., in aux_bounds). temp_lower = root.perturbation.concretize( root.center, _lA, sign=-1, aux=root.aux ).view(lower.shape) temp_upper = root.perturbation.concretize( root.center, _uA, sign=+1, aux=root.aux ).view(upper.shape) # Update infeasible_batches if (isinstance(root.perturbation, PerturbationLpNorm) and root.perturbation.constraints is not None and root.perturbation.sorted_out_batches["infeasible_batches"] is not None): if self.infeasible_bounds_constraints is not None: self.infeasible_bounds_constraints = self.infeasible_bounds_constraints | root.perturbation.sorted_out_batches["infeasible_batches"] # else: # self.infeasible_bounds_constraints = root.perturbation.sorted_out_batches["infeasible_batches"] # If required, save current (naive) bounds to prevent redundant computation next time concretize on the same node if isinstance(root.perturbation, PerturbationLpNorm) and root.perturbation.constraints is not None and save_bounds: root.perturbation.add_aux_bounds(temp_lower, temp_upper) elif isinstance(root.perturbation, PerturbationLpNorm): # Otherwise, always clear_aux_bounds to prevent confusion root.perturbation.clear_aux_bounds() # Now the concretization result from this root will be accumulated into final bounds. # Here we add temp_lower onto res_lower, instead of lower. # It's because the lower value will be used twice, any modification to it should be avoided. res_lower = res_lower + temp_lower res_upper = res_upper + temp_upper res_lower = res_lower + lower res_upper = res_upper + upper return res_lower, res_upper, has_constraints ================================================ FILE: auto_LiRPA/concretize_func.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import torch from math import floor, ceil from .utils import eyeC # Declaration of the shape naming: # B / batchsize : The number of batches. In this `concretize_func.py`, if a tensor has batch dimension, we assume # it will only be the first dimention of this tensor . That is: B = tensor.shape[0] # # B_act : The number of active batches. We will only apply constraints to a subset of batches, and these # batches are called active batches. B_act <= B. There are two cases: # -- When `no_return_inf` mode is disabled, we will keep B_act static throughout the entire # BaB iteration. In this case, B_act equals the number of batches not fully covered by # constraints, as determined by `sort_out_constr_batches` function. # -- When `no_return_inf` mode is enabled, then B_act decreases over iterations, since more # batches will be marked as infeasible. See `PerturbationLpNorm.add_infeasible_batches`. # # X / x_dim : The number of input neurons (batch dimension excluded). It stands for the input shape of the # neural network. For tensors such as x0, epsilon, x_U, x_L, X = prod(*tensor.shape[1:]) # # H / hidden_dim : The number of hidden neurons (batch and input dimension excluded). It stands for the output # shape of this hidden layer. For the objective A tensor, there are two cases: # -- The tensor has batch dimention: H = tensor.view(B, -1, X).shape[1] # -- The tensor does not have batch dimention: H = tensor.view(-1, X).shape[0] # # H_act : The number of active batches. We may only apply constraints to a subset of hidden neurons, # and these neurons are called active neurons. H_act <= H. # # N_constr : The number of constraints. For constraints_A matrix: # -- In `sort_out_constr_batches` function, its shape is (B, N_constr, X) # -- In `constraints_solving` function, its shape is (B_act, N_constt, X) def construct_constraints(constr_A: torch.Tensor, constr_b: torch.Tensor, constr_rhs: torch.Tensor, batchsize, x_dim, sign=1): r""" Construct the constraints tuple. This function provides a unified interface to generate this tuple. All the users should carefully read this function to fully understand the standard form of constraints. The first three argument expresses the non-standard form of the constraints: A @ x + b <= rhs We will first convert it into the standard form: A @ x + b' <= 0 The the standard expression of constraints should be (constr_A, constr_b') Args: constr_A: The coefficient A matrix of constraints. It should be able to be reshaped into: (B, N_constr, X) constr_b: The bias term of constraints. It should be able to be reshaped into: (B, N_constr) constr_rhs: The right-hand-side term of constraints. It should be able to be reshaped into: (B, N_constr) batchsize: The batchsize B. 
x_dim: The input dimension X (batchsize dimension excluded) """ constr_A = sign * constr_A.reshape((batchsize, -1, x_dim)) if constr_rhs is not None and not torch.all(constr_rhs == 0): constr_b = sign * (constr_b - constr_rhs).reshape((batchsize, -1)) else: constr_b = sign * constr_b.reshape((batchsize, -1)) return (constr_A, constr_b) def _sort_out_constraints(A, b, x0, epsilon): r""" Filter out some batches with constraints not intersecting with input region Args: A (Tensor): A matrix of constraints with shape of (batchsize, n_constraints, x_dim) b (Tensor): Bias term of constraints with shape of (batchsize, n_constraints) x0 (Tensor): Centroid of the input space with shape of (batchsize, x_dim, 1) epsilon (Tensor): Offset from the centroid to the input space boundary with shape of (batchsize, x_dim, 1) Return: no_intersection (Tensor): A boolean tensor with shape (batchsize, ), indicating if certain batch is infeasible because a constraint does not intersect with input space fully_covered (Tensor): A boolean tensor with shape (batchsize, ), indicating if all the constraints in a certain batch fully covers the corresponding input region. In this case, we can simply treat the batch as if it has no constraints """ # minimal and maximal value of A*x + b x0_term = A.bmm(x0).squeeze(-1) + b # shape: (B, N_constr) eps_term = A.abs().bmm(epsilon).squeeze(-1) # shape: (B, N_constr) minimal_val = x0_term - eps_term # shape: (B, N_constr) maximal_val = x0_term + eps_term # shape: (B, N_constr) # for any constrains: A * x + b <= 0, # if its min(A * x + b) > 0, it has no intersection with x0 +- epsilon # if its max(A * x + b) <= 0, it fully covers x0 +- epsilon no_intersection = (minimal_val > 0).any(1) # shape: (B, ) if not no_intersection.any(): no_intersection = None fully_covered = (maximal_val <= 0).all(1) # shape: (B, ) return no_intersection, fully_covered @torch.jit.script def _dist_rearrange(constraints_A, constraints_b, x0): r""" Reorder the constraints according to their distance to x_prime Args: constraints_A (Tensor): A matrix of constraints with shape of (batchsize, n_constraints, x_dim) constraints_b (Tensor): Bias term of constraints with shape of (batchsize, n_constraints) x0 (Tensor): x0 tensor with shape of (batchsize, x_dim, 1). Based on the heuristic, this can be the input space centroid x0, or the original optimal point x_prime Return: rearranged_A (Tensor): Rearranged matrix of constraints with shape of (batchsize, n_constraints, x_dim) rearranged_b (Tensor): Bias term of constraints with shape of (batchsize, n_constraints) """ # Compute the normalized, directional distance from x_prime to constraints hyper-plane. distance = (constraints_A.bmm(x0).squeeze(-1) + constraints_b) # shape: (B, N_constr) l2_norm = constraints_A.norm(p=2, dim=-1) # shape: (B, N_constr) normed_dist = distance / l2_norm # shape: (B, N_constr) # Sort the constraints according to this distance. order = torch.sort(normed_dist, descending=True, dim=1)[1] order_expand = order.unsqueeze(-1).expand(-1, -1, constraints_A.size(-1)) rearranged_A = constraints_A.gather(index=order_expand, dim=1) rearranged_b = constraints_b.gather(index=order, dim=1) return rearranged_A, rearranged_b @torch.jit.script def _solve_dual_var(constr_a, object_a, constr_d, epsilon, a_mul_e=None): r""" Solve the following optimization problem: Primal: min_x object_a^T x s.t. constr_a^T x + constr_d <= 0, x0-epsilon <= x <= x0+epsilon Dual: min_x max_beta object_a^T x + beta * (constr_a^T x + constr_d) s.t. 
x0 - epsilon <= x <= x0 + epsilon beta >= 0 Strong duality: max_{beta >= 0} min_{x \in X} object_a^T x + beta * (constr_a^T x + constr_d) Dual norm: max_{beta >= 0} - |object_a + beta * constr_a|^T epsilon + beta * (constr_a^T x0 + constr_d) + object_a^T x0 Now the sole optimize problem is piece-wise linear, we just have to check each turning point and the end points of beta (0 and +inf) Args: constr_a (Tensor): Constraint A matrix with shape of (batchsize, x_dim) object_a (Tensor): Objective A matrix with shape of (batchsize, h_dim, x_dim) constr_d (Tensor): Pre-computed bias term of constraint with shape of (batchsize, ) constr_d = constr_a^T x0 + constr_b epsilon (Tensor): Offset from the centroid to the input space boundary with shape of (batchsize, x_dim, 1) Return: optimal_beta (Tensor): The optimal beta value with shape of (batchsize, h_dim) """ B_act = constr_a.size(0) H_act = object_a.size(1) device = constr_a.device dtype = constr_a.dtype # --- prepare fill-in tensors zeros = torch.zeros((1, 1, 1), device=device, dtype=dtype).expand(B_act, H_act, 1) infs = torch.full((1, 1, 1), fill_value=torch.inf, dtype=dtype, device=device).expand(B_act, H_act, 1) a_reshape = constr_a.unsqueeze(1) # shape: (B_act, 1, X) epsilon_reshape = epsilon.view((B_act, 1, -1)) # shape: (B_act, 1, X) b_reshape = constr_d.view((-1, 1, 1)) # shape: (B_act, 1, 1) # q is the turning points of the piece-wise linear function. q = - object_a/a_reshape # shape: (B_act, H_act, X) # idx indicates the ascending order of these turning points. q_sort, idx = q.sort(dim=-1) # shape: (B_act, H_act, X) # --- calculating the gradient w.r.t. beta within each interval --- a_mul_e = (a_reshape * epsilon_reshape).expand(-1, H_act, -1) # (B_act, H_act, X) # a_mul_e = a_mul_e.expand(-1, H_act, -1) # (B_act, H_act, X) (B_act, H_act, X) a_sort = torch.gather(a_mul_e, dim=-1, index=idx) # (B_act, H_act, X) a_neg_cumsum = a_sort.abs().cumsum(dim=-1) # shape: (B_act, H, x_dim) a_neg_cumsum = torch.cat((zeros, a_neg_cumsum), dim=-1) # shape: (B_act, H_act, 1+X) a_pos_cumsum = a_neg_cumsum - a_neg_cumsum[:, :, -1:] # shape: (B_act, H_act, 1+X) grad_beta = a_pos_cumsum + a_neg_cumsum - b_reshape # shape: (B_act, H_act, 1+X) # Due to the non-increasing trait of grad_beta, if there is a turning point # then the gradient must change from positive to negative, and this turning point is the optimal beta. sign_change = torch.searchsorted(grad_beta, zeros, right=False) # It might be the case that grad_beta is always positive when beta > 0. # This means the maximization object is ever-increasing, hence it is unbounded. # For this case, a inf value would be returned. # Following comes a case of sign_change where all the turning points q are positive: # (g stands for grad_beta, q stands for turing points) # g[0] = 2 g[1] = 1 g[2] = -1 g[3] = -3 # 0 --------- q[0] --------- q[1] ----------- q[2] ----------- ... --------> +inf # ^ # sign_change=2 # # q should represent the interval endpoints, hence, need to pad the left and right end with 0 and inf separately. # cat shape: (B_act, H_act, 1+X+1) q_new = torch.cat((zeros, q_sort, infs), dim=-1) # shape: (B_act, H_act, X+2) optimal_beta = torch.gather(q_new, dim=-1, index=sign_change).clamp(min=0).squeeze(-1) # shape: (B_act, H_act) return optimal_beta def sort_out_constr_batches(x_L, x_U, constraints, rearrange_constraints=False, no_return_inf=False): r""" Filter and preprocess input batches based on constraint feasibility. 
This function examines which input regions 1) has no intersection with one of the constraints. 2) is fully covered by the all the constraints. It also optionally rearranges constraint order for better numerical behavior, and converts the constraint form from `(A, b)` to `(A, d)` where `d = A @ x0 + b`. Here x0 means the centroid of the input region, that is x0 = (x_L + x_U) / 2. Args: x_L (Tensor): Lower bound of input box, shape (B, *). x_U (Tensor): Upper bound of input box, shape (B, *). constraints (Tuple[Tensor, Tensor] or None): A tuple `(A, b)` representing per-batch linear constraints. - `A`: shape (B, N_constr, X) - `b`: shape (B, N_constr) If None or empty, the function returns early. rearrange_constraints (bool): Whether to rearrange constraints for better solver performance. Default: False. no_return_inf (bool): If True, infeasible batches will be excluded from `active_indices`. Otherwise, infeasible batches are still marked active. Default: False. Returns: constraints (Optional[Tuple[Tensor, Tensor]]): Filtered and reshaped constraint tuple `(A, d)` for active batches only. - `A`: shape (B_active, N_constr, X) - `d`: shape (B_active, N_constr) If all batches are fully covered, returns None. sorted_out_batches (dict): Diagnostic and filtering info with keys: - 'infeasible_batches' (BoolTensor): Shape (B,), True if batch has no feasible region. If all the elements are False, it would be None. This would save space and time. - 'fully_covered' (BoolTensor): Shape (B,), True if batch is completely covered by constraints. - 'active_indices' (LongTensor): Indices of batches that are neither fully covered nor infeasible. """ sorted_out_batches = None if constraints is None or constraints[0] is None or constraints[0].numel() == 0: return None, sorted_out_batches # Read argument and some necessary reshape assert x_L is not None and x_U is not None, "If constrained concretize is enabled, x_L and x_U cannot be None!" x0 = (x_L + x_U) / 2 epsilon = (x_U - x_L) / 2 constraints_A, constraints_b = constraints batch_size = x0.shape[0] x_dim = x0[0].numel() x0 = x0.view((batch_size, x_dim, 1)) # shape: (B, X, 1) epsilon = epsilon.view((batch_size, x_dim, 1)) # shape: (B, X, 1) no_intersection, fully_covered = _sort_out_constraints(constraints_A, constraints_b, x0, epsilon) if fully_covered.all(): print("All the added constraints fully cover the input space. No need to apply constraints .") return None, sorted_out_batches sorted_out_batches = {} sorted_out_batches["infeasible_batches"] = no_intersection # If there's no infeasible batch, simply set it to be None. # This will provide a shortcut when update the infeasible_batches vector. # When batchsize is large and NN model has a lot of perturbed roots, this can save us some time. sorted_out_batches["fully_covered"] = fully_covered active_mask = ~fully_covered if no_intersection is not None and no_return_inf: active_mask = ~no_intersection & active_mask active_indices = torch.nonzero(active_mask, as_tuple=True)[0] sorted_out_batches["active_indices"] = active_indices # Now constraints tuple only contains active constraints, shape change: (B, N_Constr, X) -> (B_act, N_constr, X) constraints_A = constraints_A[active_indices] # shape: (B_act, N_Constr, X) constraints_b = constraints_b[active_indices] # shape: (B_act, N_Constr) active_x0 = x0[active_indices] if rearrange_constraints: constraints_A, constraints_b = _dist_rearrange(constraints_A, constraints_b, active_x0) # Also, we will replace the constraint_b term with constraints_d term. 
# For the usage of constraints_d, please check _solve function and constraints_solving function. constraints_d = torch.einsum('bkx, bxo->bk', constraints_A, active_x0) + constraints_b # shape: (B_act, N_Constr) # Only store the constraints for active batches. constraints = (constraints_A, constraints_d) return constraints, sorted_out_batches def constraints_solving( x_L, x_U, objective, constraints, sign=-1.0, sorted_out_batches={}, objective_indices=None, constraints_enable=True, no_return_inf=False, max_chunk_size=None, safety_factor=0.8, solver_memory_factor=2.0, timer=None, aux_bounds=None, x0=None, epsilon=None, act_x0=None, act_eps=None, use_grad=True ): r""" Combined constraint solving function with conditional logic based on objective shape. - If objective is eyeC or broadcastable (shape[0]=1), uses a vectorized, auto-chunked approach. - If objective has batch dim matching input (shape[0]=N_batch), uses the original approach (repeating inputs, no chunking). Solves LP: max / min A_t * x, s.t. A_c * x + b_c <= 0, x_L <= x <= x_U Args: x_L, x_U (Tensor) : Input bounds tensors. objective (Tensor) : Target coefficients (Tensor or eyeC). - Tensor shape: (H, X), (1, H, X), or (N_batch, H, X). - eyeC: Represents identity matrix.W constraints (tuple, optional) : Tuple (A_c, d_c) or None. sign (float, optional) : -1.0 for lower bound, +1.0 for upper bound. sorted_out_batches (dict, optional): Dict with pre-filtered batch masks. Please check `sort_out_constr_batches` for more info. constraints_enable (bool, optional): Flag for enabling constraints solving, this is set for heuristic hybrid solving, should be True by default. no_return_inf (bool, optional) : Flag for returning inf value. If true, this function will return inf for all the infeasible subproblems. Otherwise, return naive bounds for infeasible ones. max_chunk_size, safety_factor, solver_memory_factor: Params for chunking memory. max_chunk_size: A hard upper limit on the number of problems to be processed in a single chunk, regardless of available memory. If set to an integer, the auto-calculated chunk size will not exceed this value. Use Case: Prevents the solver from creating a single, massive chunk that could cause system unresponsiveness, even if memory is technically available. Set to None to allow the function to use its own dynamic calculation. safety_factor: A float between 0.0 and 1.0 that specifies what fraction of the free GPU memory should be considered "usable" for the calculation. For example, a value of 0.8 means the function will only use 80% of the available free memory as its budget. Use Case: This buffer helps prevent "Out of Memory" (OOM) errors by accounting for memory fragmentation, memory used by other processes, or overhead from the CUDA driver itself. A lower value is safer but may result in smaller chunks and thus slower overall processing. solver_memory_factor: A heuristic multiplier used to estimate the memory consumed by the iterative solver loop. The theoretical memory usage is multiplied by this factor to create a more realistic estimate. Use Case: The exact memory allocated for intermediate tensors and computations within the solver can be complex to predict perfectly. This factor provides a "fudge factor" to pad the memory estimation, ensuring that the dynamically created tensors inside the solver loop do not cause an OOM error. Adjust this if you consistently face memory issues during the solver phase. 
objective_indices (Tensor, optional): Indices tensor of shape (N_batch, H_active) indicating which objectives to compute. If None, all are computed. timer: Optional Timer object. aux_bounds (Tensor, optional) : When hybrid constraint solving is enabled, the constraints_solving function will be called twice. For its second run, we load the result from the first run to save the time of recomputing naive bounds. x0, epsilon (Tensor, optional) : x0 and epsilon to solve on. If not given, they are computed from x_L and x_U. act_x0, act_eps (Tensor, optional): Active x0 and epsilon to solve on. use_grad (bool, optional): If False, the main computation is wrapped in `torch.no_grad()` for better performance and lower memory usage. Set to True only when gradients are required (e.g., for clip during alpha crown). Defaults to True. Returns: bound (Tensor): Computed bounds (N_batch, H, 1). infeasible_batches (BoolTensor, optional) : If no_return_inf is True, `infeasible_batches` will be returned. It is a boolean tensor with shape (batch_size, ), with True indicating the batch is infeasible. """ if timer: timer.start('init') if timer: timer.start("concretize") device = x_L.device N_batch = x_L.size(0) epsilon = (x_U - x_L) / 2.0 if epsilon is None else epsilon x0 = (x_U + x_L) / 2.0 if x0 is None else x0 epsilon = epsilon.reshape((N_batch, -1, 1)) x0 = x0.reshape((N_batch, -1, 1)) is_eyeC = isinstance(objective, eyeC) # --- Naive Case (No Constraints) --- no_constraints_condition = (constraints is None) or (constraints[0].numel() == 0) if no_constraints_condition or (not constraints_enable): if is_eyeC: solved_obj = x0 + sign * epsilon # Shape: (N_batch, X, 1) else: base_term = torch.einsum('bhx,bxo->bho', objective, x0) # Shape: (N_batch, H, 1) eps_term = torch.einsum('bhx,bxo->bho', objective.abs(), epsilon) # Shape: (N_batch, H, 1) solved_obj = base_term + sign * eps_term # Shape: (N_batch, H, 1) if timer: timer.add("init") if timer: timer.add("concretize") if no_return_inf: return solved_obj, None else: return solved_obj with torch.set_grad_enabled(use_grad): is_broadcastable = False is_batch_specific = False H = -1 # Hidden dimension X = x0.size(1) # Input X dimension if is_eyeC: is_broadcastable = True H = X # Internally represent eyeC as an identity matrix for the broadcastable path. objective_tensor = torch.eye(X, device=device).unsqueeze(0) # Shape (1, X, X) else: if objective.shape[0] != N_batch: # objective comes in shape (H, X) or (1, H, X). # It will be broadcast to (B, H, X) later. # Currently, is_broadcastable is designed for relu-bab, which usually takes a lot of GPU memory, # so is_broadcastable also acts as a control flag for objective chunking. is_broadcastable = True else: # objective comes in shape (B, H, X).
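# In this batch-specific case no objective chunking is applied. # For reference, the three objective layouts handled here are: eyeC (expanded to a (1, X, X) identity, broadcastable path), (H, X) or (1, H, X) (broadcastable path, shared across the batch and eligible for chunking over problems), and (N_batch, H, X) (batch-specific path, solved in a single chunk).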
is_batch_specific = True H = objective.shape[1] objective_tensor = objective if objective.shape[2] != X: raise ValueError("Objective shape mismatch") # --- Constrained Case --- # --- Calculate Naive Bounds (used as default/fallback) --- naive_bounds = torch.zeros(N_batch, H, 1, device=device) if aux_bounds is not None: naive_bounds_all = aux_bounds.flatten(1).unsqueeze(-1) elif is_eyeC: naive_bounds_all = x0 + sign * epsilon # Shape (N_batch, X, 1) -> (N_batch, H, 1) elif is_broadcastable: # obj_tensor is (1, H, X) base_term_naive = torch.einsum('shx,bxo->bho', objective_tensor, x0) eps_term_naive = torch.einsum('shx,bxo->bho', objective_tensor.abs(), epsilon) naive_bounds_all = base_term_naive + sign * eps_term_naive # Shape (N_batch, H, 1) elif is_batch_specific: # obj_tensor is (N, H, X) base_term_naive = torch.einsum('bhx,bxo->bho', objective_tensor, x0) eps_term_naive = torch.einsum('bhx,bxo->bho', objective_tensor.abs(), epsilon) naive_bounds_all = base_term_naive + sign * eps_term_naive # Shape (N_batch, H, 1) else: raise RuntimeError("Internal logic error in naive bound calculation") naive_bounds = naive_bounds_all # Assign calculated bounds # Final bounds tensor initialized as naive bounds final_bounds = naive_bounds fill_value_inf = torch.tensor(torch.inf if sign == -1.0 else -torch.inf, device=device) # --- Initial Batch Filtering (Common Logic) --- active_indices = sorted_out_batches.get("active_indices", None) if active_indices is None: fully_covered = sorted_out_batches.get("fully_covered", torch.zeros(N_batch, dtype=torch.bool, device=device)) active_batches_mask = ~fully_covered # Batches requiring solver if no_return_inf: infeasible_batches = sorted_out_batches.get("infeasible_batches", torch.zeros(N_batch, dtype=torch.bool, device=device)) active_batches_mask = ~infeasible_batches & active_batches_mask active_indices = torch.nonzero(active_batches_mask, as_tuple=True)[0] B_act = active_indices.numel() # Number of batches needing the solver. if timer: timer.add('init') # Combined timing for setup. # --- Early Exit if No Active Batches --- if B_act == 0: print(f"Constrained concretize: No active batches after filtering.") # Ensure non-active parts have naive bounds before returning. # (already done above by initializing with naive/inf) if timer: timer.add("concretize") final_bounds = naive_bounds if no_return_inf: return final_bounds, None else: return final_bounds constraints_A, constraints_d = constraints n_constraints = constraints_A.size(1) # --- Dynamic Chunk Size Calculation --- if is_batch_specific: # If objective is batch-specific, we do not chunk it. num_chunks = 1 final_chunk_size = B_act else: # This block dynamically estimates the optimal chunk size to maximize GPU # utilization while preventing out-of-memory (OOM) errors. 
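# Rough sketch of the estimate below (descriptive note, numbers illustrative only): usable_mem = free_gpu_mem * safety_factor; mem_per_item is approximated from the per-problem constraint rows, x0/epsilon copies, objective copy and solver temporaries (scaled by solver_memory_factor); then chunk_size = min(B_act, max_chunk_size, floor(usable_mem / mem_per_item)), clamped to at least 1. # For example, with about 8 GiB of usable memory and roughly 4 MiB estimated per problem, around 2000 problems would be processed per chunk.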
calculated_chunk_size = B_act free_mem, total_mem = torch.cuda.mem_get_info() usable_mem = free_mem * safety_factor obj_dtype = objective.dtype dtype_size = torch.finfo(obj_dtype).bits // 8 mem_constraints_per_item = (n_constraints * X + n_constraints) * dtype_size mem_x0eps_per_item = 2 * X * dtype_size mem_ori_c_per_item = H * X * dtype_size mem_dual_obj_per_item = H * dtype_size mem_solver_per_item_bh = H * (X + X + 1 + X + 1) * dtype_size * solver_memory_factor mem_masks_temps_per_item = H * 2 # approx mem_per_item_est = (mem_constraints_per_item + mem_x0eps_per_item + mem_ori_c_per_item + mem_dual_obj_per_item + mem_solver_per_item_bh + mem_masks_temps_per_item) * 5 if mem_per_item_est > 0: estimated_max_chunk = max(1, floor(usable_mem / mem_per_item_est)) calculated_chunk_size = min(B_act, estimated_max_chunk) if max_chunk_size is not None and max_chunk_size > 0: final_chunk_size = min(calculated_chunk_size, max_chunk_size) else: final_chunk_size = calculated_chunk_size final_chunk_size = max(1, final_chunk_size) # Ensure chunk size is at least 1. num_chunks = ceil(B_act / final_chunk_size) if no_return_inf: # Initialize infeasible_batches boolean mask to be None at first. # If an infeasible batch does occur later, we will then initialize it to be a actual vector. infeasible_batches = None for i_chunk in range(num_chunks): # --- Handle size and idx for this chunk --- chunk_start_idx_rel = i_chunk * final_chunk_size chunk_end_idx_rel = min(chunk_start_idx_rel + final_chunk_size, B_act) current_chunk_size = chunk_end_idx_rel - chunk_start_idx_rel if current_chunk_size == 0: continue chunk_indices_abs = active_indices[chunk_start_idx_rel:chunk_end_idx_rel] # --- Get matrices for this chunk --- constr_A_mat = constraints_A[chunk_start_idx_rel:chunk_end_idx_rel] # shape (B_act, n_constraints, X) constr_d_mat = constraints_d[chunk_start_idx_rel:chunk_end_idx_rel] # shape (B_act, n_constraints) if act_x0 is not None: x0_mat = act_x0[chunk_start_idx_rel:chunk_end_idx_rel] else: x0_mat = x0[chunk_indices_abs] # shape (B_act, X, 1) if act_eps is not None: eps_mat = act_eps[chunk_start_idx_rel:chunk_end_idx_rel] else: eps_mat = epsilon[chunk_indices_abs] # shape (B_act, X, 1) if is_broadcastable: ori_c_mat = objective_tensor.expand(current_chunk_size, H, X).clone() else: ori_c_mat = objective_tensor[chunk_indices_abs].clone() # shape: (B_act, H, X) if objective_indices is not None: # shape: (B, H_act) # Select the mask rows corresponding to the active batches in this chunk current_objective_indices = objective_indices[chunk_indices_abs] # shape: (B_act, H_act) idx_unsqueeze = current_objective_indices.unsqueeze(-1) # shape: (B_act, H_act, 1) idx_expand = idx_unsqueeze.expand(-1, -1, X) # shape: (B_act, H_act, X) ori_c_mat = ori_c_mat.gather(index=idx_expand, dim=1) # shape: (B_act, H_act, X) obj_mat = ori_c_mat # shape (B_act, H_act, X) # Initialize dual part and base part # Note that the final minimal value is: # objective^T x0 + base_part # constr_d_0 * beta_0 + constr_d_1 * beta_1 + ... 
+ dual_part 1 # - ( objective+ constr_a_0 * beta_0 + constr_a_1 * beta_1)^T epsilon dual_part 2 base_objective_term = torch.einsum('bhx,bxo->bh', obj_mat, x0_mat) # shape: (B_act, H_act) dual_objective_part = torch.zeros_like(base_objective_term) # shape: (B_act, H_act) # --- Initialize State for Vectorized Loop (Chunk) --- if sign == 1.0: # Adjust for minimization problem solved by _solve obj_mat *= -1.0 # shape (B_act, H_act, X) base_objective_term *= -1.0 # --- Vectorized Constraint Loop (Operating on Chunk) --- for k in range(n_constraints): constr_a_solve = constr_A_mat[:, k, :] # constraint A matrix shape (B_act, X) constr_d_solve = constr_d_mat[:, k] # related bias term shape (B_act,) epsilon_solve = eps_mat # epsilon shape (B_act, X) object_a_solve = obj_mat # objective matrix shape (B_act, H_act, X) with torch.no_grad(): # Otherwise, the gradients will mess up the alpha crown optimization. optimal_beta = _solve_dual_var(constr_a_solve, object_a_solve, constr_d_solve, epsilon_solve) # shape (B_act, H_act) # Accumulation for the parentheses term in dual part 2 obj_mat += optimal_beta.unsqueeze(-1) * constr_a_solve.unsqueeze(1) # shape (B_act, H_act, X) # (B_act, H_act, 1) (B_act, 1, X) # Accumulation of dual part 1 dual_objective_part += optimal_beta * constr_d_solve.unsqueeze(1) # shape (B_act, H_act) # (B_act, H_act) (B_act, 1) # --- End of k loop --- # --- Final Objective Calculation for Unfinished Items in Chunk --- final_obj_abs = obj_mat.abs() # shape: (B_act, H_act, X) final_eps_mat = eps_mat # shape: (B_act, X, 1) final_eps_term = torch.einsum('nhx,nxo->nh', final_obj_abs, final_eps_mat) # shape: (B_act, H_act) dual_objective_part -= final_eps_term # --- Combine terms and handle mask --- final_obj_minimized = base_objective_term + dual_objective_part # shape: (B_act, H_act) if sign == 1.0: final_obj_optimal = -final_obj_minimized # Flip sign back if maximizing. else: final_obj_optimal = final_obj_minimized # Previously we will handle infeasible batches after running through all the chunks, during processing final_bounds. # But that would require to create a copy of naive bounds # To save space and time, we will process final_obj_optimal final_obj_optimal = torch.nan_to_num(final_obj_optimal, nan=fill_value_inf.item(), posinf=fill_value_inf.item(), neginf=-fill_value_inf.item()) if no_return_inf: infeasible_batches_chunk = final_obj_optimal.isinf().any(1) if infeasible_batches_chunk.any(): # Note that infeasible_batches was initialized as None infeasible_batches = torch.full((N_batch, ), fill_value=False, device=device, dtype=torch.bool) if infeasible_batches is None else infeasible_batches infeasible_batches[chunk_indices_abs] = infeasible_batches_chunk # Set the bounds of infeasible batches to be naive bounds infeasible_batches_chunk_indices_abs = chunk_indices_abs[infeasible_batches_chunk] if objective_indices is not None: naive_bounds_chunk = naive_bounds[infeasible_batches_chunk_indices_abs].squeeze(-1) # Get the infeasible objective indices for this chunk. 
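# Note: when objective_indices is given, final_obj_optimal only holds the selected objectives (H_act columns per batch), so the fallback naive bounds must be gathered with the same per-batch objective indices before overwriting the infeasible rows.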
current_infeasible_objective_indices = current_objective_indices[infeasible_batches_chunk] final_obj_optimal[infeasible_batches_chunk] = torch.gather(naive_bounds_chunk, dim=1, index=current_infeasible_objective_indices) else: final_obj_optimal[infeasible_batches_chunk] = naive_bounds[infeasible_batches_chunk_indices_abs].squeeze(-1) # Put the result of this chunk back into the overall final bounds if objective_indices is not None: final_bounds_active_chunk = final_bounds[chunk_indices_abs] final_bounds_active_chunk.scatter_(dim=1, index=idx_unsqueeze, src=final_obj_optimal.unsqueeze(-1)) final_bounds[chunk_indices_abs] = final_bounds_active_chunk else: final_bounds[chunk_indices_abs] = final_obj_optimal.unsqueeze(-1) if no_return_inf: if timer: timer.add("concretize") return final_bounds, infeasible_batches else: if timer: timer.add("concretize") return final_bounds ================================================ FILE: auto_LiRPA/cuda/cuda_kernels.cu ================================================ #include <torch/extension.h> #include <cuda.h> #include <cuda_runtime.h> #include <vector> __global__ void cuda_double2float_rd_kernel(const double* __restrict__ inputs, float* __restrict__ outputs, const size_t tensor_size) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < tensor_size) { outputs[idx] = __double2float_rd(inputs[idx]); } } __global__ void cuda_double2float_ru_kernel(const double* __restrict__ inputs, float* __restrict__ outputs, const size_t tensor_size) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < tensor_size) { outputs[idx] = __double2float_ru(inputs[idx]); } } torch::Tensor cuda_double2float_forward(torch::Tensor input, const std::string direction) { auto total_elem = input.numel(); auto output = torch::empty_like(input, torch::ScalarType::Float); const int threads = 1024; const int blocks = (total_elem + threads - 1) / threads; if (direction == "down") { cuda_double2float_rd_kernel<<<blocks, threads>>>(input.data_ptr<double>(), output.data_ptr<float>(), total_elem); } else { cuda_double2float_ru_kernel<<<blocks, threads>>>(input.data_ptr<double>(), output.data_ptr<float>(), total_elem); } return output; } ================================================ FILE: auto_LiRPA/cuda/cuda_utils.cpp ================================================ #include <torch/extension.h> #include <vector> #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") torch::Tensor cuda_double2float_forward( torch::Tensor input, const std::string direction); torch::Tensor double2float_foward( torch::Tensor input, const std::string direction) { TORCH_CHECK((direction == "down") || (direction == "up"), "Unsupported direction, must be down or up."); TORCH_CHECK(input.type().scalarType() == torch::ScalarType::Double, "This function only supports DoubleTensor as inputs."); CHECK_CUDA(input); return cuda_double2float_forward(input, direction); } /* * Usage: double2float(tensor, direction) * "tensor" must be a DoubleTensor on GPU. * "direction" is a string, can be "up" (round up) or "down" (round down).
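* Example (illustrative, via the Python binding exposed below): lo = double2float(x, "down") gives lo <= x elementwise and hi = double2float(x, "up") gives hi >= x elementwise. * Rounding lower bounds down and upper bounds up keeps float32 interval bounds sound when narrowing float64 tensors.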
*/ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("double2float", &double2float_foward, "Convert double to float with rounding direction control (direction = 'up' or 'down')."); } ================================================ FILE: auto_LiRPA/cuda_utils.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import os import sys import torch from torch.utils.cpp_extension import load, BuildExtension, CUDAExtension from setuptools import setup class DummyCudaClass: """A dummy class with error message when a CUDA function is called.""" def __getattr__(self, attr): if attr == "double2float": # When CUDA module is not built successfully, use a workaround. def _f(x, d): print('WARNING: Missing CUDA kernels. Please enable CUDA build by setting environment variable AUTOLIRPA_ENABLE_CUDA_BUILD=1 for the correct behavior!') return x.float() return _f def _f(*args, **kwargs): raise RuntimeError(f"method {attr} not available because CUDA module was not built.") return _f if __name__ == "__main__" and len(sys.argv) > 1: # Build and install native CUDA modules that can be directly imported later print('Building and installing native CUDA modules...') setup( name='auto_LiRPA_cuda_utils', ext_modules=[CUDAExtension('auto_LiRPA_cuda_utils', [ 'auto_LiRPA/cuda/cuda_utils.cpp', 'auto_LiRPA/cuda/cuda_kernels.cu' ])], cmdclass={'build_ext': BuildExtension.with_options()}, ) exit(0) if torch.cuda.is_available() and os.environ.get('AUTOLIRPA_ENABLE_CUDA_BUILD', False): try: import auto_LiRPA_cuda_utils as _cuda_utils except: print('CUDA modules have not been installed') try: print('Building native CUDA modules...') code_dir = os.path.dirname(os.path.abspath(__file__)) verbose = os.environ.get('AUTOLIRPA_DEBUG_CUDA_BUILD', None) is not None _cuda_utils = load( 'cuda_utils', [os.path.join(code_dir, 'cuda', 'cuda_utils.cpp'), os.path.join(code_dir, 'cuda', 'cuda_kernels.cu')], verbose=verbose) print('CUDA modules have been built.') except: print('CUDA module build failure. Some features will be unavailable.') print('Please make sure the latest CUDA toolkit is installed in your system.') if verbose: print(sys.exc_info()[2]) else: print('Set environment variable AUTOLIRPA_DEBUG_CUDA_BUILD=1 to view build log.') _cuda_utils = DummyCudaClass() else: if os.environ.get('AUTOLIRPA_ENABLE_CUDA_BUILD', False): print('CUDA unavailable. Some features are disabled.') _cuda_utils = DummyCudaClass() double2float = _cuda_utils.double2float def test_double2float(): # Test the double2float function. 
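# The element-wise checks below assert that the round-down result never exceeds the original value and that the round-up result never falls below it (soundness of directed rounding), then benchmark both kernels against PyTorch's default .float() conversion.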
import time shape = (3,4,5) a = torch.randn(size=shape, dtype=torch.float64, device='cuda') a = a.transpose(0,1) au = _cuda_utils.double2float(a, "up") ad = _cuda_utils.double2float(a, "down") print(a.size(), au.size(), ad.size()) a_flatten = a.reshape(-1) au_flatten = au.reshape(-1) ad_flatten = ad.reshape(-1) for i in range(a_flatten.numel()): ai = a_flatten[i].item() aui = au_flatten[i].item() adi = ad_flatten[i].item() print(adi, ai, aui) assert adi <= ai assert aui >= ai del a, au, ad, a_flatten, au_flatten, ad_flatten # Performance benchmark. for j in [1, 4, 16, 64, 256, 1024]: shape = (j, 512, 1024) print(f'shape: {shape}') t = torch.randn(size=shape, dtype=torch.float64, device='cuda') torch.cuda.synchronize() start_time = time.time() for i in range(10): tt = t.float() torch.cuda.synchronize() del tt pytorch_time = time.time() - start_time print(f'pytorch rounding time: {pytorch_time:.4f}') torch.cuda.synchronize() start_time = time.time() for i in range(10): tu = _cuda_utils.double2float(t, "up") torch.cuda.synchronize() del tu roundup_time = time.time() - start_time print(f'cuda round up time: {roundup_time:.4f}') torch.cuda.synchronize() start_time = time.time() for i in range(10): td = _cuda_utils.double2float(t, "down") torch.cuda.synchronize() del td rounddown_time = time.time() - start_time print(f'cuda round down time: {rounddown_time:.4f}') del t if __name__ == "__main__": if len(sys.argv) == 1: # Some tests. It's not possible to test them automatically because travis does not have CUDA. test_double2float() ================================================ FILE: auto_LiRPA/edit_graph.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """Edit the computational graph in BoundedModule.""" from auto_LiRPA.bound_ops import Bound from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule # Make sure the nodes already have `name` and `input_name` def add_nodes(self: 'BoundedModule', nodes): # TODO check duplicate names nodes = [(node if isinstance(node, Bound) else node.bound_node) for node in nodes] for node in nodes: if node.name in self._modules: raise NameError(f'Node with name {node.name} already exists') self._modules[node.name] = node node.output_name = [] if len(node.inputs) == 0: self.root_names.append(node.name) for node in nodes: for l_pre in node.inputs: l_pre.output_name.append(node.name) if (getattr(node, 'has_constraint', False) and node.name not in self.layers_with_constraint): self.layers_with_constraint.append(node.name) def add_input_node(self: 'BoundedModule', node, index=None): self.add_nodes([node]) self.input_name.append(node.name) # default value for input_index if index == 'auto': index = max([0] + [(i + 1) for i in self.input_index if i is not None]) self.input_index.append(index) def delete_node(self: 'BoundedModule', node): for node_inp in node.inputs: node_inp.output_name.pop(node_inp.output_name.index(node.name)) self._modules.pop(node.name) # TODO Create a list to contain all such lists such as # "relus" and "optimizable_activations" self.relus = [ item for item in self.relus if item != node] self.optimizable_activations = [ item for item in self.optimizable_activations if item != node] def replace_node(self: 'BoundedModule', node_old, node_new): assert node_old != node_new for node in self.nodes(): for i in range(len(node.inputs)): if node.inputs[i] == node_old: node.inputs[i] = node_new node_new.output_name += node_old.output_name if self.final_name == node_old.name: self.final_name = node_new.name for i in range(len(self.output_name)): if self.output_name[i] == node_old.name: self.output_name[i] = node_new.name self.delete_node(node_old) ================================================ FILE: auto_LiRPA/eps_scheduler.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import random from .utils import logger class BaseScheduler(object): def __init__(self, max_eps, opt_str): self.parse_opts(opt_str) self.prev_loss = self.loss = self.max_eps = self.epoch_length = float("nan") self.eps = 0.0 self.max_eps = max_eps self.is_training = True self.epoch = 0 self.batch = 0 def __repr__(self): return ''.format(self.eps, self.max_eps) def parse_opts(self, s): opts = s.split(',') self.params = {} for o in opts: if o.strip(): key, val = o.split('=') self.params[key] = val def get_max_eps(self): return self.max_eps def get_eps(self): return self.eps def reached_max_eps(self): return abs(self.eps - self.max_eps) < 1e-3 def step_batch(self, verbose=False): if self.is_training: self.batch += 1 return def step_epoch(self, verbose=False): if self.is_training: self.epoch += 1 return def update_loss(self, new_loss): self.prev_loss = self.loss self.loss = new_loss def train(self): self.is_training = True def eval(self): self.is_training = False # Set how many batches in an epoch def set_epoch_length(self, epoch_length): self.epoch_length = epoch_length class FixedScheduler(BaseScheduler): def __init__(self, max_eps, opt_str=""): super(FixedScheduler, self).__init__(max_eps, opt_str) self.eps = self.max_eps class LinearScheduler(BaseScheduler): def __init__(self, max_eps, opt_str): super(LinearScheduler, self).__init__(max_eps, opt_str) self.schedule_start = int(self.params['start']) self.schedule_length = int(self.params['length']) self.epoch_start_eps = self.epoch_end_eps = 0 def __repr__(self): return ''.format( self.epoch_start_eps, self.epoch_end_eps) def step_epoch(self, verbose = True): self.epoch += 1 self.batch = 0 if self.epoch < self.schedule_start: self.epoch_start_eps = 0 self.epoch_end_eps = 0 else: eps_epoch = self.epoch - self.schedule_start if self.schedule_length == 0: self.epoch_start_eps = self.epoch_end_eps = self.max_eps else: eps_epoch_step = self.max_eps / self.schedule_length self.epoch_start_eps = min(eps_epoch * eps_epoch_step, self.max_eps) self.epoch_end_eps = min((eps_epoch + 1) * eps_epoch_step, self.max_eps) self.eps = self.epoch_start_eps if verbose: logger.info("Epoch {:3d} eps start {:7.5f} end {:7.5f}".format(self.epoch, self.epoch_start_eps, self.epoch_end_eps)) def step_batch(self): if self.is_training: self.batch += 1 eps_batch_step = (self.epoch_end_eps - self.epoch_start_eps) / self.epoch_length self.eps = self.epoch_start_eps + eps_batch_step * (self.batch - 1) if self.batch > self.epoch_length: logger.warning('Warning: we expect {} batches in this epoch but this is batch {}'.format(self.epoch_length, self.batch)) self.eps = self.epoch_end_eps class RangeScheduler(BaseScheduler): def __init__(self, max_eps, opt_str): super(RangeScheduler, self).__init__(max_eps, opt_str) self.schedule_start = int(self.params['start']) self.schedule_length = int(self.params['length']) def __repr__(self): return ''.format( self.schedule_start, self.schedule_start + self.schedule_length) def step_epoch(self, verbose = True): self.epoch += 1 if self.epoch >= self.schedule_start and self.epoch < self.schedule_start + self.schedule_length: self.eps = self.max_eps else: self.eps = 0 def step_batch(self): pass class BiLinearScheduler(LinearScheduler): def __init__(self, max_eps, opt_str): super(BiLinearScheduler, self).__init__(max_eps, opt_str) self.schedule_start = int(self.params['start']) self.schedule_length = int(self.params['length']) self.schedule_length_half = 
self.schedule_length / 2 self.epoch_start_eps = self.epoch_end_eps = 0 def __repr__(self): return ''.format( self.epoch_start_eps, self.epoch_end_eps) def step_epoch(self, verbose = True): self.epoch += 1 self.batch = 0 if self.epoch < self.schedule_start: self.epoch_start_eps = 0 self.epoch_end_eps = 0 else: eps_epoch = self.epoch - self.schedule_start eps_epoch_step = self.max_eps / self.schedule_length_half if eps_epoch < self.schedule_length_half: self.epoch_start_eps = min(eps_epoch * eps_epoch_step, self.max_eps) self.epoch_end_eps = min((eps_epoch + 1) * eps_epoch_step, self.max_eps) else: self.epoch_start_eps = max(0, self.max_eps - ((eps_epoch - self.schedule_length_half) * eps_epoch_step)) self.epoch_end_eps = max(0, self.epoch_start_eps - eps_epoch_step) self.eps = self.epoch_start_eps if verbose: logger.info("Epoch {:3d} eps start {:7.5f} end {:7.5f}".format(self.epoch, self.epoch_start_eps, self.epoch_end_eps)) class SmoothedScheduler(BaseScheduler): def __init__(self, max_eps, opt_str): super(SmoothedScheduler, self).__init__(max_eps, opt_str) # Epoch number to start schedule self.schedule_start = int(self.params['start']) # Epoch length for completing the schedule self.schedule_length = int(self.params['length']) # Mid point to change exponential to linear schedule self.mid_point = float(self.params.get('mid', 0.25)) # Exponential self.beta = float(self.params.get('beta', 4.0)) assert self.beta >= 2. assert self.mid_point >= 0. and self.mid_point <= 1. self.batch = 0 # Set how many batches in an epoch def set_epoch_length(self, epoch_length): if self.epoch_length != self.epoch_length: self.epoch_length = epoch_length else: if self.epoch_length != epoch_length: raise ValueError("epoch_length must stay the same for SmoothedScheduler") def step_epoch(self, verbose = True): super(SmoothedScheduler, self).step_epoch() # FIXME if verbose == False: for i in range(self.epoch_length): self.step_batch() # Smooth schedule that slowly morphs into a linear schedule. # Code is based on DeepMind's IBP implementation: # https://github.com/deepmind/interval-bound-propagation/blob/2c1a56cb0497d6f34514044877a8507c22c1bd85/interval_bound_propagation/src/utils.py#L84 def step_batch(self, verbose=False): if self.is_training: self.batch += 1 init_value = 0.0 final_value = self.max_eps beta = self.beta step = self.batch - 1 # Batch number for schedule start init_step = (self.schedule_start - 1) * self.epoch_length # Batch number for schedule end final_step = (self.schedule_start + self.schedule_length - 1) * self.epoch_length # Batch number for switching from exponential to linear schedule mid_step = int((final_step - init_step) * self.mid_point) + init_step t = (mid_step - init_step) ** (beta - 1.) 
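# Worked example (illustrative numbers, assuming 100 batches per epoch with start=2, length=10, mid=0.25, beta=4): init_step = 100, mid_step = 350, final_step = 1100, so eps stays at 0 for roughly the first 100 batches, grows polynomially (degree beta) for the next 250 batches, then ramps linearly and reaches max_eps around batch 1100.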
# find coefficient for exponential growth, such that at mid point the gradient is the same as a linear ramp to final value alpha = (final_value - init_value) / ((final_step - mid_step) * beta * t + (mid_step - init_step) * t) # value at switching point mid_value = init_value + alpha * (mid_step - init_step) ** beta # return init_value when we have not started is_ramp = float(step > init_step) # linear schedule after mid step is_linear = float(step >= mid_step) exp_value = init_value + alpha * float(step - init_step) ** beta linear_value = min(mid_value + (final_value - mid_value) * (step - mid_step) / (final_step - mid_step), final_value) self.eps = is_ramp * ((1.0 - is_linear) * exp_value + is_linear * linear_value) + (1.0 - is_ramp) * init_value class AdaptiveScheduler(BaseScheduler): def __init__(self, max_eps, opt_str): super(AdaptiveScheduler, self).__init__(max_eps, opt_str) self.schedule_start = int(self.params['start']) self.min_eps_step = float(self.params.get('min_step', 1e-9)) self.max_eps_step = float(self.params.get('max_step', 1e-4)) self.eps_increase_thresh = float(self.params.get('increase_thresh', 1.0)) self.eps_increase_factor = float(self.params.get('increase_factor', 1.5)) self.eps_decrease_thresh = float(self.params.get('decrease_thresh', 1.5)) self.eps_decrease_factor = float(self.params.get('decrease_factor', 2.0)) self.small_loss_thresh = float(self.params.get('small_loss_thresh', 0.05)) self.epoch = 0 self.eps_step = self.min_eps_step def step_batch(self): if self.eps < self.max_eps and self.epoch >= self.schedule_start and self.is_training: if self.loss != self.loss or self.prev_loss != self.prev_loss: # First 2 steps. Use min eps step self.eps += self.min_eps_step else: # loss decreasing or loss very small. Increase eps step if self.loss < self.eps_increase_thresh * self.prev_loss or self.loss < self.small_loss_thresh: self.eps_step = min(self.eps_step * self.eps_increase_factor, self.max_eps_step) # loss increasing. 
Decrease eps step elif self.loss > self.eps_decrease_thresh * self.prev_loss: self.eps_step = max(self.eps_step / self.eps_decrease_factor, self.min_eps_step) # print("loss {:7.5f} prev_loss {:7.5f} eps_step {:7.5g}".format(self.loss, self.prev_loss, self.eps_step)) # increase eps according to loss self.eps = min(self.eps + self.eps_step, self.max_eps) # print("eps step size {:7.5f}, eps {:7.5f}".format(self.eps_step, self.eps)) if __name__ == "__main__": s = SmoothedScheduler(0.1, "start=2,length=10,mid=0.3") epochs = 20 batches = 10 loss = 1.0 eps = [] s.set_epoch_length(batches) for epoch in range(1,epochs+1): s.step_epoch() for batch in range(1,batches+1): s.step_batch() loss = loss * (0.975 + random.random() / 20) eps.append(s.get_eps()) print('epoch {:5d} batch {:5d} eps {:7.5f} loss {:7.5f}'.format(epoch, batch, s.get_eps(), loss)) # update_loss is only necessary for adaptive eps scheduler s.update_loss(loss) # plot epsilon values import matplotlib matplotlib.use('Agg') from matplotlib import pyplot as plt plt.figure(figsize=(10,8)) plt.plot(eps) plt.xticks(range(0, epochs*batches+batches, batches)) plt.grid() plt.tight_layout() plt.savefig('epsilon.pdf') ================================================ FILE: auto_LiRPA/forward_bound.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import torch import warnings from .bound_ops import * from .utils import * from .linear_bound import LinearBound from .perturbations import PerturbationLpNorm from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule import sys sys.setrecursionlimit(1000000) def forward_general(self: 'BoundedModule', C=None, node:'Bound'=None, concretize=False, offset=0, from_node=False): if self.dynamic: return self.forward_general_dynamic(C=C, node=node, concretize=concretize, offset=offset) if C is None: if (hasattr(node, 'linear') and node.linear.lower is not None and node.linear.upper is not None): return node.linear.lower, node.linear.upper if not node.from_input: node.linear = LinearBound(None, node.value, None, node.value, node.value, node.value) return node.value, node.value if not node.perturbed: node.lower = node.upper = self.get_forward_value(node) if node.is_lower_bound_current(): node.linear = LinearBound(None, node.lower, None, node.upper, node.lower, node.upper) return node.lower, node.upper for l_pre in node.inputs: if not hasattr(l_pre, 'linear'): self.forward_general(node=l_pre, offset=offset, from_node=from_node) inp = [l_pre.linear for l_pre in node.inputs] node._start = '_forward' if (C is not None and type(node) is BoundLinear and not node.is_input_perturbed(1) and not node.is_input_perturbed(2)): linear = node.bound_forward(self.dim_in, *inp, C=C) C_merged = True else: linear = node.linear = node.bound_forward(self.dim_in, *inp) C_merged = False lw, uw = linear.lw, linear.uw lower, upper = linear.lb, linear.ub # Combine linear bounds with C matrix if C is not None and not C_merged: # FIXME use bound_forward of BoundLinear C_pos, C_neg = C.clamp(min=0), C.clamp(max=0) # Flatten lw, uw for matrix multiplication lw = lw.reshape(self.batch_size, self.dim_in, -1) uw = uw.reshape(self.batch_size, self.dim_in, -1) _lw = torch.matmul(lw, C_pos.transpose(-1, -2)) + torch.matmul(uw, C_neg.transpose(-1, -2)) _uw = torch.matmul(uw, C_pos.transpose(-1, -2)) + torch.matmul(lw, C_neg.transpose(-1, -2)) lw, uw = _lw, _uw # Flatten lower, upper for matrix multiplication lower = lower.reshape(self.batch_size, -1) upper = upper.reshape(self.batch_size, -1) _lower = ( torch.matmul(lower.unsqueeze(1), C_pos.transpose(-1, -2)) + torch.matmul(upper.unsqueeze(1), C_neg.transpose(-1, -2)) ) _upper = ( torch.matmul(upper.unsqueeze(1), C_pos.transpose(-1, -2)) + torch.matmul(lower.unsqueeze(1), C_neg.transpose(-1, -2)) ) lower, upper = _lower.squeeze(1), _upper.squeeze(1) logger.debug(f'Forward bounds to {node}') if concretize: if lw is not None or uw is not None: lower, upper = self.concretize_bounds( node=node, lower=lower, upper=upper, concretize_mode='forward', lw=lw, uw=uw, clip_neuron_selection_value=self.clip_neuron_selection_value, clip_neuron_selection_type=self.clip_neuron_selection_type ) linear.lower, linear.upper = lower, upper if C is None: node.linear = linear node.lower, node.upper = lower, upper if self.bound_opts['forward_refinement']: need_refinement = False for out in node.output_name: out_node = self[out] for i in getattr(out_node, 'requires_input_bounds', []): if out_node.inputs[i] == node: need_refinement = True break if need_refinement: self.forward_refinement(node) return lower, upper def forward_general_dynamic(self: 'BoundedModule', C=None, node:'Bound'=None, concretize=False, offset=0): max_dim = self.bound_opts['forward_max_dim'] if C is None: if hasattr(node, 
'linear'): assert not concretize linear = node.linear if offset == 0: if linear.lw is None: return linear elif linear.lw.shape[1] <= max_dim: return linear if linear.lw is not None: lw = linear.lw[:, offset:offset+max_dim] x_L = linear.x_L[:, offset:offset+max_dim] x_U = linear.x_U[:, offset:offset+max_dim] tot_dim = linear.tot_dim if offset == 0: lb = linear.lb else: lb = torch.zeros_like(linear.lb) else: lw = x_L = x_U = None tot_dim = 0 lb = linear.lb return LinearBound( lw, lb, lw, lb, x_L=x_L, x_U=x_U, offset=offset, tot_dim=tot_dim, ) # These cases have no coefficient tensor if not node.from_input: if concretize: return node.value, node.value else: node.linear = LinearBound( None, node.value, None, node.value, node.value, node.value) return node.linear if not node.perturbed: if not node.is_lower_bound_current(): node.lower = node.upper = self.get_forward_value(node) if concretize: return node.lower, node.upper else: if offset > 0: lb = torch.zeros_like(node.lower) else: lb = node.lower node.linear = LinearBound(None, lb, None, lb, node.lower, node.upper) return node.linear if offset == 0: logger.debug(f'forward_general_dynamic: node={node}') inp = [] for l_pre in node.inputs: linear_inp = self.forward_general_dynamic(node=l_pre, offset=offset) linear_inp.lower = l_pre.lower linear_inp.upper = l_pre.upper inp.append(linear_inp) node._start = '_forward' if (C is not None and isinstance(node, BoundLinear) and not node.is_input_perturbed(1) and not node.is_input_perturbed(2)): linear = node.bound_dynamic_forward( *inp, C=C, max_dim=max_dim, offset=offset) C_merged = True else: linear = node.bound_dynamic_forward( *inp, max_dim=max_dim, offset=offset) C_merged = False if offset > 0: linear.lb = linear.ub = torch.zeros_like(linear.lb) lw, lb, tot_dim = linear.lw, linear.lb, linear.tot_dim #logger.debug(f'forward_general_dynamic: node={node}, w_size={lw.shape[1]}, tot_dim={tot_dim}') if C is not None and not C_merged: # FIXME use bound_forward of BoundLinear lw = torch.matmul(lw, C.transpose(-1, -2)) lb = torch.matmul(lb.unsqueeze(1), C.transpose(-1, -2)).squeeze(1) if concretize: lower = upper = lb if lw is not None: batch_size = lw.shape[0] assert (lw.ndim > 1) if lw.shape[1] > 0: A = lw.reshape(batch_size, lw.shape[1], -1).transpose(1, 2) ptb = PerturbationLpNorm(x_L=linear.x_L, x_U=linear.x_U) lower = lower + ptb.concretize(x=None, A=A, sign=-1).view(lb.shape) upper = upper + ptb.concretize(x=None, A=A, sign=1).view(lb.shape) offset_next = offset + max_dim more = offset_next < tot_dim else: more = False if C is None and offset == 0 and not more: node.linear = linear if more: if lw is not None and lw.shape[1] > 0: del A del ptb del lw del linear del inp # TODO make it non-recursive lower_next, upper_next = self.forward_general_dynamic( C, node, concretize=True, offset=offset_next) lower = lower + lower_next upper = upper + upper_next if C is None: node.lower, node.upper = lower, upper return lower, upper else: return linear def clean_memory(self: 'BoundedModule', node): """ Remove linear bounds that are no longer needed. 
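A node's cached `linear` bound can be dropped once every node in its `output_name` list has its own `linear` attribute computed, since forward propagation will not read it again.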
""" # TODO add an option to retain these bounds for inp in node.inputs: if hasattr(inp, 'linear') and inp.linear is not None: clean = True for out in inp.output_name: out_node = self[out] if not (hasattr(out_node, 'linear') and out_node.linear is not None): clean = False if clean: if isinstance(inp.linear, tuple): for item in inp.linear: del item delattr(inp, 'linear') def forward_refinement(self: 'BoundedModule', node): """ Refine forward bounds with backward bound propagation (only refine unstable positions). """ unstable_size_before = torch.logical_and(node.lower < 0, node.upper > 0).sum() if unstable_size_before == 0: return unstable_idx, unstable_size = self.get_unstable_locations( node.lower, node.upper, conv=isinstance(node, BoundConv)) logger.debug(f'Forward refinement for {node}') batch_size = node.lower.shape[0] ret = self.batched_backward( node, C=None, unstable_idx=unstable_idx, batch_size=batch_size) self.restore_sparse_bounds( node, unstable_idx, unstable_size, node.lower, node.upper, new_lower=ret[0], new_upper=ret[1]) unstable_size_after = torch.logical_and(node.lower < 0, node.upper > 0).sum() logger.debug(f' Unstable neurons: {unstable_size_before} -> {unstable_size_after}') # TODO also update linear bounds? def init_forward(self: 'BoundedModule', roots, dim_in): if dim_in == 0: raise ValueError("At least one node should have a specified perturbation") prev_dim_in = 0 # Assumption: roots[0] is the input node which implies batch_size batch_size = roots[0].value.shape[0] for i in range(len(roots)): if hasattr(roots[i], 'perturbation') and roots[i].perturbation is not None: shape = roots[i].linear.lw.shape if self.dynamic: if shape[1] != dim_in: raise NotImplementedError('Dynamic forward bound is not supported yet when there are multiple perturbed inputs.') ptb = roots[i].perturbation if (type(ptb) != PerturbationLpNorm or ptb.norm < np.inf or ptb.x_L is None or ptb.x_U is None): raise NotImplementedError( 'For dynamic forward bounds, only Linf (box) perturbations are supported, and x_L and x_U must be explicitly provided.') roots[i].linear.x_L = ( ptb.x_L_sparse.view(batch_size, -1) if ptb.sparse else ptb.x_L.view(batch_size, -1)) roots[i].linear.x_U = ( ptb.x_U_sparse.view(batch_size, -1) if ptb.sparse else ptb.x_U.view(batch_size, -1)) else: lw = torch.zeros(shape[0], dim_in, *shape[2:]).to(roots[i].linear.lw) lw[:, prev_dim_in:(prev_dim_in+shape[1])] = roots[i].linear.lw if roots[i].linear.lw.data_ptr() == roots[i].linear.uw.data_ptr(): uw = lw else: uw = torch.zeros(shape[0], dim_in, *shape[2:]).to(roots[i].linear.uw) uw[:, prev_dim_in:(prev_dim_in+shape[1])] = roots[i].linear.uw roots[i].linear.lw = lw roots[i].linear.uw = uw if i >= self.num_global_inputs: roots[i].forward_value = roots[i].forward_value.unsqueeze(0).repeat( *([batch_size] + [1] * self.forward_value.ndim)) prev_dim_in += shape[1] else: b = fv = roots[i].forward_value shape = fv.shape if roots[i].from_input: w = torch.zeros(shape[0], dim_in, *shape[1:], device=self.device) warnings.warn(f'Creating a LinearBound with zero weights with shape {w.shape}') else: w = None roots[i].linear = LinearBound(w, b, w, b, b, b) roots[i].lower = roots[i].upper = b ================================================ FILE: auto_LiRPA/interval_bound.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the 
α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import torch from .bound_ops import * from .utils import logger from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule def IBP_general(self: 'BoundedModule', node=None, C=None, delete_bounds_after_use=False): logger.debug('IBP for %s', node) def _delete_unused_bounds(node_list: List[Bound]): """Delete bounds from input layers after use to save memory. Used when sparse_intermediate_bounds_with_ibp is true.""" if delete_bounds_after_use: for n in node_list: del n.interval n.delete_lower_and_upper_bounds() if self.bound_opts.get('loss_fusion', False): res = self._IBP_loss_fusion(node, C) if res is not None: return res if not node.perturbed: fv = self.get_forward_value(node) node.lower, node.upper = node.interval = (fv, fv) to_be_deleted_bounds = [] if not hasattr(node, 'interval'): for n in node.inputs: if not hasattr(n, 'interval'): # Node n does not have interval bounds; we must compute it. self.IBP_general( n, delete_bounds_after_use=delete_bounds_after_use) to_be_deleted_bounds.append(n) inp = [n_pre.interval for n_pre in node.inputs] if (C is not None and isinstance(node, BoundLinear) and not node.is_input_perturbed(1)): # merge the last BoundLinear node with the specification, available # when weights of this layer are not perturbed ret = node.interval_propagate(*inp, C=C) _delete_unused_bounds(to_be_deleted_bounds) return ret else: node.interval = node.interval_propagate(*inp) node.lower, node.upper = node.interval if isinstance(node.lower, torch.Size): node.lower = torch.tensor(node.lower) if isinstance(node.upper, torch.Size): node.upper = torch.tensor(node.upper) # Handle NaNs in lower and upper bounds if torch.isnan(node.lower).any(): print( f'[Interval Warning] NaN detected in lower bounds of node {node}. ' f'Replacing with -inf.' ) node.lower = torch.where( torch.isnan(node.lower), torch.full_like(node.lower, float('-inf')), node.lower ) if torch.isnan(node.upper).any(): print( f'[Interval Warning] NaN detected in upper bounds of node {node}. ' f'Replacing with +inf.' ) node.upper = torch.where( torch.isnan(node.upper), torch.full_like(node.upper, float('inf')), node.upper ) node.interval = Interval.make_interval(node.lower, node.upper, other=node.interval) if C is not None: _delete_unused_bounds(to_be_deleted_bounds) return BoundLinear.interval_propagate(None, node.interval, C=C) else: _delete_unused_bounds(to_be_deleted_bounds) return node.interval def _IBP_loss_fusion(self: 'BoundedModule', node, C): """Merge BoundLinear, BoundGatherElements and BoundSub. Improvement when loss fusion is used in training. """ # not using loss fusion if not self.bound_opts.get('loss_fusion', False): return None # Currently this function has issues in more complicated networks. 
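# Descriptive note: when the pattern BoundSub(BoundLinear, BoundGatherElements) is matched below, the last linear layer and the gather of the ground-truth logit are fused into a single interval propagation of the margin weights W_j - W_y and biases b_j - b_y, so intermediate bounds on the logits themselves never need to be materialized.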
if self.bound_opts.get('no_ibp_loss_fusion', False): return None if (C is None and isinstance(node, BoundSub) and isinstance(node.inputs[1], BoundGatherElements) and isinstance(node.inputs[0], BoundLinear)): node_gather = node.inputs[1] node_linear = node.inputs[0] node_start = node_linear.inputs[0] w = node_linear.inputs[1].param b = node_linear.inputs[2].param labels = node_gather.inputs[1] if not hasattr(node_start, 'interval'): self.IBP_general(node_start) for n in node_gather.inputs: if not hasattr(n, 'interval'): self.IBP_general(n) if torch.isclose(labels.lower, labels.upper, 1e-8).all(): labels = labels.lower batch_size = labels.shape[0] w = w.expand(batch_size, *w.shape) w = w - torch.gather( w, dim=1, index=labels.unsqueeze(-1).repeat(1, w.shape[1], w.shape[2])) b = b.expand(batch_size, *b.shape) b = b - torch.gather(b, dim=1, index=labels.repeat(1, b.shape[1])) lower, upper = node_start.interval lower, upper = lower.unsqueeze(1), upper.unsqueeze(1) node.lower, node.upper = node_linear.interval_propagate( (lower, upper), (w, w), (b.unsqueeze(1), b.unsqueeze(1))) node.interval = node.lower, node.upper = ( node.lower.squeeze(1), node.upper.squeeze(1)) return node.interval return None def check_IBP_intermediate(self: 'BoundedModule', node): """ Check if we use IBP bounds to compute intermediate bounds on this node. Currently, assume all eligible operators have exactly one input. """ tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) directly_optimize_layer_names = ( self.bound_opts['optimize_bound_args']['directly_optimize'] ) if isinstance(node, BoundInput) and tighten_input_bounds: return False if node.name in directly_optimize_layer_names: return False if self.ibp_nodes is not None and node.name in self.ibp_nodes: self.IBP_general(node) return True if (isinstance(node, BoundReshape) and node.inputs[0].is_lower_bound_current() and hasattr(node.inputs[1], 'value')): # Node for input value. val_input = node.inputs[0] # Node for input parameter (e.g., shape, permute) arg_input = node.inputs[1] node.lower = node.forward(val_input.lower, arg_input.value) node.upper = node.forward(val_input.upper, arg_input.value) node.interval = (node.lower, node.upper) return True # Use IBP if node.ibp_intermediate == True (for nodes such as ReLU) nodes = [] while (not node.is_lower_bound_current() or not node.is_upper_bound_current()): if not node.ibp_intermediate: return False nodes.append(node) node = node.inputs[0] nodes.reverse() for n in nodes: self.IBP_general(n) return True def check_IBP_first_linear(self: 'BoundedModule', node): """Here we avoid creating a big C matrix in the first linear layer. Disable this optimization when we have beta for intermediate layer bounds. Disable this optimization when we need the A matrix of the first nonlinear layer, forcibly use CROWN to record A matrix. """ tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) directly_optimize_layer_names = ( self.bound_opts['optimize_bound_args']['directly_optimize'] ) if isinstance(node, BoundInput) and tighten_input_bounds: return False if node.name in directly_optimize_layer_names: return False # This is the list of all intermediate layers where we need to refine. 
if self.intermediate_constr is not None: intermediate_beta_enabled_layers = [ k for v in self.intermediate_constr.values() for k in v] else: intermediate_beta_enabled_layers = [] if (node.name not in self.needed_A_dict.keys() and (type(node) == BoundLinear or type(node) == BoundConv and node.name not in intermediate_beta_enabled_layers)): if type(node.inputs[0]) == BoundInput: node.lower, node.upper = self.IBP_general(node) return True return False def compare_with_IBP(self, node, lower, upper, C=None): """Re-compute the bounds by IBP given the existing intermediate bounds. Update the bounds if IBP gives tighter bounds.""" lower_ibp, upper_ibp = self.IBP_general(node, C=C, delete_bounds_after_use=True) if lower is not None: lower = torch.max(lower, lower_ibp) if upper is not None: upper = torch.min(upper, upper_ibp) return lower, upper ================================================ FILE: auto_LiRPA/jacobian.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """Handle Jacobian bounds.""" import torch from auto_LiRPA.bound_ops import JacobianOP, GradNorm # pylint: disable=unused-import from auto_LiRPA.bound_ops import ( BoundInput, BoundAdd, BoundRelu, BoundJacobianInit, BoundJacobianOP) from auto_LiRPA.utils import logger, prod from collections import deque from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule def _expand_jacobian(self): self.jacobian_start_nodes = [] for node in list(self.nodes()): if isinstance(node, BoundJacobianOP): self.jacobian_start_nodes.append(node.inputs[0]) expand_jacobian_node(self, node) if self.jacobian_start_nodes: # Disable unstable options self.bound_opts.update({ 'sparse_intermediate_bounds': False, 'sparse_conv_intermediate_bounds': False, 'sparse_intermediate_bounds_with_ibp': False, 'sparse_features_alpha': False, 'sparse_spec_alpha': False, }) # Optimize new nodes if possible self._optimize_graph() for node in self.nodes(): if isinstance(node, BoundRelu): node.use_sparse_spec_alpha = node.use_sparse_features_alpha = False # If Jacobian nodes are added, we need to redo the forward pass to update the # properties of newly added nodes (e.g., output shape, forward value, etc.) self.forward(*self.global_input) def expand_jacobian_node(self, jacobian_node): logger.info(f'Expanding Jacobian node {jacobian_node}') output_node = jacobian_node.inputs[0] input_node = jacobian_node.inputs[1] batch_size = output_node.output_shape[0] output_dim = prod(output_node.output_shape[1:]) # Gradient values in `grad` may not be accurate. We do not consider gradient # accumulation from multiple succeeding nodes. We only want the shapes but # not the accurate values. 
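# Descriptive note on the construction below: the first BFS pass walks the original graph from output_node toward input_node, counting each node's in-degree and creating (but not yet wiring) gradient operators via build_gradient_node; the second pass re-traverses the graph, renames and inserts the gradient subgraphs into the BoundedModule, and accumulates multiple incoming gradients with BoundAdd nodes.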
grad = {} # Dummy values in grad_start grad_start = torch.ones(batch_size, output_dim, *output_node.output_shape[1:], device=self.device) grad[output_node.name] = grad_start input_node_found = False # First BFS pass: traverse the graph, count degrees, and build gradient # layers. # Degrees of nodes. degree = {} # Original layer for gradient computation. node_grad_ori = {} degree[output_node.name] = 0 queue = deque([output_node]) while len(queue) > 0: node = queue.popleft() if node == input_node: input_node_found = True continue elif node.no_jacobian or not node.from_input: continue else: node_grad_ori[node.name] = node.build_gradient_node(grad[node.name]) node_grad_ori[node.name] += [None] * ( len(node.inputs) - len(node_grad_ori[node.name])) logger.debug(f'Building gradient node for {node}') if not isinstance(node, BoundInput): for i in range(len(node.inputs)): if node_grad_ori[node.name][i] is None: continue grad[node.inputs[i].name] = node_grad_ori[ node.name][i][0](*node_grad_ori[node.name][i][1]) if not node.inputs[i].name in degree: degree[node.inputs[i].name] = 0 queue.append(node.inputs[i]) degree[node.inputs[i].name] += 1 if not input_node_found: raise RuntimeError('Input node not found') # Second BFS pass: build the backward computational graph grad_node = {} initial_name = f'/jacobian{output_node.name}{output_node.name}' grad_node[output_node.name] = BoundJacobianInit(inputs=[output_node]) grad_node[output_node.name].name = initial_name self.add_nodes([grad_node[output_node.name]]) queue = deque([output_node]) while len(queue) > 0: node = queue.popleft() if node == input_node: self.replace_node(jacobian_node, grad_node[node.name]) continue if node.no_jacobian or not node.from_input: continue logger.debug(f'Converting gradient node for {node}') for k in range(len(node.inputs)): if node_grad_ori[node.name][k] is None: continue nodes_op, nodes_in, nodes_out, _ = self._convert_nodes( node_grad_ori[node.name][k][0], tuple(item.detach() for item in node_grad_ori[node.name][k][1])) rename_dict = {} assert isinstance(nodes_in[0], BoundInput) rename_dict[nodes_in[0].name] = grad_node[node.name].name for i in range(1, len(nodes_in)): # Assume it's a parameter here new_name = f'/jacobian{output_node.name}{node.name}/{k}/params{nodes_in[i].name}' rename_dict[nodes_in[i].name] = new_name for i in range(len(nodes_op)): # intermediate nodes if not nodes_op[i].name in rename_dict: new_name = f'/jacobian{output_node.name}{node.name}/{k}/tmp{nodes_op[i].name}' rename_dict[nodes_op[i].name] = new_name assert len(nodes_out) == 1 nodes_out = nodes_out[0] rename_dict[nodes_out.name] = f'/jacobian{output_node.name}{node.name}/{k}/output' self.rename_nodes(nodes_op, nodes_in, rename_dict) input_nodes_replace = ( [self._modules[nodes_in[0].name]] + node_grad_ori[node.name][k][2]) for i in range(len(input_nodes_replace)): for n in nodes_op: for j in range(len(n.inputs)): if n.inputs[j].name == nodes_in[i].name: n.inputs[j] = input_nodes_replace[i] self.add_nodes(nodes_op + nodes_in[len(input_nodes_replace):]) if node.inputs[k].name in grad_node: node_cur = grad_node[node.inputs[k].name] node_add = BoundAdd( attr=None, inputs=[node_cur, nodes_out], output_index=0, options={}) node_add.name = f'{nodes_out.name}/add' grad_node[node.inputs[k].name] = node_add self.add_nodes([node_add]) else: grad_node[node.inputs[k].name] = nodes_out degree[node.inputs[k].name] -= 1 if degree[node.inputs[k].name] == 0: queue.append(node.inputs[k]) def compute_jacobian_bounds(self: 'BoundedModule', x, optimize=True, 
optimize_output_node=None, bound_lower=True, bound_upper=True): """Compute jacobian bounds on the pre-augmented graph (new API).""" if isinstance(x, torch.Tensor): x = (x,) if optimize: if optimize_output_node is None: if len(self.jacobian_start_nodes) == 1: optimize_output_node = self.jacobian_start_nodes[0] else: raise NotImplementedError( 'Multiple Jacobian nodes found.' 'An output node for optimizable bounds (optimize_output_node) ' 'must be specified explicitly') self.compute_bounds( method='CROWN-Optimized', C=None, x=x, bound_upper=False, final_node_name=optimize_output_node.name) intermediate_bounds = {} for node in self._modules.values(): if node.is_lower_bound_current(): intermediate_bounds[node.name] = (node.lower, node.upper) else: intermediate_bounds = None lb, ub = self.compute_bounds( method='CROWN', x=x, bound_lower=bound_lower, bound_upper=bound_upper, interm_bounds=intermediate_bounds) return lb, ub ================================================ FILE: auto_LiRPA/linear_bound.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### class LinearBound: def __init__( self, lw=None, lb=None, uw=None, ub=None, lower=None, upper=None, from_input=None, x_L=None, x_U=None, offset=0, tot_dim=None): self.lw = lw self.lb = lb self.uw = uw self.ub = ub self.lower = lower self.upper = upper self.from_input = from_input self.x_L = x_L self.x_U = x_U # Offset for input variables. Used for batched forward bound # propagation. self.offset = offset if tot_dim is not None: self.tot_dim = tot_dim elif lw is not None: self.tot_dim = lw.shape[1] else: self.tot_dim = 0 def is_single_bound(self): """Check whether the linear lower bound and the linear upper bound are the same.""" if (self.lw is not None and self.uw is not None and self.lb is not None and self.ub is not None): return (self.lw.data_ptr() == self.uw.data_ptr() and self.lb.data_ptr() == self.ub.data_ptr() and self.x_L is not None and self.x_U is not None) else: return True ================================================ FILE: auto_LiRPA/operators/__init__.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### from .base import * from .linear import * from .convolution import * from .pooling import * from .activation_base import * from .activations import * from .s_shaped import * from .relu import * from .bivariate import * from .add_sub import * from .normalization import * from .shape import * from .reduce import * from .rnn import * from .softmax import * from .constant import * from .leaf import * from .logical import * from .dropout import * from .dtype import * from .trigonometric import * from .cut_ops import * from .solver_utils import grb from .resize import * from .jacobian import * from .indexing import * from .slice_concat import * from .reshape import * from .minmax import * from .convex_concave import * from .gelu import * from .tile import * ================================================ FILE: auto_LiRPA/operators/activation_base.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Activation operators or other unary nonlinear operators""" import torch from torch import Tensor from collections import OrderedDict from .base import * from .clampmult import multiply_by_A_signs torch._C._jit_set_profiling_executor(False) torch._C._jit_set_profiling_mode(False) class BoundActivation(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.requires_input_bounds = [0] self.use_default_ibp = True self.splittable = False # "core" region of input where precomputation can be done self.range_l = -10 self.range_u = 10 def _init_masks(self, x): self.mask_pos = x.lower >= 0 self.mask_neg = x.upper <= 0 self.mask_both = torch.logical_not(torch.logical_or(self.mask_pos, self.mask_neg)) def init_linear_relaxation(self, x): self._init_masks(x) self.lw = torch.zeros_like(x.lower) self.lb = self.lw.clone() self.uw = self.lw.clone() self.ub = self.lw.clone() def add_linear_relaxation(self, mask, type, k, x0, y0=None): if y0 is None: y0 = self.forward(x0) if type == 'lower': w_out, b_out = self.lw, self.lb else: w_out, b_out = self.uw, self.ub if mask is None: if isinstance(k, Tensor) and k.ndim > 0: w_out[:] = k else: w_out.fill_(k) else: w_out[..., mask] = (k[..., mask].to(w_out) if isinstance(k, Tensor) else k) if (not isinstance(x0, Tensor) and x0 == 0 and not isinstance(y0, Tensor) and y0 == 0): pass else: b = -x0 * k + y0 if mask is None: if b.ndim > 0: b_out[:] = b else: b_out.fill_(b) else: b_out[..., mask] = b[..., mask] def bound_relax(self, x, init=False): return not_implemented_op(self, 'bound_relax') def bound_backward(self, last_lA, last_uA, x, reduce_bias=True, **kwargs): self.bound_relax(x, init=True) def _bound_oneside(last_A, sign=-1): if last_A is None: return None, 0 if sign == -1: w_pos, b_pos, w_neg, b_neg = ( self.lw.unsqueeze(0), self.lb.unsqueeze(0), 
self.uw.unsqueeze(0), self.ub.unsqueeze(0)) else: w_pos, b_pos, w_neg, b_neg = ( self.uw.unsqueeze(0), self.ub.unsqueeze(0), self.lw.unsqueeze(0), self.lb.unsqueeze(0)) w_pos = maybe_unfold_patches(w_pos, last_A) w_neg = maybe_unfold_patches(w_neg, last_A) b_pos = maybe_unfold_patches(b_pos, last_A) b_neg = maybe_unfold_patches(b_neg, last_A) if self.batch_dim == 0: _A, _bias = multiply_by_A_signs( last_A, w_pos, w_neg, b_pos, b_neg, reduce_bias=reduce_bias) elif self.batch_dim == -1: # FIXME: why this is different from above? assert reduce_bias mask = torch.gt(last_A, 0.).to(torch.float) _A = last_A * (mask * w_pos.unsqueeze(1) + (1 - mask) * w_neg.unsqueeze(1)) _bias = last_A * (mask * b_pos.unsqueeze(1) + (1 - mask) * b_neg.unsqueeze(1)) if _bias.ndim > 2: _bias = torch.sum(_bias, dim=list(range(2, _bias.ndim))) else: raise NotImplementedError return _A, _bias lA, lbias = _bound_oneside(last_lA, sign=-1) uA, ubias = _bound_oneside(last_uA, sign=+1) return [(lA, uA)], lbias, ubias @staticmethod @torch.jit.script def bound_forward_w( relax_lw: Tensor, relax_uw: Tensor, x_lw: Tensor, x_uw: Tensor, dim: int): lw = (relax_lw.unsqueeze(dim).clamp(min=0) * x_lw + relax_lw.unsqueeze(dim).clamp(max=0) * x_uw) uw = (relax_uw.unsqueeze(dim).clamp(max=0) * x_lw + relax_uw.unsqueeze(dim).clamp(min=0) * x_uw) return lw, uw @staticmethod @torch.jit.script def bound_forward_b( relax_lw: Tensor, relax_uw: Tensor, relax_lb: Tensor, relax_ub: Tensor, x_lb: Tensor, x_ub: Tensor): lb = relax_lw.clamp(min=0) * x_lb + relax_lw.clamp(max=0) * x_ub + relax_lb ub = relax_uw.clamp(max=0) * x_lb + relax_uw.clamp(min=0) * x_ub + relax_ub return lb, ub def bound_forward(self, dim_in, x): self.bound_relax(x, init=True) assert (x.lw is None) == (x.uw is None) dim = 1 if self.lw.ndim > 0 else 0 if x.lw is not None: lw, uw = BoundActivation.bound_forward_w( self.lw, self.uw, x.lw, x.uw, dim) else: lw = uw = None lb, ub = BoundActivation.bound_forward_b( self.lw, self.uw, self.lb, self.ub, x.lb, x.ub) return LinearBound(lw, lb, uw, ub) def interval_propagate(self, *v): h_L, h_U = v[0][0], v[0][1] return self.forward(h_L), self.forward(h_U) def get_split_mask(self, lower, upper, input_index): """Return a mask to indicate if each neuron potentially needs a split. 0: Stable (linear) neuron; 1: unstable (nonlinear) neuron. """ return torch.ones_like(lower, dtype=torch.bool) # Return heuristic to select which neuron should use constraints_solving concretization def compute_bound_improvement_heuristics(self, lower, upper): """Return a heuristic score for each lower-upper bound pair. It indicates the possible bound improvement for each neuron. We will then choose if a neuron's bound needs further tightened based on the heuristic. """ return (-lower * upper).clamp(min=0) def get_unstable_mask(self, lower, upper): """Return a mask to indicate if each neuron is unstable. Here we mark all the neurons as stable by default. 0: Stable (linear) neuron; 1: unstable (nonlinear) neuron. """ return torch.ones_like(lower, dtype=torch.bool) class BoundOptimizableActivation(BoundActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if 'optimize_bound_args' not in self.options: self.options['optimize_bound_args'] = {} self.optimizable = True # Stages: # * `init`: initializing parameters # * `opt`: optimizing parameters # * `reuse`: not optimizing parameters but reuse saved values # If `None`, it means activation optimization is currently not used. 
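        # A rough sketch of the intended call order (the actual driver code lives
        # outside this class; see the opt_* methods defined below):
        #   node.opt_init()                         # enter the 'init' stage
        #   node.init_opt_parameters(start_nodes)   # create self.alpha
        #   node.opt_start()                        # 'opt' stage: bound_backward() uses self.alpha
        #   ...gradient steps on alpha, calling node.clip_alpha() after each step...
        #   node.opt_end()                          # back to None once optimization finishes
        #   node.opt_reuse() / node.opt_no_reuse()  # optionally reuse the optimized alpha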
self.opt_stage = None self.alpha = OrderedDict() # Save patch sizes during bound_backward() for each output_node. self.patch_size = {} # A torch.bool mask of shape Tensor([batch_size]) that conditions the # sample of alpha and beta to update # If set to None, update all samples # If not None, select those corresponding to 1 to update def opt_init(self): """Enter the stage for initializing bound optimization. Optimized bounds are not used in this stage.""" self.opt_stage = 'init' def opt_start(self): """Start optimizing bounds.""" self.opt_stage = 'opt' def opt_reuse(self): """ Reuse optimizing bounds """ self.opt_stage = 'reuse' def opt_no_reuse(self): """ Finish reusing optimized bounds """ if self.opt_stage == 'reuse': self.opt_stage = None def opt_end(self): """ End optimizing bounds """ self.opt_stage = None def clip_alpha(self): pass def init_opt_parameters(self, start_nodes): """ start_nodes: a list of starting nodes [(node, size)] during CROWN backward bound propagation""" self.alpha = OrderedDict() for start_node in start_nodes: ns, size_s = start_node[:2] # TODO do not give torch.Size if isinstance(size_s, (torch.Size, list, tuple)): size_s = prod(size_s) self.alpha[ns] = self._init_opt_parameters_impl(size_s, name_start=ns) def _init_opt_parameters_impl(self, size_spec, name_start=None): """Implementation of init_opt_parameters for each start_node.""" raise NotImplementedError def init_linear_relaxation(self, x, dim_opt=None): self._init_masks(x) # The first dimension of size 2 is used for lA and uA respectively, # when computing intermediate bounds. if self.opt_stage in ['opt', 'reuse'] and dim_opt is not None: # For optimized bounds, we have independent lw for each output # dimension for bound optimization. # If the output layer is a fully connected layer, len(dim_opt) = 1. # If the output layer is a conv layer, len(dim_opt) = 3 but we only # use the out_c dimension to create slopes/bias. # Variables are shared among out_h, out_w dimensions so far. if isinstance(dim_opt, int): dim = dim_opt elif isinstance(dim_opt, torch.Size): dim = prod(dim_opt) else: dim = dim_opt[0] self.lw = torch.zeros(2, dim, *x.lower.shape).to(x.lower) else: # Without optimized bounds, the lw, lb (slope, biase) etc only # depend on intermediate layer bounds, # and are shared among different output dimensions. self.lw = torch.zeros_like(x.lower) self.lb = self.lw.clone() self.uw = self.lw.clone() self.ub = self.lw.clone() def bound_relax(self, x, init=False, dim_opt=None): return not_implemented_op(self, 'bound_relax') def bound_backward(self, last_lA, last_uA, x, start_node=None, start_shape=None, reduce_bias=True, **kwargs): self._start = start_node.name if self.opt_stage not in ['opt', 'reuse']: last_A = last_lA if last_lA is not None else last_uA # Returned [(lA, uA)], lbias, ubias As, lbias, ubias = super().bound_backward( last_lA, last_uA, x, reduce_bias=reduce_bias) if isinstance(last_A, Patches): A_prod = As[0][1].patches if As[0][0] is None else As[0][1].patches # FIXME: Unify this function with BoundReLU # Save the patch size, which will be used in init_slope() to # determine the number of optimizable parameters. if start_node is not None: if last_A.unstable_idx is not None: # Sparse patches, we need to construct the full patch size: # (out_c, batch, out_h, out_w, c, h, w). self.patch_size[start_node.name] = [ last_A.output_shape[1], A_prod.size(1), last_A.output_shape[2], last_A.output_shape[3], A_prod.size(-3), A_prod.size(-2), A_prod.size(-1)] else: # Regular patches. 
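                        # i.e. the full patch shape, matching the
                        # (out_c, batch, out_h, out_w, c, h, w) layout assembled
                        # explicitly for the sparse case above.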
self.patch_size[start_node.name] = A_prod.size() return As, lbias, ubias assert self.batch_dim == 0 self.bound_relax(x, init=True, dim_opt=start_shape) def _bound_oneside(last_A, sign=-1): if last_A is None: return None, 0 if sign == -1: w_pos, b_pos, w_neg, b_neg = self.lw[0], self.lb[0], self.uw[0], self.ub[0] else: w_pos, b_pos, w_neg, b_neg = self.uw[1], self.ub[1], self.lw[1], self.lb[1] w_pos = maybe_unfold_patches(w_pos, last_A) w_neg = maybe_unfold_patches(w_neg, last_A) b_pos = maybe_unfold_patches(b_pos, last_A) b_neg = maybe_unfold_patches(b_neg, last_A) unstable_idx = kwargs.get('unstable_idx', None) if unstable_idx is not None: assert isinstance(unstable_idx, Tensor) and unstable_idx.ndim == 1 # Shape is (spec, batch, neurons). # FIXME: Sigmoid and other activation functions should also support # sparse-spec alpha, so alpha will be created with a smaller shape. w_pos = self.non_deter_index_select(w_pos, index=unstable_idx, dim=0) w_neg = self.non_deter_index_select(w_neg, index=unstable_idx, dim=0) b_pos = self.non_deter_index_select(b_pos, index=unstable_idx, dim=0) b_neg = self.non_deter_index_select(b_neg, index=unstable_idx, dim=0) A_prod, _bias = multiply_by_A_signs( last_A, w_pos, w_neg, b_pos, b_neg, reduce_bias) return A_prod, _bias lA, lbias = _bound_oneside(last_lA, sign=-1) uA, ubias = _bound_oneside(last_uA, sign=+1) return [(lA, uA)], lbias, ubias def _no_bound_parameters(self): raise AttributeError('Bound parameters have not been initialized.' 'Please call `compute_bounds` with `method=CROWN-optimized`' ' at least once.') def _transfer_alpha(self, alpha, device=None, dtype=None, non_blocking=False, require_grad=False): alpha = {spec_name: transfer(alpha_value, device=device, dtype=dtype, non_blocking=non_blocking).detach().requires_grad_(require_grad) for spec_name, alpha_value in alpha.items()} return alpha def dump_alpha(self, device=None, dtype=None, non_blocking=False): """ Dump alpha parameters to a dictionary. """ return {'alpha': self._transfer_alpha(self.alpha, device=device, dtype=dtype, non_blocking=non_blocking, require_grad=False)} def restore_alpha(self, alpha, device=None, dtype=None, non_blocking=False): """ Restore alpha parameters from a dictionary. """ self.alpha = self._transfer_alpha(alpha['alpha'], device=device, dtype=dtype, non_blocking=non_blocking, require_grad=True) def drop_unused_alpha(self, keep_nodes): """ Drop unused alpha parameters based on the keep_nodes. This function is not used in auto_LiRPA for now, but is used in alpha-beta-CROWN. """ for spec_name in list(self.alpha.keys()): if spec_name not in keep_nodes: del self.alpha[spec_name] ================================================ FILE: auto_LiRPA/operators/activations.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """ Activation operators or other unary nonlinear operators, not including those placed in separate files.""" import torch from torch.nn import Module from .base import * from .activation_base import BoundActivation, BoundOptimizableActivation from .clampmult import multiply_by_A_signs torch._C._jit_set_profiling_executor(False) torch._C._jit_set_profiling_mode(False) class BoundSoftplus(BoundActivation): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.softplus = nn.Softplus() def forward(self, x): return self.softplus(x) class BoundAbs(BoundActivation): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.ibp_intermediate = True def forward(self, x): return x.abs() def bound_relax(self, x, init=False): if init: self.init_linear_relaxation(x) x_L = x.lower.clamp(max=0) x_U = torch.max(x.upper.clamp(min=0), x_L + 1e-8) # upper_k: connect (x_L, |x_L|) and (x_U, |x_U|) upper_k = (x_U.abs() - x_L.abs()) / (x_U - x_L) # lower_k: choose between -1 and 1 depending on which is closer to zero lower_k = (x_U > -x_L).to(x_L) * 2 - 1 self.add_linear_relaxation(mask=None, type='upper', k=upper_k, x0=x_L) self.add_linear_relaxation(mask=None, type='lower', k=lower_k, x0=0, y0=0) def bound_backward(self, last_lA, last_uA, x, **kwargs): x_L = x.lower.clamp(max=0) x_U = torch.max(x.upper.clamp(min=0), x_L + 1e-8) mask_neg = x_U <= 0 mask_pos = x_L >= 0 y_L = x_L.abs() y_U = x_U.abs() upper_k = (y_U - y_L) / (x_U - x_L) upper_b = y_L - upper_k * x_L # TODO: Here for the "mask_both" case lower_k = 0, but not sure if it's optimal. # lower_b should just be 0? lower_k = (mask_neg * (-1.0) + mask_pos * 1.0) lower_b = (mask_neg + mask_pos) * (y_L - lower_k * x_L) if last_uA is not None: # Special case if we only want the upper bound with non-negative coefficients if last_uA.min() >= 0: uA = last_uA * upper_k ubias = self.get_bias(last_uA, upper_b) else: last_uA_pos = last_uA.clamp(min=0) last_uA_neg = last_uA.clamp(max=0) uA = last_uA_pos * upper_k + last_uA_neg * lower_k ubias = (self.get_bias(last_uA_pos, upper_b) + self.get_bias(last_uA_neg, lower_b)) else: uA, ubias = None, 0 if last_lA is not None: if last_lA.max() <= 0: lA = last_lA * upper_k lbias = self.get_bias(last_lA, upper_b) else: last_lA_pos = last_lA.clamp(min=0) last_lA_neg = last_lA.clamp(max=0) lA = last_lA_pos * lower_k + last_lA_neg * upper_k lbias = (self.get_bias(last_lA_pos, lower_b) + self.get_bias(last_lA_neg, upper_b)) else: lA, lbias = None, 0 return [(lA, uA)], lbias, ubias def interval_propagate(self, *v): h_L, h_U = v[0][0], v[0][1] lower = ((h_U < 0) * h_U.abs() + (h_L > 0) * h_L.abs()) upper = torch.max(h_L.abs(), h_U.abs()) return lower, upper class BoundATenHeaviside(BoundOptimizableActivation): def forward(self, *x): self.input_shape = x[0].shape # x[0]: input; x[1]: value when x == 0 return torch.heaviside(x[0], x[1]) def interval_propagate(self, *v): assert not self.is_input_perturbed(1) return self.forward(v[0][0], v[1][0]), self.forward(v[0][1], v[1][0]) def _init_opt_parameters_impl(self, size_spec, name_start): """Implementation of init_opt_parameters for each start_node.""" l = self.inputs[0].lower return torch.zeros_like(l).unsqueeze(0).repeat(2, *[1] * l.ndim) def clip_alpha(self): for v in self.alpha.values(): v.data = torch.clamp(v.data, 0., 1.) 
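    # Sketch of the linear relaxation used in bound_backward() below, for an
    # unstable neuron with pre-activation bounds lb < 0 < ub (alpha in [0, 1];
    # the code additionally clamps lb/ub away from zero for numerical stability):
    #   upper line:  y = alpha_u / (-lb) * x + 1   (passes through (0, 1))
    #   lower line:  y = alpha_l / ub * x          (passes through (0, 0))
    # alpha = 0 recovers the trivial constant bounds 0 <= H(x) <= 1, while
    # alpha = 1 makes the lines meet the step function at x = lb and x = ub.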
def bound_backward(self, last_lA, last_uA, *x, start_node=None, start_shape=None, **kwargs): x = x[0] if x is not None: lb_r = x.lower ub_r = x.upper else: lb_r = self.lower ub_r = self.upper if self.opt_stage not in ['opt', 'reuse']: # zero slope: upper_d = torch.zeros_like(lb_r, device=lb_r.device, dtype=lb_r.dtype) lower_d = torch.zeros_like(ub_r, device=ub_r.device, dtype=ub_r.dtype) else: upper_d = self.alpha[start_node.name][0].clamp(0, 1) * (1. / (-lb_r).clamp(min=1e-3)) lower_d = self.alpha[start_node.name][1].clamp(0, 1) * (1. / (ub_r.clamp(min=1e-3))) upper_b = torch.ones_like(lb_r, device=lb_r.device, dtype=lb_r.dtype) lower_b = torch.zeros_like(lb_r, device=lb_r.device, dtype=lb_r.dtype) # For stable neurons, set fixed slope and bias. ub_mask = (ub_r <= 0).to(dtype=ub_r.dtype) lb_mask = (lb_r >= 0).to(dtype=lb_r.dtype) upper_b = upper_b - upper_b * ub_mask lower_b = lower_b * (1. - lb_mask) + lb_mask upper_d = upper_d - upper_d * ub_mask - upper_d * lb_mask lower_d = lower_d - lower_d * lb_mask - lower_d * ub_mask upper_d = upper_d.unsqueeze(0) lower_d = lower_d.unsqueeze(0) # Choose upper or lower bounds based on the sign of last_A uA = lA = None ubias = lbias = 0 if last_uA is not None: neg_uA = last_uA.clamp(max=0) pos_uA = last_uA.clamp(min=0) uA = upper_d * pos_uA + lower_d * neg_uA ubias = (pos_uA * upper_b + neg_uA * lower_b).flatten(2).sum(-1) if last_lA is not None: neg_lA = last_lA.clamp(max=0) pos_lA = last_lA.clamp(min=0) lA = upper_d * neg_lA + lower_d * pos_lA lbias = (pos_lA * lower_b + neg_lA * upper_b).flatten(2).sum(-1) return [(lA, uA), (None, None)], lbias, ubias class BoundSqr(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.splittable = True def forward(self, x): return x ** 2 def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) upper_k = x.lower + x.upper # Upper bound: connect the two points (x_l, x_l^2) and (x_u, x_u^2). # The upper bound should always be better than IBP. self.add_linear_relaxation( mask=None, type='upper', k=upper_k, x0=x.lower) if self.opt_stage in ['opt', 'reuse']: mid = self.alpha[self._start] else: # Lower bound is a z=0 line if x_l and x_u have different signs. # Otherwise, the lower bound is a tangent line at x_l. # The lower bound should always be better than IBP. # If both x_l and x_u < 0, select x_u. If both > 0, select x_l. # If x_l < 0 and x_u > 0, we use the z=0 line as the lower bound. 
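            # For example (illustrative values):
            #   x_l = -1, x_u = 2   ->  mid = 0    (the z = 0 line)
            #   x_l = 1,  x_u = 3   ->  mid = 1    (tangent at x_l)
            #   x_l = -3, x_u = -1  ->  mid = -1   (tangent at x_u)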
mid = F.relu(x.lower) - F.relu(-x.upper) self.add_linear_relaxation(mask=None, type='lower', k=2 * mid, x0=mid) def _init_opt_parameters_impl(self, size_spec, **kwargs): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper alpha = torch.empty(2, size_spec, *l.shape, device=l.device) alpha.data[:2] = F.relu(l) - F.relu(-u) return alpha def interval_propagate(self, *v): h_L, h_U = v[0][0], v[0][1] lower = ((h_U < 0) * (h_U ** 2) + (h_L > 0) * (h_L ** 2)) upper = torch.max(h_L ** 2, h_U ** 2) return lower, upper def build_gradient_node(self, grad_upstream): return [(SqrGrad(), (grad_upstream, self.inputs[0].forward_value), [self.inputs[0]])] class SqrGrad(Module): def forward(self, grad_last, preact): # (x^2)' = 2*x return grad_last * 2 * preact.unsqueeze(1) class BoundHardTanh(BoundActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.splittable = True self.activation_name = "HardTanh" self.patch_size = {} self.hardtanh_options = options.get('hardtanh', 'same-slope') def forward(self, x, min_val, max_val): return F.hardtanh(x, min_val, max_val) def bound_backward(self, last_lA, last_uA, x, min_val, max_val, start_node=None, unstable_idx=None, reduce_bias=True, **kwargs): if self.is_input_perturbed(1) or self.is_input_perturbed( 2): # Checking if min_value and max_value are not perturbed raise NotImplementedError( f'{self.activation_name} is unsupported with perturbed min_val and max_val') self.bound_relax(x, min_val, max_val, init=True) def _bound_oneside(last_A, sign=-1): if last_A is None: return None, 0 if sign == -1: w_pos, b_pos, w_neg, b_neg = ( self.lw.unsqueeze(0), self.lb.unsqueeze(0), self.uw.unsqueeze(0), self.ub.unsqueeze(0)) else: w_pos, b_pos, w_neg, b_neg = ( self.uw.unsqueeze(0), self.ub.unsqueeze(0), self.lw.unsqueeze(0), self.lb.unsqueeze(0)) w_pos = maybe_unfold_patches(w_pos, last_A) w_neg = maybe_unfold_patches(w_neg, last_A) b_pos = maybe_unfold_patches(b_pos, last_A) b_neg = maybe_unfold_patches(b_neg, last_A) # Shapes of w_pos, w_neg, b_pos, b_neg # For toy.py - Final Shape - torch.Size([1, 1, 2]) torch.Size([1, 1, 2]) torch.Size([1, 1, 2]) torch.Size([1, 1, 2]) # For simple_verification.py - Final Shape - torch.Size([1, 2, 16, 14, 14]) torch.Size([1, 2, 16, 14, 14]) torch.Size([1, 2, 16, 14, 14]) torch.Size([1, 2, 16, 14, 14]) # For all tensors having batch as the first dimension (batch,.....) _A, _bias = multiply_by_A_signs( last_A, w_pos, w_neg, b_pos, b_neg) return _A, _bias lA, lbias = _bound_oneside(last_lA, sign=-1) uA, ubias = _bound_oneside(last_uA, sign=+1) return [(lA, uA), (None, None), (None, None)], lbias, ubias def bound_relax(self, x, min_val, max_val, init=False, dim_opt=None): epsilon = 1e-8 preact_lb = x.lower.clamp(max=max_val.value) preact_ub = torch.max(x.upper.clamp(min=min_val.value), preact_lb + epsilon) min_val = min_val.value max_val = max_val.value uw = torch.zeros_like(preact_ub) ub = torch.zeros_like(preact_ub) lw = torch.zeros_like(preact_lb) lb = torch.zeros_like(preact_lb) # Case 1: # When upper bound is smaller than min value, # the activated value will always be min value, # so the upper bound and lower bound are both # min value. 
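        # (For instance, with the common HardTanh defaults min_val = -1 and
        # max_val = 1, Case 1 covers neurons whose pre-activation upper bound is
        # at most -1, so the output is constantly -1.)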
case1 = (preact_ub <= min_val).to(preact_ub.dtype) # Computing intermediate values only once for Case 1 value = case1 * min_val ub += value lb += value # Case 2: # When lower bound is larger than max value, # the activated value will always be max value, # so the upper bound and lower bound are both # max value. case2 = (preact_lb >= max_val).to(preact_ub.dtype) # Computing intermediate values only once for Case 2 value = case2 * max_val ub += value lb += value # Case 3: # In this case, the activated output for x is always x # so the bias is always zero and slope will also always # be one. case3 = ((preact_lb >= min_val) & (preact_ub <= max_val)).to(preact_ub.dtype) uw += case3 lw += case3 # Case 4: # Upper bound is larger than max val and lower bound is # smaller than min val, in this case, we will use two # line to bound, the upper bound will pass through points # (max_val, max_val) and (lb_r, min_val) and the lower # bound will pass through (min_val, min_val) and (ub_r, max_val). # So, the slope d of the upper line is (max_val - min_val)/(max_val - lb_r) # and the intercept of the upper line is max_val - d * max_val # Similarly, the slope d of the lower line is (max_val - min_val)/(ub_r - min_val) # and the intercept of the lower line is min_val - d * min_val. # Computing intermediate values only once for Case 4 diff = max_val - min_val val1 = max_val - preact_lb + epsilon case4 = ((preact_lb < min_val) & (preact_ub > max_val)).to(preact_ub.dtype) uw += case4 * diff / val1 lw += case4 * diff / (preact_ub - min_val + epsilon) ub = case4 * (max_val - diff / val1 * max_val) lb = case4 * (min_val - diff / (preact_ub - min_val + epsilon) * min_val) # Computing intermediate values only once ( Case 5 & 6 ) denom = preact_ub - preact_lb + epsilon # Case 5: # Lower bound is smaller than the min val and the upper bound # is larger than or equal to the min val and smaller or # equal to max val. In this case, we use a single line that # pass through (lb_r, min_val) and (ub_r, ub_r) as the upper # bound. And for lower bound, we use a line with the same slope # as the upper bound and passes through (min_val, min_val) as # lower bound. # So, the slope d of the upper bound is (ub_r - min_val)/(ub_r - lb_r) # and the intercept of the upper bound is ub_r - d * ub_r. # The slope d of the lower bound is same as upper bound and the # intercept of the lower bound is min_val - d * min_val # Computing intermediate values only once for Case 5 val1 = preact_ub - min_val case5 = ((preact_lb < min_val) & (min_val <= preact_ub) & (preact_ub <= max_val)).to(preact_ub.dtype) uw += case5 * val1 / denom ub += case5 * (preact_ub - val1 / denom * preact_ub) if self.hardtanh_options == "same-slope": lw += case5 * val1 / denom lb += case5 * (min_val - val1 / denom * min_val) elif self.hardtanh_options == "adaptive": cond = (uw > 0.5).to(uw) lw += case5 * cond lb += case5 * min_val * (1 - cond) # Case 6: # Upper bound is larger than the max val and the lower bound # is larger than or equal to the min val and smaller or # equal to max val. In this case, we use a single line that # pass through (ub_r, max_val) and (lb_r, lb_r) as the lower # bound. And for upper bound, we use a line with the same slope # as lower bound which passes through (max_val, max_val) as the # upper bound. # So, the slope d of the lower bound is (max_val - lb_r)/(ub_r - lb_r). # And the intercept of the lower bound is lb_r - d * lb_r. 
# The slope d of the upper bound is (max_val - lb_r)/(ub_r - lb_r), # and the intercept of the upper bound is max_val - d * max_val. # Computing intermediate values only once for Case 6 val1 = max_val - preact_lb case6 = ((min_val <= preact_lb) & (preact_lb <= max_val) & (preact_ub > max_val)).to(preact_ub.dtype) lw += case6 * val1 / denom lb += case6 * (preact_lb - val1 / denom * preact_lb) if self.hardtanh_options == "same-slope": uw += case6 * val1 / denom ub += case6 * (max_val - val1 / denom * max_val) elif self.hardtanh_options == "adaptive": cond = (lw > 0.5).to(lw) uw += case6 * cond ub += (case6 * max_val) * (1 - cond) self.uw = uw self.lw = lw self.ub = ub self.lb = lb def interval_propagate(self, *v): h_L, h_U = v[0][0], v[0][1] min_val = v[1][0] max_val = v[2][0] assert v[1][0] == v[1][1] and v[2][0] == v[2][1] return self.forward(h_L, min_val, max_val), self.forward(h_U, min_val, max_val) class BoundFloor(BoundActivation): def forward(self, x): return torch.floor(x) def bound_relax(self, x, init=False): if init: self.init_linear_relaxation(x) self.lb += torch.floor(x.lower) self.ub += torch.floor(x.upper) class BoundMultiPiecewiseNonlinear(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.splittable = True def forward(self, x, weight, offset): return (F.relu(x.unsqueeze(-1) - offset) * weight).sum(dim=-1) def clip_alpha(self): for v in self.alpha.values(): v.data = torch.clamp(v.data, 0., 1.) def bound_backward(self, last_lA, last_uA, x, weight, offset, reduce_bias=True, start_node=None, **kwargs): assert not self.is_input_perturbed(1) assert not self.is_input_perturbed(2) weight = ( self.inputs[1].forward_value if hasattr(self.inputs[1], 'forward_value') else self.inputs[1].forward() ) offset = ( self.inputs[2].forward_value if hasattr(self.inputs[2], 'forward_value') else self.inputs[2].forward() ) relu_x_lower = (x.lower.unsqueeze(-1) - offset).clamp(max=0) relu_x_upper = (x.upper.unsqueeze(-1) - offset).clamp(min=0) relu_x_upper = torch.max(relu_x_upper, relu_x_lower + 1e-8) relu_upper_k = relu_x_upper / (relu_x_upper - relu_x_lower) relu_upper_b = -relu_x_lower * relu_upper_k if self.opt_stage not in ['opt', 'reuse']: self.init_lower_k = relu_lower_k = (relu_upper_k > 0.5).to(relu_upper_k) relu_lower_k_for_lA = relu_lower_k_for_uA = relu_lower_k.unsqueeze(0) else: relu_lower_k = self.alpha[start_node.name] relu_lower_k_for_lA = relu_lower_k[0] relu_lower_k_for_uA = relu_lower_k[1] relu_lower_b = torch.zeros_like(relu_upper_b) relu_lower_b = relu_lower_b.unsqueeze(0) relu_upper_k = relu_upper_k.unsqueeze(0) relu_upper_b = relu_upper_b.unsqueeze(0) def _bound_oneside(last_A, pos_k, pos_b, neg_k, neg_b, weight, offset, reduce_bias): if last_A is None: return None, 0 last_A = last_A.unsqueeze(-1) * weight A_pos = last_A.clamp(min=0) A_neg = last_A.clamp(max=0) A = A_pos * pos_k + A_neg * neg_k b = -A * offset + A_pos * pos_b + A_neg * neg_b A = A.sum(dim=-1) if reduce_bias: b = b.sum(dim=[-1, -2]) else: b = b.sum(dim=-1) return A, b lA, lb = _bound_oneside(last_lA, relu_lower_k_for_lA, relu_lower_b, relu_upper_k, relu_upper_b, weight, offset, reduce_bias) uA, ub = _bound_oneside(last_uA, relu_upper_k, relu_upper_b, relu_lower_k_for_uA, relu_lower_b, weight, offset, reduce_bias) return [(lA, uA), (None, None), (None, None)], lb, ub def _init_opt_parameters_impl(self, size_spec, **kwargs): alpha = torch.empty(2, size_spec, *self.init_lower_k.shape, 
device=self.init_lower_k.device) alpha.data[:2] = self.init_lower_k return alpha def get_split_mask(self, lower, upper, input_index): offset = ( self.inputs[2].forward_value if hasattr(self.inputs[2], 'forward_value') else self.inputs[2].forward() ) return ((lower.unsqueeze(-1) < offset) & (upper.unsqueeze(-1) > offset)).any(dim=-1) ================================================ FILE: auto_LiRPA/operators/add_sub.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from torch.nn import Module from .base import * from .constant import BoundConstant from .solver_utils import grb class BoundAdd(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) options = options or {} # FIXME: This is not the right way to enable patches mode. # Instead we must traverse the graph and determine when patches mode needs to be used. self.mode = options.get("conv_mode", "matrix") def forward(self, x, y): self.x_shape = x.shape self.y_shape = y.shape return x + y def bound_backward(self, last_lA, last_uA, x, y, **kwargs): def _bound_oneside(last_A, w): if last_A is None: return None return self.broadcast_backward(last_A, w) uA_x = _bound_oneside(last_uA, x) uA_y = _bound_oneside(last_uA, y) lA_x = _bound_oneside(last_lA, x) lA_y = _bound_oneside(last_lA, y) return [(lA_x, uA_x), (lA_y, uA_y)], 0, 0 def bound_forward(self, dim_in, x, y): lb, ub = x.lb + y.lb, x.ub + y.ub def add_w(x_w, y_w, x_b, y_b): if x_w is None and y_w is None: return None elif x_w is not None and y_w is not None: return x_w + y_w elif y_w is None: return x_w + torch.zeros_like(y_b) else: return y_w + torch.zeros_like(x_b) lw = add_w(x.lw, y.lw, x.lb, y.lb) uw = add_w(x.uw, y.uw, x.ub, y.ub) return LinearBound(lw, lb, uw, ub) def interval_propagate(self, x, y): assert (not isinstance(y, Tensor)) return x[0] + y[0], x[1] + y[1] def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if isinstance(v[0], Tensor) and isinstance(v[1], Tensor): # constants if both inputs are tensors self.solver_vars = self.forward(v[0], v[1]) return # we have both gurobi vars as inputs this_layer_shape = self.output_shape gvar_array1 = np.array(v[0]) if isinstance(v[1], Tensor): var2 = v[1].cpu().numpy() # flatten to create vars and constrs first gvar_array1 = gvar_array1.reshape(-1) new_layer_gurobi_vars = [] for neuron_idx, var1 in enumerate(gvar_array1): var = model.addVar(lb=-float('inf'), ub=float('inf'), obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(var == (var1 + var2), name=f'lay{self.name}_{neuron_idx}_eq') new_layer_gurobi_vars.append(var) else: gvar_array2 = np.array(v[1]) assert gvar_array1.shape == gvar_array2.shape and gvar_array1.shape == this_layer_shape[1:] # flatten to create vars and constrs first gvar_array1 = gvar_array1.reshape(-1) gvar_array2 
= gvar_array2.reshape(-1) new_layer_gurobi_vars = [] for neuron_idx, (var1, var2) in enumerate(zip(gvar_array1, gvar_array2)): var = model.addVar(lb=-float('inf'), ub=float('inf'), obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(var == (var1 + var2), name=f'lay{self.name}_{neuron_idx}_eq') new_layer_gurobi_vars.append(var) # reshape to the correct list shape of solver vars self.solver_vars = np.array(new_layer_gurobi_vars).reshape(this_layer_shape[1:]).tolist() model.update() def build_gradient_node(self, grad_upstream): if not self.inputs[0].no_jacobian: grad0_node = AddGrad(self.inputs[0].output_shape if self.inputs[0].batch_dim != -1 else torch.Size((1,) + self.inputs[0].output_shape)) grad0 = (grad0_node, (grad_upstream,), []) else: grad0 = None if not self.inputs[1].no_jacobian: grad1_node = AddGrad(self.inputs[1].output_shape if self.inputs[1].batch_dim != -1 else torch.Size((1,) + self.inputs[1].output_shape)) grad1 = (grad1_node, (grad_upstream,), []) else: grad1 = None return [grad0, grad1] class BoundSub(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) # FIXME: This is not the right way to enable patches mode. Instead we must traverse the graph and determine when patches mode needs to be used. self.mode = options.get("conv_mode", "matrix") def forward(self, x, y): self.x_shape = x.shape self.y_shape = y.shape return x - y def bound_backward(self, last_lA, last_uA, x, y, **kwargs): def _bound_oneside(last_A, w, sign=-1): if last_A is None: return None if isinstance(last_A, torch.Tensor): return self.broadcast_backward(sign * last_A, w) elif isinstance(last_A, Patches): if sign == 1: # Patches shape requires no broadcast. return last_A else: # Multiply by the sign. return last_A.create_similar(sign * last_A.patches) else: raise ValueError(f'Unknown last_A type {type(last_A)}') uA_x = _bound_oneside(last_uA, x, sign=1) uA_y = _bound_oneside(last_uA, y, sign=-1) lA_x = _bound_oneside(last_lA, x, sign=1) lA_y = _bound_oneside(last_lA, y, sign=-1) return [(lA_x, uA_x), (lA_y, uA_y)], 0, 0 def bound_forward(self, dim_in, x, y): lb, ub = x.lb - y.ub, x.ub - y.lb def add_w(x_w, y_w, x_b, y_b): if x_w is None and y_w is None: return None elif x_w is not None and y_w is not None: return x_w + y_w elif y_w is None: return x_w + torch.zeros_like(y_b) else: return y_w + torch.zeros_like(x_b) # Some nodes such as BoundConstant does not have uw and lw. 
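        # For z = x - y, a linear lower bound on z pairs the lower bound of x with
        # the negated upper bound of y, and the upper bound of z pairs x's upper
        # bound with -y's lower bound; hence -y.uw is used for lw and -y.lw for uw
        # below.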
lw = add_w(x.lw, -y.uw if y.uw is not None else None, x.lb, y.lb) uw = add_w(x.uw, -y.lw if y.lw is not None else None, x.ub, y.ub) return LinearBound(lw, lb, uw, ub) def interval_propagate(self, x, y): return x[0] - y[1], x[1] - y[0] def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if isinstance(v[0], Tensor) and isinstance(v[1], Tensor): # constants if both inputs are tensors self.solver_vars = self.forward(v[0], v[1]) return # we have both gurobi vars as inputs this_layer_shape = self.output_shape gvar_array1 = np.array(v[0]) gvar_array2 = np.array(v[1]) assert gvar_array1.shape == gvar_array2.shape and gvar_array1.shape == this_layer_shape[1:] # flatten to create vars and constrs first gvar_array1 = gvar_array1.reshape(-1) gvar_array2 = gvar_array2.reshape(-1) new_layer_gurobi_vars = [] for neuron_idx, (var1, var2) in enumerate(zip(gvar_array1, gvar_array2)): var = model.addVar(lb=-float('inf'), ub=float('inf'), obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(var == (var1 - var2), name=f'lay{self.name}_{neuron_idx}_eq') new_layer_gurobi_vars.append(var) # reshape to the correct list shape of solver vars self.solver_vars = np.array(new_layer_gurobi_vars).reshape(this_layer_shape[1:]).tolist() model.update() def build_gradient_node(self, grad_upstream): if not self.inputs[0].no_jacobian: grad_node_0 = AddGrad(self.inputs[0].output_shape if self.inputs[0].batch_dim != -1 else torch.Size((1,) + self.inputs[0].output_shape), w=1.0) grad0 = (grad_node_0, (grad_upstream,), []) else: grad0 = None if not self.inputs[1].no_jacobian: grad_node_1 = AddGrad(self.inputs[1].output_shape if self.inputs[1].batch_dim != -1 else torch.Size((1,) + self.inputs[1].output_shape), w=-1.0) grad1 = (grad_node_1, (grad_upstream,), []) else: grad1 = None return [grad0, grad1] class AddGrad(Module): def __init__(self, input_shape, w=1.0): super().__init__() # We need the input shape to handle broadcasting. self.input_shape = input_shape self.w = w def forward(self, grad_last): return reduce_broadcast_dims(grad_last * self.w, self.input_shape) ================================================ FILE: auto_LiRPA/operators/base.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Base class and functions for implementing bound operators""" from typing import Optional, List import warnings import torch import torch.nn as nn from torch import Tensor import numpy as np from ..perturbations import * from ..utils import * from ..patches import * torch._C._jit_set_profiling_executor(False) torch._C._jit_set_profiling_mode(False) epsilon = 1e-12 def not_implemented_op(node, func): message = ( f'Function `{func}` of `{node}` is not supported yet.' 
' Please help to open an issue at https://github.com/Verified-Intelligence/auto_LiRPA' ' or implement this function in auto_LiRPA/bound_ops.py' ' or auto_LiRPA/operators by yourself.') raise NotImplementedError(message) class Interval(tuple): """Interval object for interval bound propagation.""" # Subclassing tuple object so that all previous code can be reused. def __new__(self, lb=None, ub=None, ptb=None): return tuple.__new__(Interval, (lb, ub)) def __init__(self, lb, ub, ptb=None): if ptb is None: self.ptb = None # `self.ptb == None` means that this interval # is not perturbed and it shall be treated as a constant and lb = ub. # To avoid mistakes, in this case the caller must make sure lb and ub are the same object. assert lb is ub else: if not isinstance(ptb, Perturbation): raise ValueError("ptb must be a Perturbation object or None. Got type {}".format(type(ptb))) else: self.ptb = ptb def __str__(self): return "({}, {}) with ptb={}".format(self[0], self[1], self.ptb) def __repr__(self): return "Interval(lb={}, ub={}, ptb={})".format(self[0], self[1], self.ptb) @staticmethod def make_interval(lb, ub, other=None): """Checking if the other interval is tuple, keep the perturbation.""" if isinstance(other, Interval): return Interval(lb, ub, ptb=other.ptb) else: return (lb, ub) @staticmethod def get_perturbation(interval): """Given a tuple or Interval object, returns the norm and eps.""" if isinstance(interval, Interval) and interval.ptb is not None: if isinstance(interval.ptb, PerturbationLpNorm): return interval.ptb.norm, interval.ptb.eps elif isinstance(interval.ptb, PerturbationSynonym): return torch.inf, 1.0 elif isinstance(interval.ptb, PerturbationL0Norm): return 0, interval.ptb.eps, interval.ptb.ratio elif isinstance(interval.ptb, PerturbationLinear): return torch.inf, 0.0 else: raise RuntimeError("get_perturbation() does not know how to handle {}".format(type(interval.ptb))) else: # Tuple object. Assuming L infinity norm lower and upper bounds. return torch.inf, np.nan @staticmethod def is_perturbed(interval): """Checking if a Interval or tuple object has perturbation enabled.""" if isinstance(interval, Interval) and interval.ptb is None: return False else: return True class Bound(nn.Module): r""" Base class for supporting the bound computation of an operator. Please see examples at `auto_LiRPA/operators`. Args: attr (dict): Attributes of the operator. inputs (list): A list of input nodes. output_index (int): The index in the output if the operator has multiple outputs. Usually output_index=0. options (dict): Bound options. Be sure to run `super().__init__(attr, inputs, output_index, options, device)` first in the `__init__` function. """ def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__() attr = {} if attr is None else attr inputs = [] if inputs is None else inputs options = {} if options is None else options self.name: Optional[str] = None self.output_name = [] self.device = attr.get('device') self.attr = attr self.inputs: List['Bound'] = inputs self.output_index = output_index self.options = options # Mark if this node is used in the bound computation (from the output node). self.used = False self.forward_value = None self.output_shape = None self.from_input = False self.bounded = False self.IBP_rets = None self.requires_input_bounds = [] self.from_complex_node = None # If True, when building the Jacobian graph, this node should be treated # as a constant and there is no need to further propagate Jacobian. 
self.no_jacobian = False # If True, when we are computing intermediate bounds for these ops, # we simply use IBP to propagate bounds from its input nodes # instead of CROWN. Currently only operators with a single input can be # supported. self.ibp_intermediate = False self.splittable = self.force_not_splittable = False # Determine if this node has a perturbed output or not. The function BoundedModule._mark_perturbed_nodes() will set this property. self.perturbed = False self.never_perturbed = False if options is not None and 'loss_fusion' in options: self.loss_fusion = options['loss_fusion'] else: self.loss_fusion = False self.options = options # Use `default_interval_propagate` self.use_default_ibp = False # If set to true, the backward bound output of this node is 0. self.zero_backward_coeffs_l = False self.zero_backward_coeffs_u = False # If set to true, the A matrix accumulated on this node is 0. self.zero_lA_mtx = False self.zero_uA_mtx = False self.patches_start = False self.alpha_beta_update_mask = None self.is_final_node = False # By default, we assue this node has no batch dimension. # It will be updated in BoundedModule.get_forward_value(). self.batch_dim = -1 # The .lower and .upper properties are written to as part of the bound propagation. # Usually, in iterative refinement, each bound only depends on bounds previously # computed in the same iteration. However, this changes if INVPROP is used to incorporate # output constraints. Then, we also need bounds of layers *after* the currently bounded # layer. Therefore, we have to cache the older bounds. self._is_lower_bound_current = False self._lower = None self._is_upper_bound_current = False self._upper = None # A list containing the output ACTIVATIONS node from this node. # Please check backward_bound.py, forward_bound.py, batch_branch_and_bound.py for more info. 
self.output_activations = None def __repr__(self, attrs=None): inputs = ', '.join([node.name for node in self.inputs]) ret = (f'{self.__class__.__name__}(name={self.name}, ' f'inputs=[{inputs}], perturbed={self.perturbed}') if attrs is not None: for k, v in attrs.items(): ret += f', {k}={v}' ret += ')' return ret @property def lower(self): return self._lower @lower.setter def lower(self, value): if not (value is None or isinstance(value, torch.Tensor)): raise TypeError(f'lower must be a tensor or None, got {type(value)}') if value is None: self._is_lower_bound_current = False else: self._is_lower_bound_current = True self._lower = value @property def upper(self): return self._upper @upper.setter def upper(self, value): if not (value is None or isinstance(value, torch.Tensor)): raise TypeError(f'upper must be a tensor or None, got {type(value)}') if value is None: self._is_upper_bound_current = False else: self._is_upper_bound_current = True self._upper = value def move_lower_and_upper_bounds_to_cache(self): if self._lower is not None: self._lower = self._lower.detach().requires_grad_(False) self._is_lower_bound_current = False if self._upper is not None: self._upper = self._upper.detach().requires_grad_(False) self._is_upper_bound_current = False def delete_lower_and_upper_bounds(self): self._lower = None self._upper = None self._is_lower_bound_current = False self._is_upper_bound_current = False def is_lower_bound_current(self): return self._is_lower_bound_current def is_upper_bound_current(self): return self._is_upper_bound_current def are_output_constraints_activated_for_layer( self: 'Bound', apply_output_constraints_to: Optional[List[str]], ): if self.is_final_node: return False if apply_output_constraints_to is None: return False for layer_type_or_name in apply_output_constraints_to: if layer_type_or_name.startswith('/'): if self.name == layer_type_or_name: return True else: assert layer_type_or_name.startswith('Bound'), ( 'To apply output constraints to tighten layer bounds, pass either the layer name ' '(starting with "/", e.g. "/input.7") or the layer type (starting with "Bound", ' 'e.g. "BoundLinear")' ) if type(self).__name__ == layer_type_or_name: return True return False def init_gammas(self, num_constraints): if not self.are_output_constraints_activated_for_layer( self.options.get('optimize_bound_args', {}).get('apply_output_constraints_to', []) ): return assert len(self.output_shape) > 0, self neurons_in_this_layer = 1 for d in self.output_shape[1:]: neurons_in_this_layer *= d init_gamma_value = 0.0 # We need a different number of gammas depending on whether or not they are shared # However, to the code outside of this class, this should be transparent. # We create the correct number of gammas in gammas_underlying_tensor and if necessary # expand it to simulate a larger tensor. This is just a view, no additional memory is created. # By the outside, only .gammas should be used. However, we must take care to update this view # whenever gammas_underlying_tensor was changed (see clip_gammas) # Note that _set_gammas in optimized_bounds.py needs to refer to the gammas_underlying_tensor, # because that's the leaf tensor for which we need to compute gradients. 
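        # Shape example (illustrative numbers): with num_constraints = 3 output
        # constraints and 10 neurons in this layer,
        #   share_gammas=True : gammas_underlying_tensor has shape (2, 3, 1) and
        #                       self.gammas is its (2, 3, 10) expanded view;
        #   share_gammas=False: both tensors have shape (2, 3, 10).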
if self.options.get('optimize_bound_args', {}).get('share_gammas', False): self.gammas_underlying_tensor = torch.full((2, num_constraints, 1), init_gamma_value, requires_grad=True, device=self.device) self.gammas = self.gammas_underlying_tensor.expand(-1, -1, neurons_in_this_layer) else: self.gammas_underlying_tensor = torch.full((2, num_constraints, neurons_in_this_layer), init_gamma_value, requires_grad=True, device=self.device) self.gammas = self.gammas_underlying_tensor def clip_gammas(self): if not hasattr(self, "gammas"): return self.gammas_underlying_tensor.data = torch.clamp(self.gammas_underlying_tensor.data, min=0.0) # If gammas are shared, self.gammas != self.gammas_underlying_tensor # We've changed self.gammas_underlying_tensor, those changes must be propagated to self.gammas neurons_in_this_layer = 1 for d in self.output_shape[1:]: neurons_in_this_layer *= d if self.options.get('optimize_bound_args', {}).get('share_gammas', False): self.gammas = self.gammas_underlying_tensor.expand(-1, -1, neurons_in_this_layer) def is_input_perturbed(self, i=0): r"""Check if the i-th input is with perturbation or not.""" return i < len(self.inputs) and self.inputs[i].perturbed def clear(self): """ Clear attributes when there is a new input to the network""" pass @property def input_name(self): return [node.name for node in self.inputs] def forward(self, *x): r""" Function for standard/clean forward. Args: x: A list of input values. The length of the list is equal to the number of input nodes. Returns: output (Tensor): The standard/clean output of this node. """ return not_implemented_op(self, 'forward') def interval_propagate(self, *v): r""" Function for interval bound propagation (IBP) computation. There is a default function `self.default_interval_propagate(*v)` in the base class, which can be used if the operator is *monotonic*. To use it, set `self.use_default_ibp = True` in the `__init__` function, and the implementation of this function can be skipped. Args: v: A list of the interval bound of input nodes. Generally, for each element `v[i]`, `v[i][0]` is the lower interval bound, and `v[i][1]` is the upper interval bound. Returns: bound: The interval bound of this node, in a same format as v[i]. """ if self.use_default_ibp or self.never_perturbed: return self.default_interval_propagate(*v) else: return not_implemented_op(self, 'interval_propagate') def default_interval_propagate(self, *v): """Default IBP using the forward function. For unary monotonous functions or functions for altering shapes only but not values. """ if len(v) == 0: return Interval.make_interval(self.forward(), self.forward()) else: if len(v) > 1: for i in range(1, len(v)): assert not self.is_input_perturbed(i) return Interval.make_interval( self.forward(v[0][0], *[vv[0] for vv in v[1:]]), self.forward(v[0][1], *[vv[0] for vv in v[1:]]), v[0]) def bound_forward(self, dim_in, *x): r""" Function for forward mode bound propagation. Forward mode LiRPA computs a `LinearBound` instance representing the linear bound for each involved node. Major attributes of `LinearBound` include `lw`, `uw`, `lb`, `ub`, `lower`, and `upper`. `lw` and `uw` are coefficients of linear bounds w.r.t. model input. Their shape is `(batch_size, dim_in, *standard_shape)`, where `dim_in` is the total dimension of perturbed input nodes of the model, and `standard_shape` is the shape of the standard/clean output. `lb` and `ub` are bias terms of linear bounds, and their shape is equal to the shape of standard/clean output. 
`lower` and `upper` are concretized lower and upper bounds that will be computed later in BoundedModule. Args: dim_in (int): Total dimension of perturbed input nodes of the model. x: A list of the linear bound of input nodes. Each element in x is a `LinearBound` instance. Returns: bound (LinearBound): The linear bound of this node. """ return not_implemented_op(self, 'bound_forward') def bound_dynamic_forward(self, *x, max_dim=None, offset=0): raise NotImplementedError(f'bound_dynamic_forward is not implemented for {self}.') def bound_backward(self, last_lA, last_uA, *x, **kwargs): r""" Function for backward mode bound propagation. Args: last_lA (Tensor): `A` matrix for lower bound computation propagated to this node. It can be `None` if lower bound is not needed. last_uA (Tensor): `A` matrix for upper bound computation propagated to this node. It can be `None` if upper bound is not needed. x: A list of input nodes, with x[i].lower and x[i].upper that can be used as pre-activation bounds. Returns: A: A list of A matrices for the input nodes. Each element is a tuple (lA, uA). lbias (Tensor): The bias term for lower bound computation, introduced by the linear relaxation of this node. . ubias (Tensor): The bias term for upper bound computation, introduced by the linear relaxation of this node. """ return not_implemented_op(self, 'bound_backward') def broadcast_backward(self, A, x): """ Adjust shape of A, adding or removing broadcast dimensions, based on the other operand x. Typically, A has [spec, batch, ...]. The other operand x may have shape [batch, ...], or no batch dimension. Here the "..." dimensions may be different. We need to make sure the two match, by adding or removing dimensions in A. """ if isinstance(A, Tensor): shape = x.output_shape if x.batch_dim == -1: # The other operand has no batch dimension. (e.g., constants). # Add batch dimension to it. if len(shape) < len(A.shape) - 1: shape = torch.Size([1] + list(shape)) else: # The not-from-input operand has batch dimension. # This can happen when the user explicitly unsqueezes the batch dimension on # a constant tensor when building the computation graph. warnings.warn(f"Constant operand of node \033[96m{self}\033[0m has batch dimension. " "Please check your model implementation. " "Constant operands \033[93mSHOULD NOT\033[0m have batch dimension.") A = reduce_broadcast_dims(A, shape) else: pass return A def build_gradient_node(self, grad_upstream): r""" Function for building the gradient node to bound the Jacobian. Args: grad_upstream: Upstream gradient in the gradient back-propagation. Returns: A list. Each item contains the following for computing the gradient of each input: module_grad (torch.nn.Module): Gradient node. grad_input (list): Inputs to the gradient node. Values do not matter. We only want the shapes. grad_extra_nodes (list): Extra nodes needed for the gradient. """ return not_implemented_op(self, 'build_gradient_node') def get_bias(self, A, bias): if A is None: return 0 if not Benchmarking: assert not isnan(A) assert not isnan(bias) if torch.isinf(bias).any(): warnings.warn('There is an inf value in the bias of LiRPA bounds.') if isinstance(A, Tensor): if self.batch_dim != -1: bias_new = torch.einsum('sb...,b...->sb', A, bias) else: bias_new = torch.einsum('sb...,...->sb', A, bias) if isnan(bias_new): # NaN can be caused by 0 * inf, if 0 appears in `A` and inf appears in `bias`. # Force the whole bias to be 0, to avoid gradient issues. # FIXME maybe find a more robust solution. 
return 0 else: # FIXME (09/17): handle the case for pieces.unstable_idx. return bias_new elif isinstance(A, eyeC): batch_size = A.shape[1] if self.batch_dim != -1: return bias.reshape(batch_size, -1).t() else: return bias.reshape(-1).unsqueeze(-1).repeat(1, batch_size) elif type(A) == Patches: # the shape of A.patches is [batch, L, out_c, in_c, K, K] if self.batch_dim != -1: # Input A patches has shape (spec, batch, out_h, out_w, in_c, H, W) or (unstable_size, batch, in_c, H, W). patches = A.patches # Here the size of bias is [batch_size, out_h, out_w, in_c, H, W] bias = inplace_unfold(bias, kernel_size=A.patches.shape[-2:], stride=A.stride, padding=A.padding, inserted_zeros=A.inserted_zeros, output_padding=A.output_padding) if A.unstable_idx is not None: # Sparse bias has shape [unstable_size, batch_size, in_c, H, W]. No need to select over the out_c dimension. bias = bias[:, A.unstable_idx[1], A.unstable_idx[2]] # bias_new has shape (unstable_size, batch). bias_new = torch.einsum('bschw,sbchw->sb', bias, patches) else: # Sum over the in_c, H, W dimension. Use torch.einsum() to save memory, equal to: # bias_new = (bias * patches).sum(-1,-2,-3).transpose(-2, -1) # bias_new has shape (spec, batch, out_h, out_w). bias_new = torch.einsum('bijchw,sbijchw->sbij', bias, patches) else: # Similar to BoundConstant. (BoundConstant does not have batch_dim). # FIXME (09/16): bias size is different for BoundConstant. We should use the same size! patches = A.patches bias_new = torch.sum(patches, dim=(-1, -2, -3)) * bias.to(self.device) # Return shape is (spec, batch, out_h, out_w) or (unstable_size, batch). return bias_new return bias_new else: return NotImplementedError() def make_axis_non_negative(self, axis, shape='input'): """Convert negative axis to non-negative axis. Args: axis (int or tuple or list): The axis to be converted. shape (str or torch.Size): The shape of the tensor. If 'input', use self.input_shape. If 'output', use self.output_shape. Otherwise, it should be a torch.Size object. For example, if the tensor shape is (2, 3, 4), then axis -1 will be converted to 2. For the "squeeze" operation, the shape should be the 'input' shape. While for the "unsqueeze" operation, the shape should be the 'output' shape. Returns: axis (int or tuple): The non-negative axis. """ if isinstance(axis, (tuple, list)): return tuple(sorted([self.make_axis_non_negative(item, shape) for item in axis])) if shape == 'input': shape = self.input_shape elif shape == 'output': shape = self.output_shape else: assert isinstance(shape, torch.Size) if axis < 0: return axis + len(shape) else: return axis def update_requires_input_bounds(self): """Update requires_input_bounds. This function is called once we know if the input nodesare perturbed. """ pass def clamp_interim_bounds(self): """Clamp intermediate bounds.""" pass def check_constraint_available(self, node, flag=False): if hasattr(node, 'cstr_interval'): flag = True for n in node.inputs: if not n.from_input: flag = flag or self.check_constraint_available(n, flag) return flag def _ibp_constraint(self, node: 'Bound', delete_bounds_after_use=False): def _delete_unused_bounds(node_list): """Delete bounds from input layers after use to save memory. 
Used when sparse_intermediate_bounds_with_ibp is true.""" if delete_bounds_after_use: for n in node_list: del n.cstr_interval del n.cstr_lower del n.cstr_upper if not node.perturbed and hasattr(node, 'forward_value'): node.cstr_lower, node.cstr_upper = node.cstr_interval = ( node.forward_value, node.forward_value) to_be_deleted_bounds = [] if not hasattr(node, 'cstr_interval'): for n in node.inputs: if not hasattr(n, 'cstr_interval'): # Node n does not have interval bounds; we must compute it. self._ibp_constraint( n, delete_bounds_after_use=delete_bounds_after_use) to_be_deleted_bounds.append(n) inp = [n_pre.cstr_interval for n_pre in node.inputs] node.cstr_interval = node.interval_propagate(*inp) node.cstr_lower, node.cstr_upper = node.cstr_interval if isinstance(node.cstr_lower, torch.Size): node.cstr_lower = torch.tensor(node.cstr_lower) node.cstr_interval = (node.cstr_lower, node.cstr_upper) if isinstance(node.cstr_upper, torch.Size): node.cstr_upper = torch.tensor(node.cstr_upper) node.cstr_interval = (node.cstr_lower, node.cstr_upper) if node.is_lower_bound_current(): node.lower = torch.where(node.lower >= node.cstr_lower, node.lower, node.cstr_lower) node.upper = torch.where(node.upper <= node.cstr_upper, node.upper, node.cstr_upper) node.interval = (node.lower, node.upper) _delete_unused_bounds(to_be_deleted_bounds) return node.cstr_interval def _check_weight_perturbation(self): weight_perturbation = False for n in self.inputs[1:]: if hasattr(n, 'perturbation'): if n.perturbation is not None: weight_perturbation = True if weight_perturbation: self.requires_input_bounds = list(range(len(self.inputs))) else: self.requires_input_bounds = [] return weight_perturbation def non_deter_wrapper(self, op, *args, **kwargs): """Some operations are non-deterministic and deterministic mode will fail. So we temporary disable it.""" if self.options.get('deterministic', False): torch.use_deterministic_algorithms(False) ret = op(*args, **kwargs) if self.options.get('deterministic', False): torch.use_deterministic_algorithms(True) return ret def non_deter_scatter_add(self, *args, **kwargs): return self.non_deter_wrapper(torch.scatter_add, *args, **kwargs) def non_deter_index_select(self, *args, **kwargs): return self.non_deter_wrapper(torch.index_select, *args, **kwargs) ================================================ FILE: auto_LiRPA/operators/bivariate.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Bivariate operators""" import torch from torch import Tensor from torch.nn import Module from typing import Dict, Optional from .base import * from .activation_base import BoundOptimizableActivation from .convex_concave import BoundSqrt from .clampmult import multiply_by_A_signs from ..utils import * class MulHelper: """Handle linear relaxation for multiplication. 
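Roughly speaking, the planes produced here are the McCormick relaxation of z = x * y over the box [x_l, x_u] x [y_l, y_u]; for example, the default lower plane is z >= y_l * x + x_l * y - y_l * x_l, and `interpolated_relaxation` blends the two corner planes with a ratio in [0, 1] (a fixed ratio of 0.5 reproduces the `middle` option).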
This helper can be used by BoundMul, BoundMatMul, BoundLinear (with weight perturbation). """ def __init__(self): pass @staticmethod def interpolated_relaxation(x_l: Tensor, x_u: Tensor, y_l: Tensor, y_u: Tensor, r_l: Optional[Tensor] = None, r_u: Optional[Tensor] = None, middle: bool = False, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: """Interpolate two optimal linear relaxations for optimizable bounds.""" if r_l is None and r_u is None: if middle: # This option is equivalent to optimized linear relaxation # with 0.5 as the fixed parameter. # It interpolates two valid linear relaxations. # See Appendix C in https://openreview.net/pdf?id=BJxwPJHFwS alpha_l = (y_l - y_u) * 0.5 + y_u beta_l = (x_l - x_u) * 0.5 + x_u gamma_l = (y_u * x_u - y_l * x_l) * 0.5 - y_u * x_u alpha_u = (y_u - y_l) * 0.5 + y_l beta_u = (x_l - x_u) * 0.5 + x_u gamma_u = (y_l * x_u - y_u * x_l) * 0.5 - y_l * x_u else: alpha_l, beta_l, gamma_l = y_l, x_l, -y_l * x_l alpha_u, beta_u, gamma_u = y_u, x_l, -y_u * x_l return alpha_l, beta_l, gamma_l, alpha_u, beta_u, gamma_u else: assert isinstance(r_l, Tensor) and isinstance(r_u, Tensor) # TODO (for zhouxing/qirui): this function may benefit from JIT, # because it has many element-wise operation which can be fused. # Need to benchmark and see performance. alpha_l = (y_l - y_u) * r_l + y_u beta_l = (x_l - x_u) * r_l + x_u gamma_l = (y_u * x_u - y_l * x_l) * r_l - y_u * x_u alpha_u = (y_u - y_l) * r_u + y_l beta_u = (x_l - x_u) * r_u + x_u gamma_u = (y_l * x_u - y_u * x_l) * r_u - y_l * x_u return alpha_l, beta_l, gamma_l, alpha_u, beta_u, gamma_u @staticmethod def get_relaxation(x_l: Tensor, x_u: Tensor, y_l: Tensor, y_u: Tensor, opt_stage: Optional[str], alphas: Optional[Dict[str, Tensor]], start_name: Optional[str], middle: bool = False, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: if opt_stage in ['opt', 'reuse']: assert x_l.ndim == y_l.ndim ns = start_name alphas[ns].data[:] = alphas[ns].data[:].clamp(min=0, max=1) return MulHelper.interpolated_relaxation( x_l, x_u, y_l, y_u, alphas[ns][:2], alphas[ns][2:4]) else: return MulHelper.interpolated_relaxation( x_l, x_u, y_l, y_u, middle=middle) @staticmethod def get_forward_relaxation(x_l, x_u, y_l, y_u, opt_stage, alpha, start_name): # Broadcast # FIXME perhaps use a more efficient way x_l = x_l + torch.zeros_like(y_l) x_u = x_u + torch.zeros_like(y_u) y_l = y_l + torch.zeros_like(x_l) y_u = y_u + torch.zeros_like(x_u) return MulHelper.get_relaxation(x_l, x_u, y_l, y_u, opt_stage, alpha, start_name) @staticmethod def _get_gap(x, y, alpha, beta): return x * y - alpha * x - beta * y class BoundMul(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.splittable = True self.mul_helper = MulHelper() if options is None: options = {} self.middle = options.get('mul', {}).get('middle', False) def forward(self, x, y): self.x_shape = x.shape self.y_shape = y.shape return x * y def get_relaxation_opt(self, x_l, x_u, y_l, y_u): return self.mul_helper.get_relaxation( x_l, x_u, y_l, y_u, self.opt_stage, getattr(self, 'alpha', None), getattr(self, '_start', None), middle=self.middle) def _init_opt_parameters_impl(self, size_spec, **kwargs): """Implementation of init_opt_parameters for each start_node.""" x_l = self.inputs[0].lower y_l = self.inputs[1].lower assert x_l.ndim == y_l.ndim shape = [max(x_l.shape[i], y_l.shape[i]) for i in range(x_l.ndim)] alpha = torch.ones(4, size_spec, *shape, 
device=x_l.device) return alpha def _is_softmax(self): """This multiplication comes from softmax. It is the division converted to BoundMul + BoundReciprocal. """ return ( self.from_complex_node == 'BoundSoftmax' and type(self.inputs[0]).__name__ == 'BoundExp' and type(self.inputs[1]).__name__ == 'BoundReciprocal' and type(self.inputs[1].inputs[0]).__name__ == 'BoundReduceSum' and type(self.inputs[1].inputs[0].inputs[0]).__name__ == 'BoundExp') def bound_relax(self, x, y, init=False, dim_opt=None): if init: pass (alpha_l, beta_l, gamma_l, alpha_u, beta_u, gamma_u) = self.get_relaxation_opt( x.lower, x.upper, y.lower, y.upper) # Check NaN which can happen in softmax if Exp's bounds are too loose if self._is_softmax(): assert alpha_l.shape[:-1] == beta_l.shape[:-1] assert alpha_l.shape[-1] == 1 or alpha_l.shape[-1] == beta_l.shape[-1] assert beta_l.shape == gamma_l.shape mask = (alpha_l.isnan().expand(beta_l.shape) | alpha_l.isinf().expand(beta_l.shape) | beta_l.isnan() | beta_l.isinf() | gamma_l.isnan() | gamma_l.isinf()) if mask.any(): alpha_l = alpha_l.clone() alpha_l[mask.any(dim=-1)] = 0 beta_l = beta_l.clone() beta_l[mask] = 0 gamma_l = gamma_l.clone() gamma_l[mask] = 0 assert alpha_u.shape[:-1] == beta_u.shape[:-1] assert alpha_u.shape[-1] == 1 or alpha_u.shape[-1] == beta_u.shape[-1] assert beta_u.shape == gamma_u.shape mask = (alpha_u.isnan().expand(beta_u.shape) | alpha_u.isinf().expand(beta_u.shape) | beta_u.isnan() | beta_u.isinf() | gamma_u.isnan() | gamma_u.isinf()) if mask.any(): alpha_u = alpha_u.clone() alpha_u[mask.any(dim=-1)] = 0 beta_u = beta_u.clone() beta_u[mask] = 0 gamma_u = gamma_u.clone() gamma_u[mask] = 1. self.lw = [alpha_l, beta_l] self.lb = gamma_l self.uw = [alpha_u, beta_u] self.ub = gamma_u @staticmethod def _multiply_by_const(x, const): if isinstance(x, torch.Tensor): return x * const elif isinstance(x, Patches): # Multiplies patches by a const. Assuming const is a tensor, and it must be in nchw format. assert isinstance(const, torch.Tensor) and const.ndim == 4 if (const.size(0) == 1 or const.size(0) == x.patches.size(1)) and const.size(1) == x.patches.size(-3) and const.size(2) == const.size(3) == 1: # The case that we can do channel-wise broadcasting multiplication # Shape of const: (batch, in_c, 1, 1) # Shape of patches when unstable_idx is None: (out_c, batch, out_h, out_w, in_c, patch_h, patch_w) # Shape of patches when unstable_idx is not None: (unstable_size, batch, in_c, patch_h, patch_w) const_reshaped = const else: assert x.unstable_idx is None and (x.padding == 0 or x.padding == [0,0,0,0]) and x.stride == 1 and x.patches.size(-1) == x.patches.size(-2) == 1 # The assumed dimension is (out_c, N, out_h, out_w, in_c, 1, 1) with padding = 0 and stride = 1. # In this special case we can directly multiply. # After reshape it is (1, N, H, W, C, 1, 1) const_reshaped = const.permute(0, 2, 3, 1).unsqueeze(0).unsqueeze(-1).unsqueeze(-1) return x.create_similar(x.patches * const_reshaped) else: raise ValueError(f'Unsupported x type {type(x)}') def bound_backward_constant(self, last_lA, last_uA, x, y, op=None, reduce_bias=True, **kwargs): assert reduce_bias op = BoundMul._multiply_by_const if op is None else op # Handle the case of multiplication by a constant. factor = None if x.perturbed: factor = y.forward_value if y.perturbed: factor = x.forward_value # No need to compute A matrix if it is Constant.
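# Sketch of the constant case handled below (informal, dense-tensor view): for z = x * c with
# c constant, any linear relation A_z @ z equals (A_z * c) @ x after elementwise scaling, so
# the A matrix is simply multiplied by the constant (via `op`) and reshaped back to the input's
# shape with broadcast_backward(); no relaxation is needed and no bias term is introduced,
# hence the returned 0., 0.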
lAx = (None if not x.perturbed or last_lA is None else self.broadcast_backward(op(last_lA, factor), x)) uAx = (None if not x.perturbed or last_uA is None else self.broadcast_backward(op(last_uA, factor), x)) lAy = (None if not y.perturbed or last_lA is None else self.broadcast_backward(op(last_lA, factor), y)) uAy = (None if not y.perturbed or last_uA is None else self.broadcast_backward(op(last_uA, factor), y)) return [(lAx, uAx), (lAy, uAy)], 0., 0. def bound_backward(self, last_lA, last_uA, x, y, start_node=None, **kwargs): if start_node is not None: self._start = start_node.name if self.is_linear_op: ret = self.bound_backward_constant(last_lA, last_uA, x, y, **kwargs) else: ret = self.bound_backward_both_perturbed( last_lA, last_uA, x, y, **kwargs) return ret def bound_backward_both_perturbed(self, last_lA, last_uA, x, y, reduce_bias=True, **kwargs): self.bound_relax(x, y) def _bound_oneside(last_A, alpha_pos, beta_pos, gamma_pos, alpha_neg, beta_neg, gamma_neg, opt=False): if last_A is None: return None, None, 0 if type(last_A) == Patches: assert reduce_bias assert last_A.identity == 0 # last_A shape: [out_c, batch_size, out_h, out_w, in_c, H, W]. # Here out_c is the spec dimension. # for patches mode, we need to unfold the alpha_pos/neg and beta_pos/neg alpha_pos = maybe_unfold_patches(alpha_pos, last_A) alpha_neg = maybe_unfold_patches(alpha_neg, last_A) beta_pos = maybe_unfold_patches(beta_pos, last_A) beta_neg = maybe_unfold_patches(beta_neg, last_A) gamma_pos = maybe_unfold_patches(gamma_pos, last_A) gamma_neg = maybe_unfold_patches(gamma_neg, last_A) A_x, bias = multiply_by_A_signs( last_A, alpha_pos, alpha_neg, gamma_pos, gamma_neg) A_y, _ = multiply_by_A_signs( last_A, beta_pos, beta_neg, None, None) elif type(last_A) == Tensor: last_A_pos, last_A_neg = last_A.clamp(min=0), last_A.clamp(max=0) A_x, _ = multiply_by_A_signs(last_A, alpha_pos, alpha_neg, None, None) A_y, _ = multiply_by_A_signs(last_A, beta_pos, beta_neg, None, None) A_x = self.broadcast_backward(A_x, x) A_y = self.broadcast_backward(A_y, y) if reduce_bias: if opt: bias = (torch.einsum('sb...,sb...->sb', last_A_pos, gamma_pos) + torch.einsum('sb...,sb...->sb', last_A_neg, gamma_neg)) else: bias = (self.get_bias(last_A_pos, gamma_pos.squeeze(0)) + self.get_bias(last_A_neg, gamma_neg.squeeze(0))) else: assert not opt bias = last_A_pos * gamma_pos + last_A_neg * gamma_neg assert len(x.output_shape) == bias.ndim - 1 assert len(y.output_shape) == bias.ndim - 1 bias_x = bias_y = bias for i in range(2, bias.ndim): if bias_x.shape[i] != x.output_shape[i - 1]: assert x.output_shape[i - 1] == 1 bias_x = bias_x.sum(i, keepdim=True) for i in range(2, bias.ndim): if bias_y.shape[i] != y.output_shape[i - 1]: assert y.output_shape[i - 1] == 1 bias_y = bias_y.sum(i, keepdim=True) bias = (bias_x, bias_y) else: raise NotImplementedError(last_A) return A_x, A_y, bias alpha_l, beta_l, gamma_l = self.lw[0], self.lw[1], self.lb alpha_u, beta_u, gamma_u = self.uw[0], self.uw[1], self.ub if self.opt_stage in ['opt', 'reuse']: lA_x, lA_y, lbias = _bound_oneside( last_lA, alpha_l[0], beta_l[0], gamma_l[0], alpha_u[0], beta_u[0], gamma_u[0], opt=True) uA_x, uA_y, ubias = _bound_oneside( last_uA, alpha_u[1], beta_u[1], gamma_u[1], alpha_l[1], beta_l[1], gamma_l[1], opt=True) else: alpha_l, alpha_u = alpha_l.unsqueeze(0), alpha_u.unsqueeze(0) beta_l, beta_u = beta_l.unsqueeze(0), beta_u.unsqueeze(0) gamma_l, gamma_u = gamma_l.unsqueeze(0), gamma_u.unsqueeze(0) lA_x, lA_y, lbias = _bound_oneside( last_lA, alpha_l, beta_l, gamma_l, alpha_u, 
beta_u, gamma_u) uA_x, uA_y, ubias = _bound_oneside( last_uA, alpha_u, beta_u, gamma_u, alpha_l, beta_l, gamma_l) return [(lA_x, uA_x), (lA_y, uA_y)], lbias, ubias def bound_forward(self, dim_in, x, y): if self.is_linear_op: if not self.inputs[0].perturbed: return self.bound_forward_constant(x, y, self.inputs[0].batch_dim != -1) elif not self.inputs[1].perturbed: return self.bound_forward_constant(y, x, self.inputs[1].batch_dim != -1) else: assert False, "When is_linear_op is True, at least one input should be constant." return self.bound_forward_both_perturbed(dim_in, x, y) def bound_forward_constant(self, x, y, batched_constant): # x is constant const = x.lb const_pos, const_neg = const.clamp(min=0), const.clamp(max=0) lb = const_pos * y.lb + const_neg * y.ub ub = const_pos * y.ub + const_neg * y.lb if batched_constant: # If x is batched, its first dimension will be the batch dimension # We need to unsqueeze an extra dimension to align the batch dimension # x and y both have shape (B, a_1, a_2, ..., a_n) # lw/uw has shape (B, dim_in, a_1, a_2, ..., a_n) const_pos = const_pos.unsqueeze(1) const_neg = const_neg.unsqueeze(1) lw = const_pos * y.lw + const_neg * y.uw uw = const_pos * y.uw + const_neg * y.lw return LinearBound(lw, lb, uw, ub) def bound_forward_both_perturbed(self, dim_in, x, y): x_lw, x_lb, x_uw, x_ub = x.lw, x.lb, x.uw, x.ub y_lw, y_lb, y_uw, y_ub = y.lw, y.lb, y.uw, y.ub (alpha_l, beta_l, gamma_l, alpha_u, beta_u, gamma_u) = MulHelper.get_forward_relaxation( x.lower, x.upper, y.lower, y.upper, self.opt_stage, getattr(self, 'alpha', None), self._start) if x_lw is None: x_lw = 0 if y_lw is None: y_lw = 0 if x_uw is None: x_uw = 0 if y_uw is None: y_uw = 0 lw = alpha_l.unsqueeze(1).clamp(min=0) * x_lw + alpha_l.unsqueeze(1).clamp(max=0) * x_uw lw = lw + beta_l.unsqueeze(1).clamp(min=0) * y_lw + beta_l.unsqueeze(1).clamp(max=0) * y_uw lb = (alpha_l.clamp(min=0) * x_lb + alpha_l.clamp(max=0) * x_ub + beta_l.clamp(min=0) * y_lb + beta_l.clamp(max=0) * y_ub + gamma_l) uw = alpha_u.unsqueeze(1).clamp(max=0) * x_lw + alpha_u.unsqueeze(1).clamp(min=0) * x_uw uw = uw + beta_u.unsqueeze(1).clamp(max=0) * y_lw + beta_u.unsqueeze(1).clamp(min=0) * y_uw ub = (alpha_u.clamp(max=0) * x_lb + alpha_u.clamp(min=0) * x_ub + beta_u.clamp(max=0) * y_lb + beta_u.clamp(min=0) * y_ub + gamma_u) return LinearBound(lw, lb, uw, ub) @staticmethod def interval_propagate_constant(x, y, op=lambda x, const: x * const): # x is constant const = x[0] inp_lb = y[0] inp_ub = y[1] pos_mask = (const > 0).to(dtype=inp_lb.dtype) neg_mask = 1. - pos_mask lb = op(inp_lb, const * pos_mask) + op(inp_ub, const * neg_mask) ub = op(inp_ub, const * pos_mask) + op(inp_lb, const * neg_mask) return lb, ub def interval_propagate(self, x, y): if self.is_linear_op: if not self.inputs[0].perturbed: return self.interval_propagate_constant(x, y) elif not self.inputs[1].perturbed: return self.interval_propagate_constant(y, x) else: assert False, "When is_linear_op is True, at least one input should be constant." else: lower, upper = self.interval_propagate_both_perturbed(x, y) if self._is_softmax(): lower = lower.clamp(min=0) upper = upper.clamp(max=1) return lower, upper @staticmethod def interval_propagate_both_perturbed(*v): x, y = v[0], v[1] if x is y: # A shortcut for x * x. h_L, h_U = v[0] r0 = h_L * h_L r1 = h_U * h_U # When h_L < 0, h_U > 0, lower bound is 0. # When h_L < 0, h_U < 0, lower bound is h_U * h_U. # When h_L > 0, h_U > 0, lower bound is h_L * h_L. 
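# Quick numeric check of the one-line trick below (illustrative values only):
#   h_L = -2, h_U = 3:  relu(-2) - relu(-3) = 0,  0 * 0 = 0        (0 lies inside the interval)
#   h_L = -3, h_U = -1: relu(-3) - relu(1) = -1,  (-1) * (-1) = 1  (= h_U * h_U)
#   h_L = 1,  h_U = 3:  relu(1) - relu(-3) = 1,   1 * 1 = 1        (= h_L * h_L)
# so l * l reproduces the three cases enumerated above in a single expression.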
l = F.relu(h_L) - F.relu(-h_U) return l * l, torch.max(r0, r1) r0, r1, r2, r3 = x[0] * y[0], x[0] * y[1], x[1] * y[0], x[1] * y[1] lower = torch.min(torch.min(r0, r1), torch.min(r2, r3)) upper = torch.max(torch.max(r0, r1), torch.max(r2, r3)) return lower, upper def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if isinstance(v[0], Tensor): self.solver_vars = self.forward(*v) return gvar_array = np.array(v[0]) gvar_array = gvar_array * v[1].cpu().numpy() self.solver_vars = gvar_array.tolist() def update_requires_input_bounds(self): self.is_linear_op = False for inp in self.inputs: if not inp.perturbed: # If any of the two inputs are constant, we do not need input bounds. self.is_linear_op = True if self.is_linear_op: # One input is constant; no bounds required. self.requires_input_bounds = [] self.splittable = False else: # Both inputs are perturbed. Need relaxation. self.requires_input_bounds = [0, 1] if not self.force_not_splittable: self.splittable = True def build_gradient_node(self, grad_upstream): grad_node_0 = MulGrad(self.inputs[0].output_shape if self.inputs[0].batch_dim != -1 else torch.Size((1,) + self.inputs[0].output_shape)) grad_node_1 = MulGrad(self.inputs[1].output_shape if self.inputs[1].batch_dim != -1 else torch.Size((1,) + self.inputs[1].output_shape)) return [(grad_node_0, (grad_upstream, self.inputs[1].forward_value), [self.inputs[1]]), (grad_node_1, (grad_upstream, self.inputs[0].forward_value), [self.inputs[0]])] class MulGrad(Module): def __init__(self, input_shape): super().__init__() # We need the input shape to handle broadcasting self.input_shape = input_shape def forward(self, grad_last, y): # z = x * y # ∂z/∂x = y if y.ndim > 0: # If y is not a constant scalar, its second dimension is for spec y = y.unsqueeze(1) return reduce_broadcast_dims(grad_last * y, self.input_shape) class BoundDiv(Bound): def forward(self, x, y): # FIXME (05/11/2022): ad-hoc implementation for layer normalization if isinstance(self.inputs[1], BoundSqrt): input = self.inputs[0].inputs[0] x = input.forward_value n = input.forward_value.shape[-1] dev = x * (1. - 1. / n) - (x.sum(dim=-1, keepdim=True) - x) / n dev_sqr = dev ** 2 s = (dev_sqr.sum(dim=-1, keepdim=True) - dev_sqr) / dev_sqr.clamp(min=epsilon) sqrt = torch.sqrt(1. / n * (s + 1)) return torch.sign(dev) * (1. / sqrt) return x / y ================================================ FILE: auto_LiRPA/operators/clampmult.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """Element multiplication with the A matrix based on its sign.""" import torch from typing import Optional, Tuple from torch import Tensor from ..patches import Patches torch._C._jit_set_profiling_executor(False) torch._C._jit_set_profiling_mode(False) class ClampedMultiplication(torch.autograd.Function): @staticmethod @torch.no_grad() @torch.jit.script def clamp_mutiply_forward(A: Tensor, d_pos: Tensor, d_neg: Tensor, b_pos: Optional[Tensor], b_neg: Optional[Tensor], patches_mode: bool, reduce_bias: bool = False, same_slope: bool = False ) -> Tuple[Tensor, Tensor]: """Forward operations; actually the same as the reference implementation.""" A_pos = A.clamp(min=0) A_neg = A.clamp(max=0) if same_slope: # "same-slope" option is enabled; lower and upper bounds use the same A. A_new = d_pos * A else: A_new = d_pos * A_pos + d_neg * A_neg bias_pos = bias_neg = torch.zeros( (), dtype=A_new.dtype, device=A_new.device) if b_pos is not None: if not reduce_bias: bias_pos = A_pos * b_pos else: if patches_mode: bias_pos = torch.einsum('sb...chw,sb...chw->sb...', A_pos, b_pos) else: bias_pos = torch.einsum('sb...,sb...->sb', A_pos, b_pos) if b_neg is not None: if not reduce_bias: bias_neg = A_neg * b_neg else: if patches_mode: bias_neg = torch.einsum('sb...chw,sb...chw->sb...', A_neg, b_neg) else: bias_neg = torch.einsum('sb...,sb...->sb', A_neg, b_neg) return A_new, bias_pos + bias_neg @staticmethod @torch.no_grad() @torch.jit.script def clamp_mutiply_backward(A: Tensor, d_pos: Tensor, d_neg: Tensor, b_pos: Optional[Tensor], b_neg: Optional[Tensor], grad_output_A: Tensor, grad_output_bias: Optional[Tensor], same_slope: bool = False ) -> Tuple[Tensor, Tensor, Tensor, Optional[Tensor], Optional[Tensor], None, None, None]: """Improved backward operation. This should be better than the backward function generated by Pytorch.""" if grad_output_bias is not None: extension_dim = len(A.shape) - len(grad_output_bias.shape) grad_output_bias = grad_output_bias.view( grad_output_bias.shape + (1, ) * extension_dim) A_pos_mask = (A >= 0).to(dtype=grad_output_A.dtype) A_neg_mask = 1. - A_pos_mask A_pos_grad_output_A = A_pos_mask * grad_output_A A_neg_grad_output_A = A_neg_mask * grad_output_A # Although d_pos is d_neg, we still need to get gd_pos and gd_neg separately. 
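# Derivation sketch for the default (not same_slope) branch, where the forward pass computed
# A_new = d_pos * A.clamp(min=0) + d_neg * A.clamp(max=0):
#   dA_new/dd_pos equals A on entries with A >= 0 and 0 elsewhere, so by the chain rule
#   gd_pos = (A * A_pos_mask) * grad_output_A, written below as A * A_pos_grad_output_A to
#   reuse the masked gradient that is also needed for gA; gd_neg is the symmetric A < 0 case.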
gd_pos = A * A_pos_grad_output_A gd_neg = A * A_neg_grad_output_A if b_pos is not None and b_neg is not None and grad_output_bias is not None: A_pos_grad_output_bias = A_pos_mask * grad_output_bias A_neg_grad_output_bias = A_neg_mask * grad_output_bias gb_neg = A * A_neg_grad_output_bias gb_pos = A * A_pos_grad_output_bias if same_slope: gA = (d_pos * grad_output_A + b_pos * A_pos_grad_output_bias + b_neg * A_neg_grad_output_bias) else: gA = (d_pos * A_pos_grad_output_A + d_neg * A_neg_grad_output_A + b_pos * A_pos_grad_output_bias + b_neg * A_neg_grad_output_bias) elif b_neg is not None and grad_output_bias is not None: A_neg_grad_output_bias = A_neg_mask * grad_output_bias gb_neg = A * A_neg_grad_output_bias gb_pos = None if same_slope: gA = (d_pos * grad_output_A + b_neg * A_neg_grad_output_bias) else: gA = (d_pos * A_pos_grad_output_A + d_neg * A_neg_grad_output_A + b_neg * A_neg_grad_output_bias) elif b_pos is not None and grad_output_bias is not None: A_pos_grad_output_bias = A_pos_mask * grad_output_bias gb_pos = A * A_pos_grad_output_bias gb_neg = None if same_slope: gA = (d_pos * grad_output_A + b_pos * A_pos_grad_output_bias) else: gA = (d_pos * A_pos_grad_output_A + d_neg * A_neg_grad_output_A + b_pos * A_pos_grad_output_bias) else: if same_slope: gA = d_pos * grad_output_A else: gA = d_pos * A_pos_grad_output_A + d_neg * A_neg_grad_output_A gb_pos = gb_neg = None return gA, gd_pos, gd_neg, gb_pos, gb_neg, None, None, None @staticmethod def forward(ctx, A, d_pos, d_neg, b_pos, b_neg, patches_mode, reduce_bias=True, same_slope=False): # No need to save the intermediate A_pos, A_neg as they have been fused into the computation. ctx.save_for_backward(A, d_pos, d_neg, b_pos, b_neg) ctx.patches_mode = patches_mode ctx.reduce_bias = reduce_bias ctx.same_slope = same_slope return ClampedMultiplication.clamp_mutiply_forward( A, d_pos, d_neg, b_pos, b_neg, patches_mode, reduce_bias, same_slope) @staticmethod def backward(ctx, grad_output_A, grad_output_bias): A, d_pos, d_neg, b_pos, b_neg = ctx.saved_tensors assert ctx.reduce_bias return ClampedMultiplication.clamp_mutiply_backward( A, d_pos, d_neg, b_pos, b_neg, grad_output_A, grad_output_bias, ctx.same_slope) def multiply_by_A_signs(A, d_pos, d_neg, b_pos, b_neg, contiguous='auto', reduce_bias=True, same_slope=False): if isinstance(A, Tensor): if contiguous is True or contiguous == 'auto': # For dense mode, convert d_pos and d_neg to contiguous tensor by default. d_pos = d_pos.contiguous() d_neg = d_neg.contiguous() if d_pos.ndim == 1: # Special case for LSTM, the bias term is 1-dimension. (FIXME) assert d_neg.ndim == 1 and b_pos.ndim == 1 and b_neg.ndim == 1 new_A = A.clamp(min=0) * d_pos + A.clamp(max=0) * d_neg new_bias = A.clamp(min=0) * b_pos + A.clamp(max=0) * b_neg return new_A, new_bias return ClampedMultiplication.apply( A, d_pos, d_neg, b_pos, b_neg, False, reduce_bias, same_slope) elif isinstance(A, Patches): if contiguous: # For patches mode, do not convert d_pos and d_neg to contiguous tensor by default. d_pos = d_pos.contiguous() d_neg = d_neg.contiguous() assert A.identity == 0 # TODO: handle the A.identity = 1 case. Currently not used. patches = A.patches patches_shape = patches.shape # patches shape: [out_c, batch_size, out_h, out_w, in_c, H, W]. Here out_c is the spec dimension. # or (unstable_size, batch_size, in_c, H, W) when it is sparse. 
if len(patches_shape) == 6: patches = patches.view(*patches_shape[:2], -1, *patches_shape[-2:]) d_pos = d_pos.view(*patches_shape[:2], -1, *patches_shape[-2:]) if d_pos is not None else None d_neg = d_neg.view(*patches_shape[:2], -1, *patches_shape[-2:]) if d_neg is not None else None b_pos = b_pos.view(*patches_shape[:2], -1, *patches_shape[-2:]) if b_pos is not None else None b_neg = b_neg.view(*patches_shape[:2], -1, *patches_shape[-2:]) if b_neg is not None else None # Apply the multiplication based on signs. A_prod, bias = ClampedMultiplication.apply( patches, d_pos, d_neg, b_pos, b_neg, True, reduce_bias, same_slope) # prod has shape [out_c, batch_size, out_h, out_w, in_c, H, W] or (unstable_size, batch_size, in_c, H, W) when it is sparse. # For sparse patches the return bias size is (unstable_size, batch). # For regular patches the return bias size is (spec, batch, out_h, out_w). if len(patches_shape) == 6: A_prod = A_prod.view(*patches_shape) return A.create_similar(A_prod), bias ================================================ FILE: auto_LiRPA/operators/constant.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Constant operators, including operators that are usually fixed nodes and not perturbed """ from .base import * class BoundConstant(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.value = attr['value'].to(self.device) self.use_default_ibp = True self.no_jacobian = True def __repr__(self): if self.value.numel() == 1: return f'BoundConstant(name={self.name}, value={self.value})' else: return super().__repr__() def forward(self): return self.value.to(self.device) def bound_backward(self, last_lA, last_uA, **kwargs): def _bound_oneside(A): if A is None: return 0.0 if type(A) == Tensor: if A.ndim > 2: A = torch.sum(A, dim=list(range(2, A.ndim))) elif type(A) == Patches: assert A.padding == 0 or A.padding == (0, 0, 0, 0) or self.value == 0 # FIXME (09/19): adding padding here. 
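# Informal note (assuming a scalar constant): a constant node has no input to propagate A
# into, so A contributes only the bias A @ const = const * sum(A). The dense branch above
# sums A over every dimension beyond (spec, batch); the patches branch below does the same
# by summing each patch over its (in_c, H, W) window before scaling by the constant.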
patches_reshape = torch.sum(A.patches, dim=(-1, -2, -3)) * self.value.to(self.device) # Expected shape for bias is (spec, batch, out_h, out_w) or (unstable_size, batch) return patches_reshape return A * self.value.to(self.device) lbias = _bound_oneside(last_lA) ubias = _bound_oneside(last_uA) return [], lbias, ubias def bound_forward(self, dim_in): lw = uw = torch.zeros(dim_in, device=self.device) lb = ub = self.value return LinearBound(lw, lb, uw, ub) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.value class BoundPrimConstant(Bound): def forward(self): return torch.tensor([], device=self.device) class BoundConstantOfShape(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.value = attr['value'].to(self.device) self.no_jacobian = True def forward(self, x): self.x = x self.from_input = True return self.value.expand(*list(x)) def bound_backward(self, last_lA, last_uA, x, **kwargs): if last_lA is not None: lower_sum_b = last_lA * self.value while lower_sum_b.ndim > 2: lower_sum_b = torch.sum(lower_sum_b, dim=-1) else: lower_sum_b = 0 if last_uA is not None: upper_sum_b = last_uA * self.value while upper_sum_b.ndim > 2: upper_sum_b = torch.sum(upper_sum_b, dim=-1) else: upper_sum_b = 0 return [(None, None)], lower_sum_b, upper_sum_b def bound_forward(self, dim_in, x): assert (len(self.x) >= 1) lb = ub = torch.ones(self.output_shape, device=self.device) * self.value lw = uw = torch.zeros(self.x[0], dim_in, *self.x[1:], device=self.device) return LinearBound(lw, lb, uw, ub) def interval_propagate(self, *v): self.x = v[0][0] value = torch.ones(tuple(v[0][0]), device=self.device) * self.value return value, value def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(v) class BoundRange(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.device = attr['device'] def forward(self, start, end, step): if start.dtype == end.dtype == step.dtype == torch.int64: return torch.arange(start, end, step, dtype=torch.int64, device=self.device) else: return torch.arange(start, end, step, device=self.device) class BoundATenDiag(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.device = attr['device'] def forward(self, x, diagonal=0): return torch.diag(x, diagonal=diagonal) def interval_propagate(self, *v): return Interval.make_interval(torch.diag(v[0][0], v[1][0]), torch.diag(v[0][1], v[1][0]), v[0]) class BoundATenDiagonal(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.device = attr['device'] def forward(self, x, offset=0, dim1=0, dim2=1): return torch.diagonal(x, offset=offset, dim1=dim1, dim2=dim2) def interval_propagate(self, *v): params = (v[1][0], v[2][0], v[3][0]) return Interval.make_interval(torch.diagonal(v[0][0], *params), torch.diagonal(v[0][1], *params), v[0]) def bound_backward(self, last_lA, last_uA, *args, **kwargs): for i in range(1, 4): assert isinstance(self.inputs[i], BoundConstant) def _bound_oneside(last_A): if last_A is None: return None A = torch.zeros(*last_A.shape[:2], *self.inputs[0].output_shape[1:]).to(last_A) dim1, dim2 = self.inputs[2].value, self.inputs[3].value assert dim1 != 0 and dim2 != 0 if dim1 > 0: dim1 
+= 1 if dim2 > 0: dim2 += 1 A = torch.diagonal_scatter( A, last_A, offset=self.inputs[1].value, dim1=dim1, dim2=dim2) return A return ([(_bound_oneside(last_lA), _bound_oneside(last_uA))] + [(None, None)] * 3), 0, 0 ================================================ FILE: auto_LiRPA/operators/convex_concave.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """Nonlinear functions that are either convex or convave within the entire domain.""" import torch from torch.nn import Module from .base import * from .activation_base import BoundActivation, BoundOptimizableActivation class BoundLog(BoundActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.range_l = 1e-6 def forward(self, x): # NOTE adhoc implementation for loss fusion if self.loss_fusion: return torch.logsumexp(self.inputs[0].inputs[0].inputs[0].forward_value, dim=-1) return torch.log(x.clamp(min=epsilon)) def bound_relax(self, x, init=False): if init: self.init_linear_relaxation(x) rl, ru = self.forward(x.lower), self.forward(x.upper) ku = (ru - rl) / (x.upper - x.lower + epsilon) self.add_linear_relaxation(mask=None, type='lower', k=ku, x0=x.lower, y0=rl) m = (x.lower + x.upper) / 2 k = torch.reciprocal(m) rm = self.forward(m) self.add_linear_relaxation(mask=None, type='upper', k=k, x0=m, y0=rm) def interval_propagate(self, *v): # NOTE adhoc implementation for loss fusion if self.loss_fusion: par = self.inputs[0].inputs[0].inputs[0] lower = torch.logsumexp(par.lower, dim=-1) upper = torch.logsumexp(par.upper, dim=-1) return lower, upper return super().interval_propagate(*v) def bound_backward(self, last_lA, last_uA, x, **kwargs): A, lbias, ubias = super().bound_backward(last_lA, last_uA, x) # NOTE adhoc implementation for loss fusion if self.loss_fusion: assert A[0][0] is None exp_module = self.inputs[0].inputs[0] ubias = ubias + self.get_bias(A[0][1], exp_module.max_input.squeeze(-1)) return A, lbias, ubias class BoundSqrt(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_prior_constraint = True self.has_constraint = True self.range_l = 1e-6 def forward(self, x): return torch.sqrt(x) def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) if self.opt_stage in ['opt', 'reuse']: self.alpha[self._start].data[:2] = torch.min(torch.max( self.alpha[self._start].data[:2], x.lower), x.upper) mid = self.alpha[self._start] else: mid = (x.lower + x.upper) / 2 k = 0.5 / self.forward(mid) self.add_linear_relaxation(mask=None, type='upper', k=k, x0=mid) sqrt_l = self.forward(x.lower) sqrt_u = self.forward(x.upper) k = (sqrt_u - sqrt_l) / (x.upper - x.lower).clamp(min=1e-8) self.add_linear_relaxation(mask=None, type='lower', k=k, x0=x.lower) 
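# Relaxation recap (illustrative, not part of the original code): sqrt is concave on [l, u]
# with l > 0, so any tangent line is a valid upper bound and the chord is a valid lower bound.
# For example, with l = 1 and u = 4:
#   chord (lower):   k = (2 - 1) / (4 - 1) = 1/3,  y = 1 + (x - 1) / 3        -> y(2.25) = 1.4167 <= 1.5
#   tangent (upper): at mid = 2.5, k = 0.5 / sqrt(2.5) ~= 0.3162,
#                    y = sqrt(2.5) + k * (x - 2.5)                            -> y(2.25) ~= 1.5021 >= 1.5
# bound_relax() above places the tangent point either at the interval midpoint or at the
# optimizable alpha parameter when opt_stage is 'opt'/'reuse'.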
def bound_backward(self, last_lA, last_uA, x, **kwargs): if self.use_prior_constraint and self.check_constraint_available(x): if hasattr(x, 'cstr_interval'): del x.cstr_interval del x.cstr_lower del x.cstr_upper x_l, x_u = self._ibp_constraint(x, delete_bounds_after_use=True) x_u = torch.max(x_u, x_l + 1e-8) return super().bound_backward(last_lA, last_uA, x, **kwargs) def clamp_interim_bounds(self): self.cstr_lower = self.lower.clamp(min=0) self.cstr_upper = self.upper.clamp(min=0) self.cstr_interval = (self.cstr_lower, self.cstr_upper) def _init_opt_parameters_impl(self, size_spec, **kwargs): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper alpha = torch.empty(2, size_spec, *l.shape, device=l.device) alpha.data[:2] = (l + u) / 2 return alpha class BoundReciprocal(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.splittable = True self.range_l = 1e-6 def forward(self, x): return torch.reciprocal(x) def interval_propagate(self, *v): h_L = v[0][0].to(dtype=torch.get_default_dtype()) h_U = v[0][1].to(dtype=torch.get_default_dtype()) assert h_L.min() > 0, 'Only positive values are supported in BoundReciprocal' return torch.reciprocal(h_U), torch.reciprocal(h_L) def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) assert x.lower.min() >= 0 ku = -1. / (x.lower * x.upper) self.add_linear_relaxation(mask=None, type='upper', k=ku, x0=x.lower) if self.opt_stage in ['opt', 'reuse']: self.alpha[self._start].data[:2] = torch.min(torch.max( self.alpha[self._start].data[:2], x.lower), x.upper) mid = self.alpha[self._start].clamp(min=0.01) else: mid = (x.lower + x.upper) / 2 self.add_linear_relaxation( mask=None, type='lower', k=-1./(mid**2), x0=mid) if x.lower.min() <= 0: mask = x.lower == 0 self.uw[..., mask] = 0 self.ub[..., mask] = torch.inf if x.upper.isinf().any(): mask = x.upper.isinf() self.lw[..., mask] = 0 self.lb[..., mask] = 0 def bound_backward(self, last_lA, last_uA, x, **kwargs): As, lbias, ubias = super().bound_backward(last_lA, last_uA, x, **kwargs) if isinstance(ubias, torch.Tensor) and ubias.isnan().any(): ubias[ubias.isnan()] = torch.inf if (last_uA != 0).any() else 0. if isinstance(lbias, torch.Tensor) and lbias.isnan().any(): lbias[lbias.isnan()] = 0. 
return As, lbias, ubias def _init_opt_parameters_impl(self, size_spec, **kwargs): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper alpha = torch.empty(2, size_spec, *l.shape, device=l.device) alpha.data[:2] = (l + u) / 2 return alpha def build_gradient_node(self, grad_upstream): return [(ReciprocalGrad(), (grad_upstream, self.inputs[0].forward_value), [self.inputs[0]])] class ReciprocalGrad(Module): def __init__(self): super().__init__() def forward(self, g, x): # partial derivative of 1/x is -1/x^2 return -g / torch.square(x).unsqueeze(1) class BoundExp(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if options is None: options = {} self.options = options.get('exp', {}) self.max_input = 0 def forward(self, x): if self.loss_fusion and self.options != 'no-max-input': self.max_input = torch.max(x, dim=-1, keepdim=True)[0].detach() return torch.exp(x - self.max_input) return torch.exp(x) def interval_propagate(self, *v): assert len(v) == 1 # unary monotonous functions only h_L, h_U = v[0] if self.loss_fusion and self.options != 'no-max-input': self.max_input = torch.max(h_U, dim=-1, keepdim=True)[0] h_L, h_U = h_L - self.max_input, h_U - self.max_input else: self.max_input = 0 return torch.exp(h_L), torch.exp(h_U) def bound_forward(self, dim_in, x): m = torch.min((x.lower + x.upper) / 2, x.lower + 0.99) exp_l, exp_m, exp_u = torch.exp(x.lower), torch.exp(m), torch.exp(x.upper) kl = exp_m lw = x.lw * kl.unsqueeze(1) lb = kl * (x.lb - m + 1) ku = (exp_u - exp_l) / (x.upper - x.lower + epsilon) uw = x.uw * ku.unsqueeze(1) ub = x.ub * ku - ku * x.lower + exp_l return LinearBound(lw, lb, uw, ub) def bound_backward(self, last_lA, last_uA, x, **kwargs): # Special case when computing log_softmax (FIXME: find a better solution, this trigger condition is not reliable). if self.loss_fusion and last_lA is None and last_uA is not None and torch.min( last_uA) >= 0 and x.from_input: # Adding an extra bias term to the input. This is equivalent to adding a constant and subtract layer before exp. # Note that we also need to adjust the bias term at the end. if self.options == 'no-detach': self.max_input = torch.max(x.upper, dim=-1, keepdim=True)[0] elif self.options != 'no-max-input': self.max_input = torch.max(x.upper, dim=-1, keepdim=True)[0].detach() else: self.max_input = 0 adjusted_lower = x.lower - self.max_input adjusted_upper = x.upper - self.max_input # relaxation for upper bound only (used in loss fusion) exp_l, exp_u = torch.exp(adjusted_lower), torch.exp(adjusted_upper) k = (exp_u - exp_l) / (adjusted_upper - adjusted_lower).clamp(min=1e-8) if k.requires_grad: k = k.clamp(min=1e-8) uA = last_uA * k.unsqueeze(0) ubias = last_uA * (-adjusted_lower * k + exp_l).unsqueeze(0) if ubias.ndim > 2: ubias = torch.sum(ubias, dim=tuple(range(2, ubias.ndim))) # Also adjust the missing ubias term. 
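# Informal sketch of the adjustment below: the chord above upper-bounds exp(x - c) with
# c = max_input, i.e. exp(x - c) <= k * (x - c) + (exp_l - k * adjusted_lower). After
# multiplying by last_uA >= 0, the -k * c part becomes -(uA * c); because c is constant
# over the dimension being summed (it has size 1 there), uA can first be reduced to `A`
# and then A * c subtracted from ubias.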
if uA.ndim > self.max_input.ndim: A = torch.sum(uA, dim=tuple(range(self.max_input.ndim, uA.ndim))) else: A = uA # These should hold true in loss fusion assert self.batch_dim == 0 assert A.shape[0] == 1 batch_size = A.shape[1] ubias -= (A.reshape(batch_size, -1) * self.max_input.reshape(batch_size, -1)).sum(dim=-1).unsqueeze(0) return [(None, uA)], 0, ubias else: As, lbias, ubias = super().bound_backward(last_lA, last_uA, x, **kwargs) lA, uA = As[0] lA, lbias = self._check_nan(lA, lbias, last_lA, 0) uA, ubias = self._check_nan(uA, ubias, last_uA, torch.inf) return [(lA, uA)], lbias, ubias def _check_nan(self, A, bias, last_A, const_bound): """Check for NaN caused by 0 in A and inf in lw/lb/uw/ub. It can happen if the pre-activation bounds are very loose for exp. """ if A is None: return A, bias if bias.isnan().any(): # These assertions ensure that 0 is in A and inf is in lw/lb/uw/ub assert not last_A.isnan().any() assert not last_A.isinf().any() assert not self.lw.isnan().any() assert not self.uw.isnan().any() assert not self.lb.isnan().any() assert not self.ub.isnan().any() A_ = A.view(-1, *A.shape[2:]).clone() bias_ = bias.view(-1).clone() mask = bias_.isnan() A_[mask] = 0 assert (last_A >= 0).all() bias_[mask] = const_bound if (last_A != 0).any() else 0. A = A_.view(A.shape) bias = bias_.view(bias.shape) return A, bias def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) min_val = -1e9 l, u = x.lower.clamp(min=min_val), x.upper.clamp(min=min_val) if self.opt_stage in ['opt', 'reuse']: self.alpha[self._start].data[:2] = torch.min(torch.max( self.alpha[self._start].data[:2], x.lower), x.upper) m = torch.min(self.alpha[self._start], x.lower + 0.99) else: m = torch.min((x.lower + x.upper) / 2, x.lower + 0.99) exp_l, exp_m, exp_u = torch.exp(x.lower), torch.exp(m), torch.exp(x.upper) k = exp_m self.add_linear_relaxation(mask=None, type='lower', k=k, x0=m, y0=exp_m) k = (exp_u - exp_l) / (u - l).clamp(min=1e-8) self.add_linear_relaxation(mask=None, type='upper', k=k, x0=l, y0=exp_l) def _init_opt_parameters_impl(self, size_spec, **kwargs): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper alpha = torch.empty(2, size_spec, *l.shape, device=l.device) alpha.data[:2] = (l + u) / 2 return alpha def build_gradient_node(self, grad_upstream): if self.loss_fusion: raise NotImplementedError('Gradient computation for exp with loss fusion is not supported.') return [(ExpGrad(), (grad_upstream, self.inputs[0].forward_value), [self.inputs[0]])] class ExpGrad(Module): def __init__(self): super().__init__() def forward(self, g, preact): # exp'(x) = exp(x) return g * torch.exp(preact).unsqueeze(1) ================================================ FILE: auto_LiRPA/operators/convolution.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """ Convolution and padding operators""" from torch.autograd import Function from torch.nn import Module from .base import * import numpy as np from .solver_utils import grb from ..patches import unify_shape, compute_patches_stride_padding, is_shape_used, create_valid_mask EPS = 1e-2 class BoundConv(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if len(attr['kernel_shape']) == 1: # for 1d conv assert (attr['pads'][0] == attr['pads'][1]) self.padding = [attr['pads'][0]] self.F_conv = F.conv1d self.conv_dim = 1 else: # for 2d conv assert (attr['pads'][0] == attr['pads'][2]) assert (attr['pads'][1] == attr['pads'][3]) self.padding = [attr['pads'][0], attr['pads'][1]] self.F_conv = F.conv2d self.conv_dim = 2 self.stride = attr['strides'] self.dilation = attr['dilations'] self.groups = attr['group'] if len(inputs) == 3: self.has_bias = True else: self.has_bias = False self.patches_start = True if options is None: options = {} self.mode = options.get("conv_mode", "matrix") # denote whether this Conv is followed by a ReLU # if self.relu_followed is False, we need to manually pad the conv patches. # If self.relu_followed is True, the patches are padded in the ReLU layer # and the manual padding is not needed. self.relu_followed = False def forward(self, *x): # x[0]: input, x[1]: weight, x[2]: bias if self.has_bias bias = x[2] if self.has_bias else None output = self.F_conv(x[0], x[1], bias, self.stride, self.padding, self.dilation, self.groups) return output def bound_backward(self, last_lA, last_uA, *x, **kwargs): if self.is_input_perturbed(1): raise NotImplementedError( 'Weight perturbation for convolution layers has not been implemented.') lA_y = uA_y = lA_bias = uA_bias = None weight = x[1].lower def _bound_oneside(last_A): if last_A is None: return None, 0 if type(last_A) is OneHotC: # Conv layer does not support the OneHotC fast path. We have to create a dense matrix instead.
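# Background for the dense branch below (informal): propagating a linear bound through
# y = conv(x, W) needs last_A multiplied by the Jacobian dy/dx, which for a convolution is
# exactly a transposed convolution of last_A with the same weight, stride, padding and
# dilation; output_padding is supplied so that the reconstructed input size matches
# self.input_shape when (W - F + 2P) % S != 0.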
last_A = onehotc_to_dense(last_A, dtype=weight.dtype) if type(last_A) == Tensor: shape = last_A.size() # when (W−F+2P)%S != 0, construct the output_padding if self.conv_dim == 2: output_padding0 = ( int(self.input_shape[2]) - (int(self.output_shape[2]) - 1) * self.stride[0] + 2 * self.padding[0] - 1 - (int(weight.size()[2] - 1) * self.dilation[0])) output_padding1 = ( int(self.input_shape[3]) - (int(self.output_shape[3]) - 1) * self.stride[1] + 2 * self.padding[1] - 1 - (int(weight.size()[3] - 1) * self.dilation[1])) next_A = F.conv_transpose2d( last_A.reshape(shape[0] * shape[1], *shape[2:]), weight, None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=(output_padding0, output_padding1)) else: # for 1d conv, we use conv_transpose1d() output_padding = ( int(self.input_shape[2]) - (int(self.output_shape[2]) - 1) * self.stride[0] + 2 * self.padding[0] - 1 - (int(weight.size()[2] - 1) * self.dilation[0])) next_A = F.conv_transpose1d( last_A.reshape(shape[0] * shape[1], *shape[2:]), weight, None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=output_padding) next_A = next_A.view(shape[0], shape[1], *next_A.shape[1:]) if self.has_bias: # sum_bias = (last_A.sum((3, 4)) * x[2].lower).sum(2) sum_bias = torch.einsum('sbc...,c->sb', last_A, x[2].lower) else: sum_bias = 0 return next_A, sum_bias elif type(last_A) == Patches: # Here we build and propagate a Patch object with (patches, stride, padding) assert self.conv_dim == 2, 'Patches mode not supports conv1d so far.' assert type(last_A) == Patches if last_A.identity == 0: # FIXME (09/20): Don't call it relu_followed. Instead, make this a property of A, called "padded" and propagate this property. if not self.relu_followed: patches = last_A.create_padding(self.output_shape) else: patches = last_A.patches if self.has_bias: # bias is x[2] (lower and upper are the same), and has shape (c,). # Patches either has [out_c, batch, out_h, out_w, c, h, w] or [unstable_size, batch, c, h, w]. sum_bias = torch.einsum('sb...chw,c->sb...', patches, x[2].lower) # sum_bias has shape (out_c, batch, out_h, out_w) or (unstable_size, batch). else: sum_bias = 0 flattened_patches = patches.reshape( -1, patches.size(-3), patches.size(-2), patches.size(-1)) pieces = F.conv_transpose2d( flattened_patches, insert_zeros(weight, last_A.inserted_zeros) , stride=self.stride) # New patch size: (out_c, batch, out_h, out_w, c, h, w) or (unstable_size, batch, c, h, w). pieces = pieces.view( *patches.shape[:-3], pieces.size(-3), pieces.size(-2), pieces.size(-1)) elif last_A.identity == 1: # New patches have size [out_c, batch, out_h, out_w, c, h, w] if it is not sparse. # New patches have size [unstable_size, batch, c, h, w] if it is sparse. if last_A.unstable_idx is not None: pieces = weight.view( weight.size(0), 1, weight.size(1), weight.size(2), weight.size(3)) # Select based on the output channel (out_h and out_w are irrelevant here). pieces = pieces[last_A.unstable_idx[0]] # Expand the batch dimnension. pieces = pieces.expand(-1, last_A.shape[1], -1, -1, -1) # Do the same for the bias. if self.has_bias: sum_bias = x[2].lower[last_A.unstable_idx[0]].unsqueeze(-1) # bias has shape (unstable_size, batch). 
sum_bias = sum_bias.expand(-1, last_A.shape[1]) else: sum_bias = 0 else: assert weight.size(0) == last_A.shape[0] pieces = weight.view( weight.size(0), 1, 1, 1, weight.size(1), weight.size(2), weight.size(3)).expand(-1, *last_A.shape[1:4], -1, -1, -1) # The bias (x[2].lower) has shape (out_c,) need to make it (out_c, batch, out_h, out_w). # Here we should transpose sum_bias to set the batch dim to 1, aiming to keep it consistent with the matrix version if self.has_bias: sum_bias = x[2].lower.view(-1, 1, 1, 1).expand(-1, *last_A.shape[1:4]) else: sum_bias = 0 else: raise NotImplementedError() padding = last_A.padding if last_A is not None else (0, 0, 0, 0) # (left, right, top, bottom) stride = last_A.stride if last_A is not None else (1, 1) inserted_zeros = last_A.inserted_zeros if last_A is not None else 0 output_padding = last_A.output_padding if last_A is not None else (0, 0, 0, 0) padding, stride, output_padding = compute_patches_stride_padding( self.input_shape, padding, stride, self.padding, self.stride, inserted_zeros, output_padding) if (inserted_zeros == 0 and not is_shape_used(output_padding) and pieces.shape[-1] > self.input_shape[-1]): # the patches is too large and from now on, we will use matrix mode instead of patches mode. # This is our desired matrix: the input will be flattend to (batch_size, input_channel*input_x * input_y) and multiplies on this matrix. # After multiplication, the desired output is (batch_size, out_channel*output_x*output_y). # A_matrix has size (batch, out_c*out_h*out_w, in_c*in_h*in_w) A_matrix = patches_to_matrix( pieces, self.input_shape[1:], stride, padding, last_A.output_shape, last_A.unstable_idx) # print(f'Converting patches to matrix: old shape {pieces.shape}, size {pieces.numel()}; new shape {A_matrix.shape}, size {A_matrix.numel()}') if isinstance(sum_bias, Tensor) and last_A.unstable_idx is None: sum_bias = sum_bias.transpose(0, 1) sum_bias = sum_bias.reshape(sum_bias.size(0), -1).transpose(0,1) A_matrix = A_matrix.transpose(0,1) # Spec dimension at the front. 
return A_matrix, sum_bias new_patches = last_A.create_similar( pieces, stride=stride, padding=padding, output_padding=output_padding, identity=0, input_shape=self.input_shape) # if last_A is last_lA: # print(f'Conv : start_node {kwargs["start_node"].name} layer {self.name} {new_patches}') return new_patches, sum_bias else: raise NotImplementedError() lA_x, lbias = _bound_oneside(last_lA) uA_x, ubias = _bound_oneside(last_uA) return [(lA_x, uA_x), (lA_y, uA_y), (lA_bias, uA_bias)], lbias, ubias def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if self.is_input_perturbed(1): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") assert self.dilation == (1, 1) or self.dilation == [1, 1] # e.g., last layer input gurobi vars (3,32,32) gvars_array = np.array(v[0]) # pre_layer_shape (1,3,32,32) pre_layer_shape = np.expand_dims(gvars_array, axis=0).shape # this layer shape (1,8,16,16) this_layer_shape = self.output_shape out_lbs, out_ubs = None, None if self.is_lower_bound_current(): # self.lower shape (1,8,16,16) out_lbs = self.lower.detach().cpu().numpy() out_ubs = self.upper.detach().cpu().numpy() # current layer weight (8,3,4,4) this_layer_weight = v[1].detach().cpu().numpy() # current layer bias (8,) this_layer_bias = None if self.has_bias: this_layer_bias = v[2].detach().cpu().numpy() weight_shape2, weight_shape3 = this_layer_weight.shape[2], this_layer_weight.shape[3] padding0, padding1 = self.padding[0], self.padding[1] stride0, stride1 = self.stride[0], self.stride[1] new_layer_gurobi_vars = [] new_layer_gurobi_constrs = [] # precompute row and column index mappings # compute row mapping: from current row to input rows # vectorization of following code: # for out_row_idx in range(this_layer_shape[2]): # ker_row_min, ker_row_max = 0, weight_shape2 # in_row_idx_min = -padding0 + stride0 * out_row_idx # in_row_idx_max = in_row_idx_min + weight_shape2 - 1 # if in_row_idx_min < 0: # ker_row_min = -in_row_idx_min # if in_row_idx_max >= pre_layer_shape[2]: # ker_row_max = ker_row_max - in_row_idx_max + pre_layer_shape[2] - 1 # in_row_idx_min, in_row_idx_max = max(in_row_idx_min, 0), min(in_row_idx_max, # pre_layer_shape[2] - 1) in_row_idx_mins = np.arange(this_layer_shape[2]) * stride0 - padding0 in_row_idx_maxs = in_row_idx_mins + weight_shape2 - 1 ker_row_mins = np.zeros(this_layer_shape[2], dtype=int) ker_row_maxs = np.ones(this_layer_shape[2], dtype=int) * weight_shape2 ker_row_mins[in_row_idx_mins < 0] = -in_row_idx_mins[in_row_idx_mins < 0] ker_row_maxs[in_row_idx_maxs >= pre_layer_shape[2]] = \ ker_row_maxs[in_row_idx_maxs >= pre_layer_shape[2]] - in_row_idx_maxs[in_row_idx_maxs >= pre_layer_shape[2]]\ + pre_layer_shape[2] - 1 in_row_idx_mins = np.maximum(in_row_idx_mins, 0) in_row_idx_maxs = np.minimum(in_row_idx_maxs, pre_layer_shape[2] - 1) # compute column mapping: from current column to input columns # vectorization of following code: # for out_col_idx in range(this_layer_shape[3]): # ker_col_min, ker_col_max = 0, weight_shape3 # in_col_idx_min = -padding1 + stride1 * out_col_idx # in_col_idx_max = in_col_idx_min + weight_shape3 - 1 # if in_col_idx_min < 0: # ker_col_min = -in_col_idx_min # if in_col_idx_max >= pre_layer_shape[3]: # ker_col_max = ker_col_max - in_col_idx_max + pre_layer_shape[3] - 1 # in_col_idx_min, in_col_idx_max = max(in_col_idx_min, 0), min(in_col_idx_max, # pre_layer_shape[3] - 1) in_col_idx_mins = np.arange(this_layer_shape[3]) * stride1 - padding1 in_col_idx_maxs = in_col_idx_mins + 
weight_shape3 - 1 ker_col_mins = np.zeros(this_layer_shape[3], dtype=int) ker_col_maxs = np.ones(this_layer_shape[3], dtype=int) * weight_shape3 ker_col_mins[in_col_idx_mins < 0] = -in_col_idx_mins[in_col_idx_mins < 0] ker_col_maxs[in_col_idx_maxs >= pre_layer_shape[3]] = \ ker_col_maxs[in_col_idx_maxs >= pre_layer_shape[3]] - in_col_idx_maxs[in_col_idx_maxs >= pre_layer_shape[3]]\ + pre_layer_shape[3] - 1 in_col_idx_mins = np.maximum(in_col_idx_mins, 0) in_col_idx_maxs = np.minimum(in_col_idx_maxs, pre_layer_shape[3] - 1) neuron_idx = 0 for out_chan_idx in range(this_layer_shape[1]): out_chan_vars = [] for out_row_idx in range(this_layer_shape[2]): out_row_vars = [] # get row index range from precomputed arrays ker_row_min, ker_row_max = ker_row_mins[out_row_idx], ker_row_maxs[out_row_idx] in_row_idx_min, in_row_idx_max = in_row_idx_mins[out_row_idx], in_row_idx_maxs[out_row_idx] for out_col_idx in range(this_layer_shape[3]): # get col index range from precomputed arrays ker_col_min, ker_col_max = ker_col_mins[out_col_idx], ker_col_maxs[out_col_idx] in_col_idx_min, in_col_idx_max = in_col_idx_mins[out_col_idx], in_col_idx_maxs[out_col_idx] # init linear expression lin_expr = this_layer_bias[out_chan_idx] if self.has_bias else 0 # init linear constraint LHS implied by the conv operation for in_chan_idx in range(this_layer_weight.shape[1]): coeffs = this_layer_weight[out_chan_idx, in_chan_idx, ker_row_min:ker_row_max, ker_col_min:ker_col_max].reshape(-1) gvars = gvars_array[in_chan_idx, in_row_idx_min:in_row_idx_max+1, in_col_idx_min:in_col_idx_max+1].reshape(-1) if solver_pkg == 'gurobi': lin_expr += grb.LinExpr(coeffs, gvars) else: for i in range(len(coeffs)): try: lin_expr += coeffs[i] * gvars[i] except TypeError: lin_expr += coeffs[i] * gvars[i].var # init potential lb and ub, which helps solver to finish faster out_lb = out_lbs[0, out_chan_idx, out_row_idx, out_col_idx] if out_lbs is not None else -float('inf') out_ub = out_ubs[0, out_chan_idx, out_row_idx, out_col_idx] if out_ubs is not None else float('inf') if out_ub - out_lb < EPS: # If the inferred lb and ub are too close, it could lead to floating point disagreement # between solver's inferred lb and ub constraints and the computed ones from ab-crown. # Such disagreement can lead to "infeasible" result from the solver for feasible problem. # To avoid so, we relax the box constraints. # This should not affect the solver's result correctness, # since the tighter lb and ub can be inferred by the solver. out_lb, out_ub = (out_lb + out_ub - EPS) / 2., (out_lb + out_ub + EPS) / 2. 
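# Numeric illustration (hypothetical values): with EPS = 1e-2, an inferred box out_lb = 0.3001,
# out_ub = 0.3002 (gap < EPS) is widened to (0.3001 + 0.3002 - 0.01) / 2 = 0.29515 and
# (0.3001 + 0.3002 + 0.01) / 2 = 0.30515: the midpoint is preserved and the box gets width EPS,
# which avoids spurious "infeasible" results caused by floating point disagreement between
# these bounds and the solver's own inference.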
# add the output var and constraint var = model.addVar(lb=out_lb, ub=out_ub, obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(lin_expr == var, name=f'lay{self.name}_{neuron_idx}_eq') neuron_idx += 1 out_row_vars.append(var) out_chan_vars.append(out_row_vars) new_layer_gurobi_vars.append(out_chan_vars) self.solver_vars = new_layer_gurobi_vars model.update() def interval_propagate(self, *v, C=None): if self.is_input_perturbed(1): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") norm = Interval.get_perturbation(v[0]) norm = norm[0] h_L, h_U = v[0] weight = v[1][0] bias = v[2][0] if self.has_bias else None if norm == torch.inf: mid = (h_U + h_L) / 2.0 diff = (h_U - h_L) / 2.0 weight_abs = weight.abs() deviation = self.F_conv(diff, weight_abs, None, self.stride, self.padding, self.dilation, self.groups) elif norm > 0: norm, eps = Interval.get_perturbation(v[0]) # L2 norm, h_U and h_L are the same. mid = h_U # TODO: padding assert not isinstance(eps, torch.Tensor) or eps.numel() == 1 deviation = torch.mul(weight, weight).sum((1, 2, 3)).sqrt() * eps deviation = deviation.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) else: # Here we calculate the L0 norm IBP bound using the bound proposed in [Certified Defenses for Adversarial Patches, ICLR 2020] norm, eps, ratio = Interval.get_perturbation(v[0]) mid = h_U k = int(eps) weight_sum = torch.sum(weight.abs(), 1) deviation = torch.sum(torch.topk(weight_sum.view(weight_sum.shape[0], -1), k)[0], dim=1) * ratio if self.has_bias: center = self.F_conv(mid, weight, v[2][0], self.stride, self.padding, self.dilation, self.groups) else: center = self.F_conv(mid, weight, None, self.stride, self.padding, self.dilation, self.groups) ss = center.shape deviation = deviation.repeat(ss[2] * ss[3]).view(-1, ss[1]).t().view(ss[1], ss[2], ss[3]) center = self.F_conv(mid, weight, bias, self.stride, self.padding, self.dilation, self.groups) upper = center + deviation lower = center - deviation return lower, upper def bound_dynamic_forward(self, *x, max_dim=None, offset=0): if self.is_input_perturbed(1) or self.is_input_perturbed(2): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") weight = x[1].lb bias = x[2].lb if self.has_bias else None x = x[0] w = x.lw b = x.lb shape = w.shape shape_wconv = [shape[0] * shape[1]] + list(shape[2:]) def conv2d(input, weight, bias, stride, padding, dilation, groups): """ There may be some CUDA error (illegal memory access) when the batch size is too large. Thus split the input into several batches when needed. 
""" max_batch_size = 50 if input.device != torch.device('cpu') and input.shape[0] > max_batch_size: ret = [] for i in range((input.shape[0] + max_batch_size - 1) // max_batch_size): ret.append(self.F_conv( input[i*max_batch_size:(i+1)*max_batch_size], weight, bias, stride, padding, dilation, groups)) return torch.cat(ret, dim=0) else: return self.F_conv(input, weight, bias, stride, padding, dilation, groups) w_new = conv2d( w.reshape(shape_wconv), weight, None, self.stride, self.padding, self.dilation, self.groups) w_new = w_new.reshape(shape[0], -1, *w_new.shape[1:]) b_new = conv2d( b, weight, bias, self.stride, self.padding, self.dilation, self.groups) return LinearBound(w_new, b_new, w_new, b_new, x_L=x.x_L, x_U=x.x_U, tot_dim=x.tot_dim) def bound_forward(self, dim_in, *x): if self.is_input_perturbed(1) or self.is_input_perturbed(2): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") weight = x[1].lb bias = x[2].lb if self.has_bias else None x = x[0] mid_w = (x.lw + x.uw) / 2 mid_b = (x.lb + x.ub) / 2 diff_w = (x.uw - x.lw) / 2 diff_b = (x.ub - x.lb) / 2 weight_abs = weight.abs() shape = mid_w.shape shape_wconv = [shape[0] * shape[1]] + list(shape[2:]) deviation_w = self.F_conv( diff_w.reshape(shape_wconv), weight_abs, None, self.stride, self.padding, self.dilation, self.groups) deviation_b = self.F_conv( diff_b, weight_abs, None, self.stride, self.padding, self.dilation, self.groups) center_w = self.F_conv( mid_w.reshape(shape_wconv), weight, None, self.stride, self.padding, self.dilation, self.groups) center_b = self.F_conv( mid_b, weight, bias, self.stride, self.padding, self.dilation, self.groups) deviation_w = deviation_w.reshape(shape[0], -1, *deviation_w.shape[1:]) center_w = center_w.reshape(shape[0], -1, *center_w.shape[1:]) return LinearBound( lw = center_w - deviation_w, lb = center_b - deviation_b, uw = center_w + deviation_w, ub = center_b + deviation_b) def build_gradient_node(self, grad_upstream): node_grad = Conv2dGrad( self, self.inputs[1].param, self.stride, self.padding, self.dilation, self.groups) return [(node_grad, (grad_upstream,), [])] def update_requires_input_bounds(self): self._check_weight_perturbation() class BoundConvTranspose(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) assert (attr['pads'][0] == attr['pads'][2]) assert (attr['pads'][1] == attr['pads'][3]) self.stride = attr['strides'] self.padding = [attr['pads'][0], attr['pads'][1]] self.dilation = attr['dilations'] self.groups = attr['group'] self.output_padding = [attr.get('output_padding', [0, 0])[0], attr.get('output_padding', [0, 0])[1]] assert len(attr['kernel_shape']) == 2 # 2d transposed convolution. 
if len(inputs) == 3: self.has_bias = True else: self.has_bias = False self.mode = options.get("conv_mode", "matrix") assert self.output_padding == [0, 0] assert self.dilation == [1, 1] assert self.stride[0] == self.stride[1] assert self.groups == 1 self.F_convtranspose = F.conv_transpose2d def forward(self, *x): # x[0]: input, x[1]: weight, x[2]: bias if self.has_bias bias = x[2] if self.has_bias else None output = F.conv_transpose2d(x[0], x[1], bias, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=self.output_padding) return output def bound_backward(self, last_lA, last_uA, *x, **kwargs): if self.is_input_perturbed(1): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") lA_y = uA_y = lA_bias = uA_bias = None weight = x[1].lower assert weight.size(-1) == weight.size(-2) def _bound_oneside(last_A): if last_A is None: return None, 0 if type(last_A) is OneHotC: # Conv layer does not support the OneHotC fast path. We have to create a dense matrix instead. last_A = onehotc_to_dense(last_A, dtype=weight.dtype) if type(last_A) == Tensor: shape = last_A.size() next_A = F.conv2d(last_A.reshape(shape[0] * shape[1], *shape[2:]), weight, None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) next_A = next_A.view(shape[0], shape[1], *next_A.shape[1:]) if self.has_bias: sum_bias = (last_A.sum((3, 4)) * x[2].lower).sum(2) else: sum_bias = 0 return next_A, sum_bias elif type(last_A) == Patches: # Here we build and propagate a Patch object with (patches, stride, padding) assert type(last_A) == Patches if last_A.identity == 0: patches = last_A.patches # FIXME: so far, assume there will be a relu layer in its input. if self.has_bias: # bias is x[2] (lower and upper are the same), and has shape (c,). # Patches either has [out_c, batch, out_h, out_w, c, h, w] or [unstable_size, batch, c, h, w]. sum_bias = torch.einsum('sb...chw,c->sb...', patches, x[2].lower) # sum_bias has shape (out_c, batch, out_h, out_w) or (unstable_size, batch). else: sum_bias = 0 flattened_patches = patches.reshape(-1, patches.size(-3), patches.size(-2), patches.size(-1)) # Merge patches with this layer's weights. Weight must be flipped here; and if stride != 1, we must insert zeros in the input image. # For conv_transpose2d, the weight matrix is in the (in, out, k, k) shape. # pieces = F.conv_transpose2d(flattened_patches, weight.transpose(0,1).flip(-1,-2), stride=self.stride) # pieces = F.conv_transpose2d(flattened_patches, weight.transpose(0,1).flip(-1,-2), stride=last_A.inserted_zeros + 1) # Use padding in conv_transposed2d directly. pieces = F.conv_transpose2d( # Transpose because the weight has in_channel before out_channel. flattened_patches, insert_zeros(weight.transpose(0,1).flip(-1,-2), last_A.inserted_zeros)) # New patch size: (out_c, batch, out_h, out_w, c, h, w) or (unstable_size, batch, c, h, w). pieces = pieces.view(*patches.shape[:-3], pieces.size(-3), pieces.size(-2), pieces.size(-1)) elif last_A.identity == 1: # New patches have size [out_c, batch, out_h, out_w, c, h, w] if it is not sparse. # New patches have size [unstable_size, batch, c, h, w] if it is sparse. 
if last_A.unstable_idx is not None: raise NotImplementedError() else: assert weight.size(0) == last_A.shape[0] pieces = weight.view(weight.size(0), 1, 1, 1, weight.size(1), weight.size(2), weight.size(3)).expand(-1, *last_A.shape[1:4], -1, -1, -1) # The bias (x[2].lower) has shape (out_c,); we need to make it (out_c, batch, out_h, out_w). # Here we should transpose sum_bias to set the batch dim to 1, aiming to keep it consistent with the matrix version sum_bias = x[2].lower.view(-1, 1, 1, 1).expand(-1, *last_A.shape[1:4]) else: raise NotImplementedError() patches_padding = last_A.padding if last_A is not None else (0, 0, 0, 0) # (left, right, top, bottom) output_padding = last_A.output_padding if last_A is not None else (0, 0, 0, 0) # (left, right, top, bottom) inserted_zeros = last_A.inserted_zeros assert self.stride[0] == self.stride[1] # Unify the shape to 4-tuple. output_padding = unify_shape(output_padding) patches_padding = unify_shape(patches_padding) this_stride = unify_shape(self.stride) this_padding = unify_shape(self.padding) # Compute new padding. Due to the shape flip during merging, we need to use the kernel size on dimension 3 - j//2. # TODO: testing for asymmetric shapes. padding = tuple(p * (inserted_zeros + 1) + (weight.size(3 - j//2) - 1) for j, p in enumerate(patches_padding)) # Compute new output padding output_padding = tuple(p * (inserted_zeros + 1) + this_padding[j] for j, p in enumerate(output_padding)) # When we run insert_zeros, it's missing the rightmost column and the bottom row. # padding = (padding[0], padding[1] + inserted_zeros, padding[2], padding[3] + inserted_zeros) # If no transposed conv so far, inserted_zeros is 0. # When a transposed conv is encountered, the stride is multiplied into it. inserted_zeros = (inserted_zeros + 1) * this_stride[0] - 1 # FIXME: disabled patches_to_matrix because not all parameters are supported. if inserted_zeros == 0 and not is_shape_used(output_padding) and pieces.shape[-1] > self.input_shape[-1]: # the patches are too large and from now on, we will use matrix mode instead of patches mode. # This is our desired matrix: the input will be flattened to (batch_size, input_channel*input_x * input_y) and multiplied by this matrix. # After multiplication, the desired output is (batch_size, out_channel*output_x*output_y). # A_matrix has size (batch, out_c*out_h*out_w, in_c*in_h*in_w) assert inserted_zeros == 0 A_matrix = patches_to_matrix(pieces, self.input_shape[1:], last_A.stride, padding, last_A.output_shape, last_A.unstable_idx) if isinstance(sum_bias, Tensor) and last_A.unstable_idx is None: sum_bias = sum_bias.transpose(0, 1) sum_bias = sum_bias.reshape(sum_bias.size(0), -1).transpose(0,1) A_matrix = A_matrix.transpose(0,1) # Spec dimension at the front.
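# Small numeric sketch (hypothetical values, square kernel assumed) of the padding /
# inserted_zeros bookkeeping above when a Patches object is pushed backward through
# this transposed convolution: an existing padding entry p becomes
# p * (inserted_zeros + 1) + (k - 1), and the number of zeros inserted between
# pixels becomes (inserted_zeros + 1) * stride - 1.
k, stride, inserted_zeros_in = 4, 2, 0
patches_padding_in = (1, 1, 1, 1)                       # (left, right, top, bottom)
new_padding = tuple(p * (inserted_zeros_in + 1) + (k - 1) for p in patches_padding_in)
new_inserted_zeros = (inserted_zeros_in + 1) * stride - 1
assert new_padding == (4, 4, 4, 4) and new_inserted_zeros == 1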
return A_matrix, sum_bias new_patches = last_A.create_similar( pieces, padding=padding, inserted_zeros=inserted_zeros, output_padding=output_padding, input_shape=self.input_shape) return new_patches, sum_bias else: raise NotImplementedError() lA_x, lbias = _bound_oneside(last_lA) uA_x, ubias = _bound_oneside(last_uA) return [(lA_x, uA_x), (lA_y, uA_y), (lA_bias, uA_bias)], lbias, ubias def interval_propagate(self, *v, C=None): if self.is_input_perturbed(1): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") norm = Interval.get_perturbation(v[0]) norm = norm[0] h_L, h_U = v[0] weight = v[1][0] bias = v[2][0] if self.has_bias else None if norm == torch.inf: mid = (h_U + h_L) / 2.0 diff = (h_U - h_L) / 2.0 weight_abs = weight.abs() deviation = F.conv_transpose2d(diff, weight_abs, None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=self.output_padding) elif norm > 0: raise NotImplementedError() norm, eps = Interval.get_perturbation(v[0]) # L2 norm, h_U and h_L are the same. mid = h_U # TODO: padding deviation = torch.mul(weight, weight).sum((1, 2, 3)).sqrt() * eps deviation = deviation.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) else: # Here we calculate the L0 norm IBP bound using the bound proposed in [Certified Defenses for Adversarial Patches, ICLR 2020] raise NotImplementedError() center = F.conv_transpose2d(mid, weight, bias, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=self.output_padding) upper = center + deviation lower = center - deviation return lower, upper def bound_forward(self, dim_in, *x): if self.is_input_perturbed(1) or self.is_input_perturbed(2): raise NotImplementedError("Weight perturbation for convolution layers has not been implmented.") weight = x[1].lb bias = x[2].lb if self.has_bias else None x = x[0] mid_w = (x.lw + x.uw) / 2 mid_b = (x.lb + x.ub) / 2 diff_w = (x.uw - x.lw) / 2 diff_b = (x.ub - x.lb) / 2 weight_abs = weight.abs() shape = mid_w.shape shape_wconv = [shape[0] * shape[1]] + list(shape[2:]) deviation_w = self.F_convtranspose( diff_w.reshape(shape_wconv), weight_abs, None, output_padding=self.output_padding, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) deviation_b = self.F_convtranspose( diff_b, weight_abs, None, output_padding=self.output_padding, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) center_w = self.F_convtranspose( mid_w.reshape(shape_wconv), weight, output_padding=self.output_padding, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) center_b = self.F_convtranspose( mid_b, weight, bias, output_padding=self.output_padding, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) deviation_w = deviation_w.reshape(shape[0], -1, *deviation_w.shape[1:]) center_w = center_w.reshape(shape[0], -1, *center_w.shape[1:]) return LinearBound( lw = center_w - deviation_w, lb = center_b - deviation_b, uw = center_w + deviation_w, ub = center_b + deviation_b) class BoundPad(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if hasattr(attr, 'pads'): self.padding = attr['pads'][2:4] + attr['pads'][6:8] else: self.padding = [0, 0, 0, 0] self.value = attr.get('value', 0.0) assert self.padding == [0, 0, 0, 0] def forward(self, x, pad, value=0.0): # TODO: padding for 3-D or more 
dimensional inputs. assert x.ndim == 4 # x[1] should be [0,0,pad_top,pad_left,0,0,pad_bottom,pad_right] assert pad[0] == pad[1] == pad[4] == pad[5] == 0 pad = [int(pad[3]), int(pad[7]), int(pad[2]), int(pad[6])] final = F.pad(x, pad, value=value) self.padding, self.value = pad, value return final def interval_propagate(self, *v): l, u = zip(*v) return Interval.make_interval(self.forward(*l), self.forward(*u), v[0]) def bound_backward(self, last_lA, last_uA, *x, **kwargs): # TODO: padding for 3-D or more dimensional inputs. left, right, top, bottom = self.padding def _bound_oneside(last_A): if last_A is None: return None assert type(last_A) is Patches or last_A.ndim == 5 if type(last_A) is Patches: if isinstance(last_A.padding, tuple): new_padding = (last_A.padding[0] + left, last_A.padding[1] + right, last_A.padding[2] + top, last_A.padding[3] + bottom) else: new_padding = (last_A.padding + left, last_A.padding + right, last_A.padding + top, last_A.padding + bottom) return last_A.create_similar(padding=new_padding) else: shape = last_A.size() return last_A[:, :, :, top:(shape[3] - bottom), left:(shape[4] - right)] last_lA = _bound_oneside(last_lA) last_uA = _bound_oneside(last_uA) return [(last_lA, last_uA), (None, None), (None, None)], 0, 0 def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): # e.g., last layer input gurobi vars (3,32,32) gvars_array = np.array(v[0]) # pre_layer_shape (1,3,32,32) pre_layer_shape = np.expand_dims(gvars_array, axis=0).shape # this layer shape (1,3,35,35) this_layer_shape = self.output_shape # v1 = tensor([0, 0, 1, 1, 0, 0, 2, 2]) # [0,0,pad_top,pad_left,0,0,pad_bottom,pad_right] # => [left, right, top, bottom] padding = [int(v[1][3]), int(v[1][7]), int(v[1][2]), int(v[1][6])] left, right, top, bottom = padding assert pre_layer_shape[2] + padding[0] + padding[1] == this_layer_shape[2] assert pre_layer_shape[3] + padding[2] + padding[3] == this_layer_shape[3] new_layer_gurobi_vars = [] neuron_idx = 0 for out_chan_idx in range(this_layer_shape[1]): out_chan_vars = [] for out_row_idx in range(this_layer_shape[2]): out_row_vars = [] row_pad = out_row_idx < left or out_row_idx >= this_layer_shape[2] - right for out_col_idx in range(this_layer_shape[3]): col_pad = out_col_idx < top or out_col_idx >= this_layer_shape[3] - bottom if row_pad or col_pad: v = model.addVar(lb=0, ub=0, obj=0, vtype=grb.GRB.CONTINUOUS, name=f'pad{self.name}_{neuron_idx}') else: v = gvars_array[out_chan_idx, out_row_idx - left, out_col_idx - top] # print(out_chan_idx, out_row_idx, out_col_idx, row_pad, col_pad, v.LB, v.UB) neuron_idx += 1 out_row_vars.append(v) out_chan_vars.append(out_row_vars) new_layer_gurobi_vars.append(out_chan_vars) self.solver_vars = new_layer_gurobi_vars model.update() class Conv2dGrad(Module): def __init__(self, fw_module, weight, stride, padding, dilation, groups): super().__init__() self.weight = weight self.dilation = dilation self.groups = groups self.fw_module = fw_module assert isinstance(stride, list) and stride[0] == stride[1] assert isinstance(padding, list) and padding[0] == padding[1] assert isinstance(dilation, list) and dilation[0] == dilation[1] self.stride = stride[0] self.padding = padding[0] self.dilation = dilation[0] def forward(self, grad_last): output_padding0 = ( int(self.fw_module.input_shape[2]) - (int(self.fw_module.output_shape[2]) - 1) * self.stride + 2 * self.padding - 1 - (int(self.weight.size()[2] - 1) * self.dilation)) output_padding1 = ( int(self.fw_module.input_shape[3]) - 
(int(self.fw_module.output_shape[3]) - 1) * self.stride + 2 * self.padding - 1 - (int(self.weight.size()[3] - 1) * self.dilation)) return Conv2dGradOp.apply( grad_last, self.weight, self.stride, self.padding, self.dilation, self.groups, output_padding0, output_padding1) class Conv2dGradOp(Function): @staticmethod def symbolic(g, x, w, stride, padding, dilation, groups, output_padding0, output_padding1): return g.op( 'grad::Conv2d', x, w, stride_i=stride, padding_i=padding, dilation_i=dilation, groups_i=groups, output_padding0_i=output_padding0, output_padding1_i=output_padding1).setType(x.type()) @staticmethod def forward( ctx, grad_last, w, stride, padding, dilation, groups, output_padding0, output_padding1): grad_shape = grad_last.shape grad = F.conv_transpose2d( grad_last.view(grad_shape[0], *grad_shape[1:]), w, None, stride=stride, padding=padding, dilation=dilation, groups=groups, output_padding=(output_padding0, output_padding1)) grad = grad.view((grad_shape[0], *grad.shape[1:])) return grad class BoundConv2dGrad(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.stride = attr['stride'] self.padding = attr['padding'] self.dilation = attr['dilation'] self.groups = attr['groups'] self.output_padding = [ attr.get('output_padding0', 0), attr.get('output_padding1', 0) ] self.has_bias = len(inputs) == 3 self.mode = options.get('conv_mode', 'matrix') self.patches_start = True def forward(self, *x): # x[0]: input, x[1]: weight, x[2]: bias if self.has_bias return F.conv_transpose2d( x[0], x[1], None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=self.output_padding) def bound_backward(self, last_lA, last_uA, *x, **kwargs): assert not self.is_input_perturbed(1) lA_y = uA_y = lA_bias = uA_bias = None weight = x[1].lower def _bound_oneside(last_A): if last_A is None: return None, 0 if isinstance(last_A, torch.Tensor): shape = last_A.size() next_A = F.conv2d( last_A.reshape(shape[0] * shape[1], *shape[2:]), weight, None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups) next_A = next_A.view( shape[0], shape[1], *next_A.shape[1:]) if self.has_bias: sum_bias = (last_A.sum((3, 4)) * x[2].lower).sum(2) else: sum_bias = 0 return next_A, sum_bias elif isinstance(last_A, Patches): # Here we build and propagate a Patch object with # (patches, stride, padding) assert self.stride == 1, 'The patches mode only supports stride = 1' if last_A.identity == 1: # create a identity patch # [out_dim, batch, out_c, out_h, out_w, in_dim, in_c, in_h, in_w] patch_shape = last_A.shape if last_A.unstable_idx is not None: # FIXME Somehow the usage of unstable_idx seems to have # been changed, and the previous code is no longer working. 
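# Standalone check (hypothetical sizes, not part of this module) of the
# output_padding formula computed in Conv2dGrad.forward above.  conv_transpose2d
# produces H_out = (H - 1) * stride - 2 * padding + dilation * (k - 1) + output_padding + 1,
# so the extra padding needed to recover exactly the forward input size is
#   output_padding = H_in - ((H_grad - 1) * stride - 2 * padding + dilation * (k - 1) + 1).
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 10, 10)
w = torch.randn(4, 3, 3, 3)
stride, padding, dilation = 2, 1, 1
y = F.conv2d(x, w, stride=stride, padding=padding, dilation=dilation)   # 5x5 output
out_pad = x.shape[2] - ((y.shape[2] - 1) * stride - 2 * padding
                        + dilation * (w.shape[2] - 1) + 1)              # = 1 here
g = F.conv_transpose2d(torch.ones_like(y), w, stride=stride, padding=padding,
                       dilation=dilation, output_padding=out_pad)
assert g.shape[-2:] == x.shape[-2:]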
raise NotImplementedError( 'Sparse patches for ' 'BoundConv2dGrad is not supported yet.') output_shape = last_A.output_shape patches = torch.eye( patch_shape[0]).to(weight) patches = patches.view([ patch_shape[0], 1, 1, 1, 1, patch_shape[0], 1, 1]) # [out_dim, bsz, out_c, out_h, out_w, out_dim, in_c, in_h, in_w] patches = patches.expand([ patch_shape[0], patch_shape[1], patch_shape[2], output_shape[2], output_shape[3], patch_shape[0], 1, 1]) patches = patches.transpose(0, 1) patches = patches[ :,torch.tensor(list(range(patch_shape[0]))), last_A.unstable_idx[0], last_A.unstable_idx[1], last_A.unstable_idx[2]] patches = patches.transpose(0, 1) else: # out_dim * out_c patches = torch.eye(patch_shape[0]).to(weight) patches = patches.view([ patch_shape[0], 1, 1, 1, patch_shape[0], 1, 1]) patches = patches.expand(patch_shape) else: patches = last_A.patches if self.has_bias: # bias is x[2] (lower and upper are the same), and has # shape (c,). # Patches either has # [out_dim, batch, out_c, out_h, out_w, out_dim, c, h, w] # or [unstable_size, batch, out_dim, c, h, w]. # sum_bias has shape (out_dim, batch, out_c, out_h, out_w) # or (unstable_size, batch). sum_bias = torch.einsum( 'sb...ochw,c->sb...', patches, x[2].lower) else: sum_bias = 0 flattened_patches = patches.reshape( -1, patches.size(-3), patches.size(-2), patches.size(-1)) # Pad to the full size pieces = F.conv2d( flattened_patches, weight, stride=self.stride, padding=weight.shape[2]-1) # New patch size: # (out_c, batch, out_h, out_w, c, h, w) # or (unstable_size, batch, c, h, w). pieces = pieces.view( *patches.shape[:-3], pieces.size(-3), pieces.size(-2), pieces.size(-1)) # (left, right, top, bottom) padding = last_A.padding if last_A is not None else (0, 0, 0, 0) stride = last_A.stride if last_A is not None else 1 if isinstance(padding, int): padding = padding + weight.shape[2] - 1 else: padding = tuple(p + weight.shape[2] - 1 for p in padding) return Patches( pieces, stride, padding, pieces.shape, unstable_idx=last_A.unstable_idx, output_shape=last_A.output_shape), sum_bias else: raise NotImplementedError() lA_x, lbias = _bound_oneside(last_lA) uA_x, ubias = _bound_oneside(last_uA) return [(lA_x, uA_x), (lA_y, uA_y), (lA_bias, uA_bias)], lbias, ubias def interval_propagate(self, *v, C=None): assert not self.is_input_perturbed(1) norm = Interval.get_perturbation(v[0])[0] h_L, h_U = v[0] weight = v[1][0] bias = v[2][0] if self.has_bias else None if norm == torch.inf: mid = (h_U + h_L) / 2.0 diff = (h_U - h_L) / 2.0 weight_abs = weight.abs() deviation = F.conv_transpose2d( diff, weight_abs, None, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=self.output_padding) else: raise NotImplementedError center = F.conv_transpose2d( mid, weight, bias, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups, output_padding=self.output_padding) upper = center + deviation lower = center - deviation return lower, upper ================================================ FILE: auto_LiRPA/operators/cut_ops.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current 
and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Cut operators""" from .base import * from .clampmult import multiply_by_A_signs class CutModule(): # store under BoundedModule def __init__(self, relu_nodes=[], general_beta=None, x_coeffs=None, active_cuts=None, cut_bias=None): # all dict, storing cut parameters for each start node # {start node name: (2 (lA, uA), spec (out_c, out_h, out_w), batch, num_cuts)} self.general_beta = general_beta # {start node name: (# active cut constraints,)} self.active_cuts = active_cuts # all dict with tensor, storing coeffs for each relu layer, no grad # coeffs: {relu layername: (num_cuts, flattened_nodes)} self.relu_coeffs, self.arelu_coeffs, self.pre_coeffs = {}, {}, {} for m in relu_nodes: self.relu_coeffs[m.name] = self.arelu_coeffs[m.name] = self.pre_coeffs[m.name] = None # single tensor, always the same, no grad # bias: (num_cuts,) self.cut_bias = cut_bias # x_coeffs: (num_cuts, flattened input dims) self.x_coeffs = x_coeffs def use_patches(self, start_node): # check if we are using patches mode for the start node A = start_node.lA if start_node.lA is not None else start_node.uA return type(A) is Patches def select_active_general_beta(self, start_node, unstable_idx=None): # if one constraint have nodes deeper than start node, we do not count its effect for now # self.general_beta[start_node.name]: (2(0 lower, 1 upper), spec (out_c, out_h, out_w/# fc nodes), batch, num_constrs) # self.active_cuts[start_node.name]: a long() tensor with constraint index that # should be index on current layer with current start node if self.general_beta[start_node.name].ndim == 4: general_beta = self.general_beta[start_node.name][:, :, :, self.active_cuts[start_node.name]] elif self.general_beta[start_node.name].ndim == 6: general_beta = self.general_beta[start_node.name][:, :, :, :, :, self.active_cuts[start_node.name]] else: print("general beta shape not supported!") exit() if unstable_idx is not None: if self.use_patches(start_node): general_beta = general_beta[:, unstable_idx[0], unstable_idx[1], unstable_idx[2], :, :] else: # matrix mode if general_beta.ndim == 6: # conv layers general_beta: (2(0 lower, 1 upper), spec (out_c, out_h, out_w), batch, num_constrs) _, out_c, out_h, out_w, batch, num_constrs = general_beta.shape general_beta = general_beta.view(2, -1, batch, num_constrs) else: # dense layers general_beta: (2(0 lower, 1 upper), spec, batch, num_constrs) pass general_beta = general_beta[:, unstable_idx] else: # unstable_idx is None if general_beta.ndim == 6: # flatten spec layer shape _, out_c, out_h, out_w, batch, num_constrs = general_beta.shape general_beta = general_beta.view(2, -1, batch, num_constrs) return general_beta def general_beta_coeffs_mm(self, unstable_spec_beta, coeffs, A, current_layer_shape): if type(A) is Patches: # lA, uA are patches, we have to unfold beta and coeffs to match lA and uA # coeffs: (num_constrs, current_c, current_h, current_w) # coeffs_unfolded: (num_constrs, out_h, out_w, in_c, H, W) # current_layer_shape = x.lower.size()[1:] coeffs_unfolded = inplace_unfold(coeffs.view(-1, *current_layer_shape), \ kernel_size=A.patches.shape[-2:], padding=A.padding, stride=A.stride) # unstable_coeffs_unfolded: (num_constrs, unstable, in_c, H, W) # A.unstable_idx is the unstable idx for spec layer unstable_coeffs_unfolded = coeffs_unfolded[:, 
A.unstable_idx[1], A.unstable_idx[2], :, :, :] # A.unstable_idx: unstable index on out_c, out_h and out_w # general_beta: (2(0 lower, 1 upper), spec (out_c, out_h, out_w), batch, num_constrs) # unstable_spec_beta: (2(0 lower, 1 upper), unstable, batch, num_constrs) # unstable_spec_beta = general_beta[:, A.unstable_idx[0],\ # A.unstable_idx[1], A.unstable_idx[2], :, :] # beta_mm_coeffs_unfolded: (2(0 lower, 1 upper), unstable, batch, in_c, H, W) beta_mm_coeffs = torch.einsum('sihj,jiabc->sihabc', unstable_spec_beta, unstable_coeffs_unfolded) else: # unstable_spec_beta: (2(0 lower, 1 upper), unstable, batch, num_constrs) # coeffs: (num_constrs, current flattened layer nodes) # beta_mm_coeffs: (2(0 lower, 1 upper), unstable, batch, current flattened layer nodes) beta_mm_coeffs = torch.einsum('sihj,ja->siha', unstable_spec_beta, coeffs) assert beta_mm_coeffs[0].numel() == A.numel(), f"the shape of beta is not initialized correctly! {beta_mm_coeffs[0].shape} v.s. {A.shape}" return beta_mm_coeffs.reshape(2, *A.shape) def general_beta_coeffs_addmm_to_A(self, lA, uA, general_beta, coeffs, current_layer_shape): A = lA if lA is not None else uA # general_beta: (2(0 lower, 1 upper), spec (out_c, out_h, out_w), batch, num_constrs) # coeffs: (num_constrs, current_c, current_h, current_w) # beta_mm_coeffs[0] shape is the same as A # patches mode: (2(0 lower, 1 upper), unstable, batch, in_c, H, W) # not patches: (2(0 lower, 1 upper), unstable, batch, current flattened layer nodes) beta_mm_coeffs = self.general_beta_coeffs_mm(general_beta, coeffs, A, current_layer_shape) assert beta_mm_coeffs[0].shape == A.shape if type(A) is Patches: # lA, uA are patches, we have to unfold beta and coeffs to match lA and uA # lA_patches: (unstable, batch, in_c, H, W) if lA is not None: lA = Patches(lA.patches - beta_mm_coeffs[0], A.stride, A.padding, \ A.patches.shape, unstable_idx=A.unstable_idx, output_shape=A.output_shape) if uA is not None: uA = Patches(uA.patches + beta_mm_coeffs[1], A.stride, A.padding, \ A.patches.shape, unstable_idx=A.unstable_idx, output_shape=A.output_shape) else: # dense layers if lA is not None: lA = lA - beta_mm_coeffs[0] if uA is not None: uA = uA + beta_mm_coeffs[1] return lA, uA def patch_trick(self, start_node, layer_name, A, current_layer_shape): ######## A problem with patches mode for cut constraint start ########## # There are cases that the node that is in the constraint but not selected by the patches for the output node # trick: only count the small patches that have all the split node coeffs[ci].sum() equal to coeffs_unfolded[ci][out_h, out_w, -1].sum() # we should force these beta to be 0 to disable the effect of these constraints # this only apply if current layer uses patches mode; if the target layer is patches but current layer not, we should not use it! 
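# Shape sketch (hypothetical sizes, not part of this module) for the dense-mode
# einsum above: the cut-constraint betas (one per constraint j) are contracted with
# the constraint coefficients over j, producing one correction term per spec neuron,
# batch element and current-layer neuron.
import torch

general_beta = torch.rand(2, 5, 2, 3)   # (lower/upper, spec, batch, num_constrs)
coeffs = torch.rand(3, 7)               # (num_constrs, flattened current-layer neurons)
beta_mm = torch.einsum('sihj,ja->siha', general_beta, coeffs)
assert beta_mm.shape == (2, 5, 2, 7)
# beta_mm[0] is then subtracted from lA and beta_mm[1] added to uA, as done in
# general_beta_coeffs_addmm_to_A above.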
assert type(A) is Patches, "this trick fix only works for patches mode" # unstable_spec_beta stores the current propagation, self.general_beta[start_node.name] selected with active_cuts, spec unstable coeffs = 0 if layer_name != "input": if self.relu_coeffs[layer_name] is not None: coeffs = coeffs + self.relu_coeffs[layer_name] if self.arelu_coeffs[layer_name] is not None: coeffs = coeffs + self.arelu_coeffs[layer_name] if self.pre_coeffs[layer_name] is not None: coeffs = coeffs + self.pre_coeffs[layer_name] else: if self.x_coeffs is not None: coeffs = coeffs + self.x_coeffs coeffs_unfolded = inplace_unfold(coeffs.view(-1, *current_layer_shape), \ kernel_size=A.patches.shape[-2:], padding=A.padding, stride=A.stride) num_constrs, out_h, out_w, in_c, H, W = coeffs_unfolded.shape # make sure the small patch selected include all the nonzero coeffs ####### NOTE: This check could be costly ####### patch_mask_on_beta = (coeffs_unfolded.reshape(num_constrs, out_h, out_w, -1).abs().sum(-1) == \ coeffs.reshape(num_constrs, -1).abs().sum(-1).reshape(num_constrs, 1, 1)) # patch_mask_on_beta: (out_h, out_w, num_constrs) patch_mask_on_beta = patch_mask_on_beta.permute(1, 2, 0) # 2(lower, upper), out_c, out_h, out_w, batch, num_constrs patch_mask_on_beta = patch_mask_on_beta.reshape(1, 1, out_h, out_w, 1, num_constrs) self.general_beta[start_node.name].data = self.general_beta[start_node.name].data * patch_mask_on_beta def relu_cut(self, start_node, layer_name, last_lA, last_uA, current_layer_shape, unstable_idx=None, batch_mask=None): # propagate relu neuron in cut constraints through relu layer # start_node.name in self.general_beta means there are intermediate betas that can optimize this start node separately relu_coeffs = self.relu_coeffs[layer_name] active_cuts = self.active_cuts[start_node.name] # active_cuts.size(0) == 0 means all constraints containing this layer have deep layer nodes if relu_coeffs is None or active_cuts.size(0) == 0: # do nothing return last_lA, last_uA assert start_node.name in self.general_beta # select current relu layer general beta general_beta = self.select_active_general_beta(start_node, unstable_idx) relu_coeffs = relu_coeffs[active_cuts] if batch_mask is not None: general_beta = general_beta[:, :, batch_mask] last_lA, last_uA = self.general_beta_coeffs_addmm_to_A(last_lA, last_uA, general_beta, relu_coeffs, current_layer_shape) return last_lA, last_uA def pre_cut(self, start_node, layer_name, lA, uA, current_layer_shape, unstable_idx=None, batch_mask=None): # propagate prerelu neuron in cut constraints through relu layer # start_node.name in self.general_beta means there are intermediate betas that can optimize this start node separately pre_coeffs = self.pre_coeffs[layer_name] active_cuts = self.active_cuts[start_node.name] # active_cuts.size(0) == 0 means all constraints containing this layer have deep layer nodes if pre_coeffs is None or active_cuts.size(0) == 0: # do nothing return lA, uA general_beta = self.select_active_general_beta(start_node, unstable_idx) pre_coeffs = pre_coeffs[active_cuts] if batch_mask is not None: general_beta = general_beta[:, :, batch_mask] lA, uA = self.general_beta_coeffs_addmm_to_A(lA, uA, general_beta, pre_coeffs, current_layer_shape) return lA, uA @staticmethod @torch.jit.script def jit_arelu_lA(last_lA, lower, upper, beta_mm_coeffs, unstable_or_cut_index, upper_d, I_z1, I_z0): nu_hat_pos = last_lA.clamp(max=0.).abs() # gamma = (-lower.unsqueeze(0) * nu_hat_pos - beta_mm_coeffs[0]) / (upper.unsqueeze(0) - lower.unsqueeze(0) + 
1e-10) pi = (upper.unsqueeze(0) * nu_hat_pos + beta_mm_coeffs[0]) / (upper.unsqueeze(0) - lower.unsqueeze(0) + 1e-10) pi = torch.min(pi, nu_hat_pos)#, torch.min(gamma, nu_hat_pos) pi = pi.clamp(min=0.)#, gamma.clamp(min=0.) pi = nu_hat_pos * I_z1 + pi * (~I_z1 * ~I_z0) new_upper_d = pi / (nu_hat_pos + 1e-10) # need to customize the upper bound slope and lbias for (1) unstable relus and # (2) relus that are used with upper boundary relaxation # original upper bound slope is u/(u-l) also equal to pi/(pi+gamma) if no beta_mm_coeffs[0] # now the upper bound slope should be pi/(pi+gamma) updated with beta_mm_coeffs[0] unstable_upper_bound_index = unstable_or_cut_index.unsqueeze(0).logical_and(last_lA < 0) # conv layer: # upper_d: 1, batch, current_c, current_w, current_h # unstable_upper_bound_index, new_upper_d: spec unstable, batch, current_c, current_w, current_h # dense layer: # upper_d: 1, batch, current flattened nodes # unstable_upper_bound_index, new_upper_d: spec unstable, batch, current flattened nodes # we may need a new mask to filter out the unstable nodes that are not in the current layer new_upper_d = (new_upper_d * unstable_upper_bound_index.to(lower.dtype) + upper_d * (1. - unstable_upper_bound_index.to(lower.dtype))) return nu_hat_pos, pi, new_upper_d, unstable_upper_bound_index @staticmethod @torch.jit.script def jit_arelu_lbias(unstable_or_cut_index, nu_hat_pos, beta_mm_coeffs, lower, upper, lbias, pi, I_z1, I_z0): # if no unstable, following bias should always be 0 if unstable_or_cut_index.sum() > 0: # update lbias with new form, only contribued by unstable relus uC = -upper.unsqueeze(0) * nu_hat_pos lC = -lower.unsqueeze(0) * nu_hat_pos # lbias: (spec unstable, batch, current flattened nodes) same as lA lbias = (pi * lower.unsqueeze(0)) # previous implementation # uC_mask = (beta_mm_coeffs[0] <= uC).to(lbias) # lC_mask = (beta_mm_coeffs[0] >= lC).to(lbias) # complete implementation uC_mask = ((beta_mm_coeffs[0] <= uC) | I_z0).to(lbias) lC_mask = ((beta_mm_coeffs[0] >= lC) | I_z1).to(lbias) default_mask = ((1-uC_mask) * (1-lC_mask)).to(lbias) lbias = - beta_mm_coeffs[0].to(lbias) * lC_mask + lbias * default_mask # lbias[beta_mm_coeffs[0] <= uC] = 0. # lbias[beta_mm_coeffs[0] >= lC] = -beta_mm_coeffs[0][beta_mm_coeffs[0] >= lC].to(lbias) # final lbias: (spec unstable, batch) lbias = (lbias * unstable_or_cut_index.unsqueeze(0).to(lower.dtype)).view(lbias.shape[0], lbias.shape[1], -1).sum(-1) return lbias @staticmethod @torch.jit.script def jit_arelu_uA(last_uA, lower, upper, beta_mm_coeffs, unstable_or_cut_index, upper_d, I_z1, I_z0): nu_hat_pos = (-last_uA).clamp(max=0.).abs() # gamma = (- lower.unsqueeze(0) * nu_hat_pos - beta_mm_coeffs[1]) / (upper.unsqueeze(0) - lower.unsqueeze(0) + 1e-10) pi = (upper.unsqueeze(0) * nu_hat_pos + beta_mm_coeffs[1]) / (upper.unsqueeze(0) - lower.unsqueeze(0) + 1e-10) pi = pi.clamp(min=0.) pi = torch.min(pi, nu_hat_pos) pi = pi * I_z1 + nu_hat_pos * (~I_z1 * ~I_z0) new_upper_d = pi / (nu_hat_pos + 1e-10) # assert ((gamma + pi - nu_hat_pos).abs()*unstable_or_cut_index).max() <= 1e-5, "pi+gamma should always be the same as nu_hat_pos" # unstable_or_cut_index = self.I.logical_or(self.arelu_coeffs.abs().sum(0).view(self.I.shape) != 0) unstable_upper_bound_index = unstable_or_cut_index.unsqueeze(0).logical_and(-last_uA < 0) new_upper_d = new_upper_d * unstable_upper_bound_index.to(lower.dtype) + \ upper_d * (1. 
- unstable_upper_bound_index.to(lower.dtype)) return nu_hat_pos, pi, new_upper_d, unstable_upper_bound_index @staticmethod @torch.jit.script def jit_arelu_ubias(unstable_or_cut_index, nu_hat_pos, beta_mm_coeffs, lower, upper, ubias, pi, I_z1, I_z0): if unstable_or_cut_index.sum() > 0: uC = -upper.unsqueeze(0) * nu_hat_pos lC = -lower.unsqueeze(0) * nu_hat_pos ubias = -(pi * lower.unsqueeze(0)) # uC_mask = (beta_mm_coeffs[1] <= uC).to(ubias) # lC_mask = (beta_mm_coeffs[1] >= lC).to(ubias) uC_mask = ((beta_mm_coeffs[1] <= uC) | I_z0).to(ubias) lC_mask = ((beta_mm_coeffs[1] >= lC) | I_z1).to(ubias) default_mask = ((1-uC_mask) * (1-lC_mask)).to(ubias) ubias = beta_mm_coeffs[1].to(ubias) * lC_mask + ubias * default_mask # ubias[beta_mm_coeffs[1] <= uC] = 0. # ubias[beta_mm_coeffs[1] >= lC] = beta_mm_coeffs[1][beta_mm_coeffs[1] >= lC].to(ubias) ubias = (ubias * unstable_or_cut_index.unsqueeze(0).to(lower.dtype)).view(ubias.shape[0], ubias.shape[1], -1).sum(-1) return ubias def arelu_cut(self, start_node, layer_name, last_lA, last_uA, lower_d, upper_d, lower_b, upper_b, lb_lower_d, ub_lower_d, relu_indicators, x, patch_size, current_layer_shape, unstable_idx=None, batch_mask=None): """ We want to calculate the pi and gamma for the lower bound of the next layer. To make the GCP CROWN complete, we have to consider the case when z is a constant. Now discuss the case when z = 0, z = 1 (constant), and 0 < z < 1 (variable). lbias is h(beta) in the paper. upper_d is the upper bound slope of the current layer. 1. z = 0 -> pi = 0, gamma = nu_hat_pos, tao = 0, mu = (alpha) * nu_hat_neg lbias = 0. upper_d = pi / (pi + gamma) = 0. 2. z = 1 -> pi = nu_hat_pos, gamma = 0, tao = alpha * nu_hat_neg, mu = 0 lbias = - beta_mm_coeffs[0]. upper_d = pi / (pi + gamma) = 1. 3. 0 < z < 1. We do the regular calculation using the closed form solution. lbias = pi * lower, if -upper * nu_hat_pos <= beta_mm_coeffs[0] <= -lower * nu_hat_pos lbias = 0, if beta_mm_coeffs[0] <= -upper * nu_hat_pos lbias = -beta_mm_coeffs[0], if beta_mm_coeffs[0] >= -lower * nu_hat_pos upper_d = pi / (nu_hat_pos). where pi = (upper * nu_hat_pos + beta_mm_coeffs[0]) / (upper - lower), pi = min(pi, nu_hat_pos), pi = max(pi, 0), gamma = (-lower * nu_hat_pos - beta_mm_coeffs[0]) / (upper - lower). gamma = min(gamma, nu_hat_pos), gamma = max(gamma, 0). Thus, we have the following implementation. if z = 0: pi = 0. if z = 1: pi = nu_hat_pos. Otherwise: if -upper * nu_hat_pos <= beta_mm_coeffs[0] <= -lower * nu_hat_pos: pi = (upper * nu_hat_pos + beta_mm_coeffs[0]) / (upper - lower), pi = min(pi, nu_hat_pos), pi = max(pi, 0), lbias = pi * lower, upper_d = pi / (nu_hat_pos). if beta_mm_coeffs[0] <= -upper * nu_hat_pos: lbias = 0. if beta_mm_coeffs[0] >= -lower * nu_hat_pos: lbias = -beta_mm_coeffs[0]. """ # propagate integer var of relu neuron (arelu) in cut constraints through relu layer # I[0]. unstable neuron mask. # I[1]. previous unstable now split on z = 1. # I[2]. previous unstable now split on z = 0. 
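# Scalar sketch (hypothetical numbers, single neuron) of the closed-form solution in
# the docstring above for the 0 < z < 1 case: the modified upper slope pi / nu_hat_pos
# reduces to the usual ReLU slope u / (u - l) when the beta term is zero.
import torch

l, u = torch.tensor(-1.0), torch.tensor(2.0)
nu_hat_pos, beta = torch.tensor(0.5), torch.tensor(0.25)
pi = ((u * nu_hat_pos + beta) / (u - l)).clamp(min=0.)
pi = torch.min(pi, nu_hat_pos)
new_upper_d = pi / (nu_hat_pos + 1e-10)          # modified upper-bound slope
lbias = pi * l                                   # h(beta), since -u*nu <= beta <= -l*nu here
pi_no_beta = (u * nu_hat_pos) / (u - l)
assert torch.isclose(pi_no_beta / nu_hat_pos, u / (u - l))  # recovers u/(u-l) when beta = 0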
unstable_neurons_mask, z_split_to_1_mask, z_split_to_0_mask = relu_indicators arelu_coeffs = self.arelu_coeffs[layer_name] active_cuts = self.active_cuts[start_node.name] # active_cuts.size(0) == 0 means all constraints containing this layer have deep layer nodes if arelu_coeffs is None or active_cuts.size(0) == 0: # do regular propagation without cut uA, ubias = _bound_oneside(last_uA, upper_d, ub_lower_d if lower_d is None else lower_d, upper_b, lower_b, start_node, patch_size) lA, lbias = _bound_oneside(last_lA, lb_lower_d if lower_d is None else lower_d, upper_d, lower_b, upper_b, start_node, patch_size) return lA, uA, lbias, ubias # general_beta: (2(0 lower, 1 upper), spec (out_c, out_h, out_w), batch, num_constrs) general_beta = self.select_active_general_beta(start_node, unstable_idx) # arelu_coeffs: (num_constrs, flattened current layer nodes) arelu_coeffs = arelu_coeffs[active_cuts] if batch_mask is not None: general_beta = general_beta[:, :, batch_mask] A = last_lA if last_lA is not None else last_uA # beta_mm_coeffs[0] shape is the same as A # patches mode: (2(0 lower, 1 upper), unstable, batch, in_c, H, W) # not patches: (2(0 lower, 1 upper), unstable, batch, current flattened layer nodes) beta_mm_coeffs = self.general_beta_coeffs_mm(general_beta, arelu_coeffs, A, current_layer_shape) # unstable_this_layer = torch.logical_and(x.lower < 0, x.upper > 0).unsqueeze(0) # relu_indicator is the unstable index in this relu layer: (batch, *layer shape) # if there is node in cut constraint that is stable, also need to count its effect # self.arelu_coeffs: (num_constrs, flattened current layer) # self.arelu_coeffs do not have a batch dimension - only one cut can be applied to all batch elements. # We will handle the neurons which are unstable or those have cut constraints below, thus creating the mask. unstable_or_cut_index = unstable_neurons_mask.logical_or(arelu_coeffs.abs().sum(0).view(unstable_neurons_mask[0:1].shape) != 0) # Shape of unstable_or_cut_index is (batch, num_neurons). It is a binary mask. if type(A) is Patches: # patches mode, conv layer only # x.lower (always regular shape): batch, current_c, current_h, current_w # x_lower_unfold: unstable, batch, in_C, H, W (same as patches last_lA) x_lower_unfold = _maybe_unfold(x.lower.unsqueeze(0), A) x_upper_unfold = _maybe_unfold(x.upper.unsqueeze(0), A) # first minus upper and lower and then unfold to patch size will save memory x_upper_minus_lower_unfold = _maybe_unfold((x.upper - x.lower).unsqueeze(0), A) ####### be careful with the unstable_this_layer and unstable_idx ####### # unstable_this_layer is the unstable index in this layer # unstable_idx is the unstable index in spec layer # unstable_this_layer: spec unstable, batch, in_C, H, W (same as patches last_lA) # unstable_this_layer = torch.logical_and(x_lower_unfold < 0, x_upper_unfold > 0) # unstable_this_layer = _maybe_unfold(self.I.unsqueeze(0), A) unstable_or_cut_index = _maybe_unfold(unstable_or_cut_index.unsqueeze(0), A) if last_lA is not None: assert beta_mm_coeffs[0].shape == last_lA.shape, f"{beta_mm_coeffs[0].shape} != {last_lA.shape}" # last_lA.patches, nu_hat_pos, gamma, pi: (unstable, batch, in_c, H, W) nu_hat_pos = last_lA.patches.clamp(max=0.).abs() # gamma = (-x_lower_unfold * nu_hat_pos - beta_mm_coeffs[0]) / (x_upper_minus_lower_unfold.clamp(min=1e-10)) pi = (x_upper_unfold * nu_hat_pos + beta_mm_coeffs[0]) / (x_upper_minus_lower_unfold.clamp(min=1e-10)) pi = torch.min(pi, nu_hat_pos).clamp(min=0.) 
pi = nu_hat_pos * z_split_to_1_mask + pi * (~z_split_to_1_mask * ~z_split_to_0_mask) new_upper_d = pi / (nu_hat_pos + 1e-10) # assert ((gamma + pi - nu_hat_pos).abs()*unstable_or_cut_index).max() <= 1e-5, "pi+gamma should always be the same as nu_hat_pos" # unstable_upper_bound_index: spec unstable, batch, in_C, H, W (same as patches last_lA) unstable_upper_bound_index = unstable_or_cut_index.logical_and(last_lA.patches < 0) # upper_d: (spec unstable, 1, in_C, H, W) (unfolded shape, same as patches last_lA) new_upper_d = new_upper_d * unstable_upper_bound_index.to(x_lower_unfold.dtype) + \ upper_d * (1. - unstable_upper_bound_index.to(x_lower_unfold.dtype)) if last_uA is None: uA, ubias = None, 0. # lbias: unstable, batch # lA: unstable, batch, in_C, H, W (same as patches last_lA) lA, lbias = _bound_oneside(last_lA, lb_lower_d if lower_d is None else lower_d, new_upper_d, lower_b, upper_b, start_node, patch_size) # if general_beta[0].sum()!=0: import pdb; pdb.set_trace() # there is any unstable relus in this layer if unstable_or_cut_index.sum() > 0: uC = -x_upper_unfold * nu_hat_pos lC = -x_lower_unfold * nu_hat_pos lbias = (pi * x_lower_unfold) # lbias[beta_mm_coeffs[0] <= uC] = 0. # lbias[beta_mm_coeffs[0] >= lC] = -beta_mm_coeffs[0][beta_mm_coeffs[0] >= lC].to(lbias) lbias[(beta_mm_coeffs[0] <= uC)| z_split_to_0_mask] = 0. lbias[(beta_mm_coeffs[0] >= lC)| z_split_to_1_mask] = -beta_mm_coeffs[0][(beta_mm_coeffs[0] >= lC)| z_split_to_1_mask].to(lbias) # lbias: unstable, batch, in_C, H, W (same as patches last_lA) => lbias: (unstable, batch) lbias = (lbias * unstable_or_cut_index.to(x_lower_unfold.dtype)).view(lbias.shape[0], lbias.shape[1], -1).sum(-1) if last_uA is not None: # get the upper bound nu_hat_pos = (-last_uA.patches).clamp(max=0.).abs() # gamma = (-x_lower_unfold * nu_hat_pos - beta_mm_coeffs[1]) / (x_upper_minus_lower_unfold + 1e-10) pi = (x_upper_unfold * nu_hat_pos + beta_mm_coeffs[1]) / (x_upper_minus_lower_unfold + 1e-10) pi = torch.min(pi, nu_hat_pos).clamp(min=0.) pi = nu_hat_pos * z_split_to_1_mask + pi * (~z_split_to_1_mask * ~z_split_to_0_mask) new_upper_d = pi / (nu_hat_pos + 1e-10) # assert ((gamma + pi - nu_hat_pos).abs()*unstable_or_cut_index).max() <= 1e-5, "pi+gamma should always be the same as nu_hat_pos" unstable_upper_bound_index = unstable_or_cut_index.logical_and((-last_uA.patches) < 0) new_upper_d = new_upper_d * unstable_upper_bound_index.to(x_lower_unfold.dtype) + \ upper_d * (1. - unstable_upper_bound_index.to(x_lower_unfold.dtype)) uA, ubias = _bound_oneside(last_uA, new_upper_d, ub_lower_d if lower_d is None else lower_d, upper_b, lower_b, start_node, patch_size) if last_lA is None: lA, lbias = None, 0. if unstable_or_cut_index.sum() > 0: uC = -x_upper_unfold * nu_hat_pos lC = -x_lower_unfold * nu_hat_pos ubias = -(pi * x_lower_unfold) # ubias[beta_mm_coeffs[1] <= uC] = 0. # ubias[beta_mm_coeffs[1] >= lC] = beta_mm_coeffs[1][beta_mm_coeffs[1] >= lC].to(ubias) ubias[(beta_mm_coeffs[1] <= uC) | z_split_to_0_mask] = 0. 
ubias[(beta_mm_coeffs[1] >= lC) | z_split_to_1_mask] = beta_mm_coeffs[1][(beta_mm_coeffs[1] >= lC) | z_split_to_1_mask].to(ubias) # ubias: unstable, batch, in_C, H, W (same as patches last_uA) => ubias: (unstable, batch) ubias = (ubias * unstable_or_cut_index.to(x_lower_unfold.dtype)).view(ubias.shape[0], ubias.shape[1], -1).sum(-1) else: # dense if last_lA is not None: # ##################### # # C is nu_hat_pos # # last_lA: (spec unstable, batch, current flattened nodes (current_c*current_h*current_w)) # nu_hat_pos = last_lA.clamp(max=0.).abs() # # pi, gamma: spec_unstable, batch, current layer shape (same as last_lA) # # need to customize the upper bound slope and lbias for (1) unstable relus and # # (2) relus that are used with upper boundary relaxation # # original upper bound slope is u/(u-l) also equal to pi/(pi+gamma) if no beta_mm_coeffs[0] # # now the upper bound slope should be pi/(p+gamma) updated with beta_mm_coeffs[0] # # conv layer: # # upper_d: 1, batch, current_c, current_w, current_h # # unstable_upper_bound_index, new_upper_d: spec unstable, batch, current_c, current_w, current_h # # dense layer: # # upper_d: 1, batch, current flattened nodes # # unstable_upper_bound_index, new_upper_d: spec unstable, batch, current flattened nodes nu_hat_pos, pi, new_upper_d, unstable_upper_bound_index = self.jit_arelu_lA(last_lA, x.lower, x.upper, beta_mm_coeffs, unstable_or_cut_index, upper_d, z_split_to_1_mask, z_split_to_0_mask) if last_uA is None: uA, ubias = None, 0. lA, lbias = _bound_oneside(last_lA, lb_lower_d if lower_d is None else lower_d, new_upper_d, lower_b, upper_b, start_node, patch_size) lbias = self.jit_arelu_lbias(unstable_or_cut_index, nu_hat_pos, beta_mm_coeffs, x.lower, x.upper, lbias, pi, z_split_to_1_mask, z_split_to_0_mask) if last_uA is not None: # # C is nu_hat_pos nu_hat_pos, pi, new_upper_d, unstable_upper_bound_index = self.jit_arelu_uA(last_uA, x.lower, x.upper, beta_mm_coeffs, unstable_or_cut_index, upper_d, z_split_to_1_mask, z_split_to_0_mask) # one can test uA by optimize -obj which should have the same obj value uA, ubias = _bound_oneside(last_uA, new_upper_d, ub_lower_d if lower_d is None else lower_d, upper_b, lower_b, start_node, patch_size) if last_lA is None: lA, lbias = None, 0. 
ubias = self.jit_arelu_ubias(unstable_or_cut_index, nu_hat_pos, beta_mm_coeffs, x.lower, x.upper, ubias, pi, z_split_to_1_mask, z_split_to_0_mask) return lA, uA, lbias, ubias def input_cut(self, start_node, lA, uA, current_layer_shape, unstable_idx=None, batch_mask=None): # propagate input neuron in cut constraints through relu layer active_cuts = self.active_cuts[start_node.name] if self.x_coeffs is None or active_cuts.size(0) == 0: return lA, uA if type(lA) is Patches: A = lA if lA is not None else uA self.patch_trick(start_node, "input", A, current_layer_shape) general_beta = self.select_active_general_beta(start_node, unstable_idx) x_coeffs = self.x_coeffs[active_cuts] if batch_mask is not None: general_beta = general_beta[:, :, batch_mask] # general_beta: (2(0 lower, 1 upper), spec, batch, num_constrs) # x_coeffs: (num_constrs, flattened input dims) # beta_bias: (2(0 lower, 1 upper), batch, spec) lA, uA = self.general_beta_coeffs_addmm_to_A(lA, uA, general_beta, x_coeffs, current_layer_shape) return lA, uA def bias_cut(self, start_node, lb, ub, unstable_idx=None, batch_mask=None): active_cuts = self.active_cuts[start_node.name] if self.cut_bias is None or active_cuts.size(0) == 0: return lb, ub bias_coeffs = self.cut_bias[active_cuts] general_beta = self.select_active_general_beta(start_node, unstable_idx) if batch_mask is not None: general_beta = general_beta[:, :, batch_mask] # add bias for the bias term of general cut # general_beta: (2(0 lower, 1 upper), spec, batch, num_constrs) # bias_coeffs: (num_constrs,) # beta_bias: (2(0 lower, 1 upper), batch, spec) beta_bias = torch.einsum('sihj,j->shi', general_beta.to(lb.dtype), bias_coeffs.to(lb.dtype)) lb = lb + beta_bias[0] if lb is not None else None ub = ub - beta_bias[1] if ub is not None else None return lb, ub # Choose upper or lower bounds based on the sign of last_A # this is a copy from activation.py def _bound_oneside(last_A, d_pos, d_neg, b_pos, b_neg, start_node, patch_size): if last_A is None: return None, 0 if type(last_A) == Tensor: A, bias = multiply_by_A_signs(last_A, d_pos, d_neg, b_pos, b_neg, contiguous=True) return A, bias elif type(last_A) == Patches: # if last_A is not an identity matrix assert last_A.identity == 0 if last_A.identity == 0: # last_A shape: [out_c, batch_size, out_h, out_w, in_c, H, W]. Here out_c is the spec dimension. # or (unstable_size, batch_size, in_c, H, W) when it is sparse. patches = last_A.patches patches_shape = patches.shape if len(patches_shape) == 6: patches = patches.view(*patches_shape[:2], -1, *patches_shape[-2:]) if d_pos is not None: d_pos = d_pos.view(*patches_shape[:2], -1, *patches_shape[-2:]) if d_neg is not None: d_neg = d_neg.view(*patches_shape[:2], -1, *patches_shape[-2:]) if b_pos is not None: b_pos = b_pos.view(*patches_shape[:2], -1, *patches_shape[-2:]) if b_neg is not None: b_neg = b_neg.view(*patches_shape[:2], -1, *patches_shape[-2:]) A_prod, bias = multiply_by_A_signs(patches, d_pos, d_neg, b_pos, b_neg) # prod has shape [out_c, batch_size, out_h, out_w, in_c, H, W] or (unstable_size, batch_size, in_c, H, W) when it is sparse. # For sparse patches the return bias size is (unstable_size, batch). # For regular patches the return bias size is (spec, batch, out_h, out_w). if len(patches_shape) == 6: A_prod = A_prod.view(*patches_shape) # Save the patch size, which will be used in init_slope() to determine the number of optimizable parameters. 
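# Sketch (hypothetical shapes) of the sign split that multiply_by_A_signs performs in
# _bound_oneside above, paraphrasing the Tensor branch: the positive part of last_A is
# paired with (d_pos, b_pos), the negative part with (d_neg, b_neg), and the bias
# terms are summed over the neurons.  The patches branch applies the same rule to the
# unfolded patches.
import torch

A = torch.randn(4, 2, 6)                                 # (spec, batch, neurons)
d_pos, d_neg = torch.rand(1, 2, 6), torch.rand(1, 2, 6)
b_pos, b_neg = torch.rand(1, 2, 6), torch.rand(1, 2, 6)
A_new = A.clamp(min=0) * d_pos + A.clamp(max=0) * d_neg
bias = (A.clamp(min=0) * b_pos + A.clamp(max=0) * b_neg).sum(-1)
assert A_new.shape == A.shape and bias.shape == (4, 2)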
if start_node is not None: if last_A.unstable_idx is not None: # Sparse patches, we need to construct the full patch size: (out_c, batch, out_h, out_w, c, h, w). patch_size[start_node.name] = [last_A.output_shape[1], A_prod.size(1), last_A.output_shape[2], last_A.output_shape[3], A_prod.size(-3), A_prod.size(-2), A_prod.size(-1)] else: # Regular patches. patch_size[start_node.name] = A_prod.size() return Patches(A_prod, last_A.stride, last_A.padding, A_prod.shape, unstable_idx=last_A.unstable_idx, output_shape=last_A.output_shape), bias # In patches mode, we need to unfold lower and upper slopes. In matrix mode we simply return. # this is a copy from activation.py def _maybe_unfold(d_tensor, last_A): # d_tensor (out_c, current_c, current_h, current_w): out_c shared the same alpha for spec layer if d_tensor is None: return None # if mode == "matrix" or d_tensor is None or last_A is None: if type(last_A) is not Patches or d_tensor is None or last_A is None: return d_tensor # Input are slopes with shape (spec, batch, input_c, input_h, input_w) # Here spec is the same as out_c. # assert d_tensor.ndim == 5 origin_d_shape = d_tensor.shape if d_tensor.ndim == 6: d_tensor = d_tensor.view(*origin_d_shape[:2], -1, *origin_d_shape[-2:]) d_shape = d_tensor.size() # Reshape to 4-D tensor to unfold. d_tensor = d_tensor.view(-1, *d_tensor.shape[-3:]) # unfold the slope matrix as patches. Patch shape is [spec * batch, out_h, out_w, in_c, H, W). d_unfolded = inplace_unfold(d_tensor, kernel_size=last_A.patches.shape[-2:], stride=last_A.stride, padding=last_A.padding) # Reshape to (spec, batch, out_h, out_w, in_c, H, W); here spec_size is out_c. d_unfolded_r = d_unfolded.view(*d_shape[:-3], *d_unfolded.shape[1:]) if last_A.unstable_idx is not None: if d_unfolded_r.size(0) == 1: if len(last_A.unstable_idx) == 3: # Broadcast the spec shape, so only need to select the reset dimensions. # Change shape to (out_h, out_w, batch, in_c, H, W) or (out_h, out_w, in_c, H, W). d_unfolded_r = d_unfolded_r.squeeze(0).permute(1, 2, 0, 3, 4, 5) d_unfolded_r = d_unfolded_r[last_A.unstable_idx[1], last_A.unstable_idx[2]] elif len(last_A.unstable_idx) == 4: # [spec, batch, output_h, output_w, input_c, H, W] # to [output_h, output_w, batch, in_c, H, W] d_unfolded_r = d_unfolded_r.squeeze(0).permute(1, 2, 0, 3, 4, 5) d_unfolded_r = d_unfolded_r[last_A.unstable_idx[2], last_A.unstable_idx[3]] else: raise NotImplementedError() # output shape: (unstable_size, batch, in_c, H, W). else: d_unfolded_r = d_unfolded_r[last_A.unstable_idx[0], :, last_A.unstable_idx[1], last_A.unstable_idx[2]] # For sparse patches, the shape after unfold is (unstable_size, batch_size, in_c, H, W). # For regular patches, the shape after unfold is (spec, batch, out_h, out_w, in_c, H, W). if d_unfolded_r.ndim != last_A.patches.ndim: d_unfolded_r = d_unfolded_r.unsqueeze(2).unsqueeze(-4) return d_unfolded_r ================================================ FILE: auto_LiRPA/operators/dropout.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. 
## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .base import * class BoundDropout(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if 'ratio' in attr: self.ratio = attr['ratio'] self.dynamic = False else: self.ratio = None self.dynamic = True self.clear() def clear(self): self.mask = None def forward(self, *inputs): x = inputs[0] if not self.training: return x if self.dynamic: # Inputs: data, ratio (optional), training_mode (optional) # We assume ratio must exist in the inputs. # We ignore training_mode, but will use self.training which can be # changed after BoundedModule is built. assert (inputs[1].dtype == torch.float32 or inputs[1].dtype == torch.float64) self.ratio = inputs[1] if self.ratio >= 1: raise ValueError('Ratio in dropout should be less than 1') self.mask = torch.rand(x.shape, device=self.ratio.device) > self.ratio return x * self.mask / (1 - self.ratio) def _check_forward(self): """ If in the training mode, a forward pass should have been called.""" if self.training and self.mask is None: raise RuntimeError('For a model with dropout in the training mode, '\ 'a clean forward pass must be called before bound computation') def bound_backward(self, last_lA, last_uA, *args, **kwargs): empty_A = [(None, None)] * (len(args) -1) if not self.training: return [(last_lA, last_uA), *empty_A], 0, 0 self._check_forward() def _bound_oneside(last_A): if last_A is None: return None return last_A * self.mask / (1 - self.ratio) lA = _bound_oneside(last_lA) uA = _bound_oneside(last_uA) return [(lA, uA), *empty_A], 0, 0 def bound_forward(self, dim_in, x, *args): if not self.training: return x self._check_forward() lw = x.lw * self.mask.unsqueeze(1) / (1 - self.ratio) lb = x.lb * self.mask / (1 - self.ratio) uw = x.uw * self.mask.unsqueeze(1) / (1 - self.ratio) ub = x.ub * self.mask / (1 - self.ratio) return LinearBound(lw, lb, uw, ub) def interval_propagate(self, *v): if not self.training: return v[0] self._check_forward() h_L, h_U = v[0] lower = h_L * self.mask / (1 - self.ratio) upper = h_U * self.mask / (1 - self.ratio) return lower, upper ================================================ FILE: auto_LiRPA/operators/dtype.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .base import * from ..utils import Patches class BoundCast(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.to = attr['to'] # See values of enum DataType in TensorProto. # Unsupported: str, uint16, uint32, uint64. 
self.data_types = [ None, torch.float, torch.uint8, torch.int8, None, torch.int16, torch.int32, torch.int64, None, torch.bool, torch.float16, torch.float64, None, None, torch.complex64, torch.complex128 ] self.type = self.data_types[self.to] assert self.type is not None, "Unsupported type conversion." self.use_default_ibp = True def forward(self, x): self.type_in = x.dtype return x.to(self.type) def bound_backward(self, last_lA, last_uA, x, **kwargs): if type(last_lA) == Tensor or type(last_uA) == Tensor: lA = last_lA.to(self.type_in) if last_lA is not None else None uA = last_uA.to(self.type_in) if last_uA is not None else None else: # Define lA and uA even when one side is None (mirroring the tensor branch above), so the return statement never references an unbound name. lA = Patches(last_lA.patches.to(self.type_in), last_lA.stride, last_lA.padding, last_lA.shape, last_lA.identity, last_lA.unstable_idx, last_lA.output_shape) if last_lA is not None else None uA = Patches(last_uA.patches.to(self.type_in), last_uA.stride, last_uA.padding, last_uA.shape, last_uA.identity, last_uA.unstable_idx, last_uA.output_shape) if last_uA is not None else None return [(lA, uA)], 0, 0 def bound_forward(self, dim_in, x): return LinearBound( x.lw.to(self.type), x.lb.to(self.type), x.uw.to(self.type), x.ub.to(self.type)) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(v[0]) ================================================ FILE: auto_LiRPA/operators/gelu.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory.
## ## ## ######################################################################### import math import torch import torch.nn as nn import torch.nn.functional as F from .s_shaped import BoundTanh from .base import logger # FIXME resolve duplicate code with BoundTanh class BoundGelu(BoundTanh): sqrt_2 = math.sqrt(2) def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options, precompute=False) self.ibp_intermediate = False self.act_func = F.gelu def d_act_func(x): return (0.5 * (1 + torch.erf(x / self.sqrt_2)) + x * torch.exp(-0.5 * x ** 2) / math.sqrt(2 * torch.pi)) self.d_act_func = d_act_func def d2_act_func(x): return (2 * torch.exp(-0.5 * x ** 2) / math.sqrt(2 * torch.pi) - x ** 2 * torch.exp(-0.5 * x ** 2) / math.sqrt(2 * torch.pi)) self.d2_act_func = d2_act_func self.precompute_relaxation(self.act_func, self.d_act_func) def _init_masks(self, x): lower = x.lower upper = x.upper self.mask_left_pos = torch.logical_and(lower >= -self.sqrt_2, upper <= 0) self.mask_left_neg = upper <= -self.sqrt_2 self.mask_left = torch.logical_xor(upper <= 0, torch.logical_or(self.mask_left_pos, self.mask_left_neg)) self.mask_right_pos = lower >= self.sqrt_2 self.mask_right_neg = torch.logical_and(upper <= self.sqrt_2, lower >= 0) self.mask_right = torch.logical_xor(lower >= 0, torch.logical_or(self.mask_right_pos, self.mask_right_neg)) self.mask_2 = torch.logical_and(torch.logical_and(upper > 0, upper <= self.sqrt_2), torch.logical_and(lower < 0, lower >= -self.sqrt_2)) self.mask_left_3 = torch.logical_and(lower < -self.sqrt_2, torch.logical_and( upper > 0, upper <= self.sqrt_2)) self.mask_right_3 = torch.logical_and(upper > self.sqrt_2, torch.logical_and( lower < 0, lower >= -self.sqrt_2)) self.mask_4 = torch.logical_and(lower < -self.sqrt_2, upper > self.sqrt_2) self.mask_both = torch.logical_or(self.mask_2, torch.logical_or(self.mask_4, torch.logical_or(self.mask_left_3, self.mask_right_3))) @torch.no_grad() def precompute_relaxation(self, func, dfunc, x_limit=1000): """ This function precomputes the tangent lines that will be used as lower/upper bounds for S-shapes functions. """ self.x_limit = x_limit self.step_pre = 0.01 self.num_points_pre = int(self.x_limit / self.step_pre) max_iter = 100 logger.debug('Precomputing relaxation for GeLU (pre-activation limit: %f)', x_limit) def check_lower(upper, d): """Given two points upper, d (d <= upper), check if the slope at d will be less than f(upper) at upper.""" k = dfunc(d) # Return True if the slope is a lower bound. return k * (upper - d) + func(d) <= func(upper) def check_upper(lower, d): """Given two points lower, d (d >= lower), check if the slope at d will be greater than f(lower) at lower.""" k = dfunc(d) # Return True if the slope is a upper bound. return k * (lower - d) + func(d) >= func(lower) # Given an upper bound point (>=0), find a line that is guaranteed to # be a lower bound of this function. upper = self.step_pre * torch.arange( 0, self.num_points_pre + 5, device=self.device) + self.sqrt_2 r = torch.ones_like(upper) # Initial guess, the tangent line is at -1. l = -torch.ones_like(upper) while True: # Check if the tangent line at the guessed point is an lower bound at f(upper). checked = check_lower(upper, l).int() # If the initial guess is not smaller enough, then double it (-2, -4, etc). l = checked * l + (1 - checked) * (l * 2) if checked.sum() == l.numel(): break # Now we have starting point at l, its tangent line is guaranteed to # be an lower bound at f(upper). 
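# Added note (sketch of the bisection invariant, inferred from the loop below; not
# part of the original comments): `l` always holds a tangent point whose tangent
# line has been verified by check_lower() to stay below f at `upper`, while `r` is
# the candidate closer to the optimum that may still be infeasible. Each iteration
# tests the midpoint m and preserves this invariant, so after max_iter halvings the
# gap |r - l| has shrunk by a factor of 2**max_iter and the stored tangent point is
# (numerically) the tightest feasible one found.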
# We want to further tighten this bound by moving it closer to 0. for _ in range(max_iter): # Binary search. m = (l + r) / 2 checked = check_lower(upper, m).int() l = checked * m + (1 - checked) * l r = checked * r + (1 - checked) * m # At upper, a line with slope l is guaranteed to lower bound the function. self.d_lower_right = l.clone() # Do the same again: # Given an lower bound point (<=0), find a line that is guaranteed to # be an upper bound of this function. lower = ( -self.step_pre * torch.arange( 0, self.num_points_pre + 5, device=self.device ) + self.sqrt_2).clamp(min=0.01) l = torch.zeros_like(upper) + self.sqrt_2 r = torch.zeros_like(upper) + x_limit while True: checked = check_upper(lower, r).int() r = checked * r + (1 - checked) * (r * 2) if checked.sum() == l.numel(): break for _ in range(max_iter): m = (l + r) / 2 checked = check_upper(lower, m).int() l = (1 - checked) * m + checked * l r = (1 - checked) * r + checked * m self.d_upper_right = r.clone() upper = -self.step_pre * torch.arange( 0, self.num_points_pre + 5, device=self.device) - self.sqrt_2 r = torch.zeros_like(upper) - 0.7517916 # Initial guess, the tangent line is at -1. l = torch.zeros_like(upper) - self.sqrt_2 while True: checked = check_lower(upper, r).int() r = checked * r + (1 - checked) * (r * 2) if checked.sum() == l.numel(): break # Now we have starting point at l, its tangent line is guaranteed to be # an lower bound at f(upper). # We want to further tighten this bound by moving it closer to 0. for _ in range(max_iter): # Binary search. m = (l + r) / 2 checked = check_lower(upper, m).int() l = (1 - checked) * m + checked * l r = (1 - checked) * r + checked * m # At upper, a line with slope l is guaranteed to lower bound the function. self.d_lower_left = r.clone() # Do the same again: # Given an lower bound point (<=0), find a line that is guaranteed to # be an upper bound of this function. 
lower = ( self.step_pre * torch.arange( 0, self.num_points_pre + 5, device=self.device ) - self.sqrt_2).clamp(max=0) l = torch.zeros_like(upper) - x_limit r = torch.zeros_like(upper) - self.sqrt_2 while True: checked = check_upper(lower, l).int() l = checked * l + (1 - checked) * (l * 2) if checked.sum() == l.numel(): break for _ in range(max_iter): m = (l + r) / 2 checked = check_upper(lower, m).int() l = (1 - checked) * m + checked * l r = (1 - checked) * r + checked * m self.d_upper_left = r.clone() logger.debug('Done') def opt_init(self): super().opt_init() self.tp_right_lower_init = {} self.tp_right_upper_init = {} self.tp_left_lower_init = {} self.tp_left_upper_init = {} self.tp_both_lower_init = {} def _init_opt_parameters_impl(self, size_spec, name_start): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper shape = [size_spec] + list(l.shape) alpha = torch.empty(14, *shape, device=l.device) alpha.data[:4] = ((l + u) / 2).unsqueeze(0).expand(4, *shape) alpha.data[4:6] = self.tp_right_lower_init[name_start].expand(2, *shape) alpha.data[6:8] = self.tp_right_upper_init[name_start].expand(2, *shape) alpha.data[8:10] = self.tp_left_lower_init[name_start].expand(2, *shape) alpha.data[10:12] = self.tp_left_upper_init[name_start].expand(2, *shape) alpha.data[12:14] = self.tp_both_lower_init[name_start].expand(2, *shape) return alpha def forward(self, x): return F.gelu(x) def bound_relax_impl(self, x, func, dfunc): lower, upper = x.lower, x.upper y_l, y_u = func(lower), func(upper) # k_direct is the slope of the line directly connect # (lower, func(lower)), (upper, func(upper)). k_direct = k = (y_u - y_l) / (upper - lower).clamp(min=1e-8) # Fixed bounds that cannot be optimized. self.mask_neg are the masks # for neurons with upper bound <= 0. # Upper bound for the case of input lower bound <= 0, is always the direct line. self.add_linear_relaxation( mask=torch.logical_or( torch.logical_or(self.mask_left_pos, self.mask_right_neg), self.mask_both ), type='upper', k=k_direct, x0=lower, y0=y_l) # Lower bound for the case of input upper bound >= 0, is always the direct line. self.add_linear_relaxation( mask=torch.logical_or(self.mask_left_neg, self.mask_right_pos), type='lower', k=k_direct, x0=lower, y0=y_l) # Indices of neurons with input upper bound >= sqrt(2), # whose optimal slope to lower bound on the right side was pre-computed. d_lower_right = self.retrieve_from_precompute( self.d_lower_right, upper - self.sqrt_2, lower) # Indices of neurons with input lower bound <= -sqrt(2), # whose optimal slope to lower bound on the left side was pre-computed. d_lower_left = self.retrieve_from_precompute( self.d_lower_left, -lower - self.sqrt_2, upper) # Indices of neurons with input lower bound <= sqrt(2), # whose optimal slope to upper bound on the right side was pre-computed. d_upper_right = self.retrieve_from_precompute( self.d_upper_right, -lower + self.sqrt_2, upper) # Indices of neurons with input lower bound <= sqrt(2), # whose optimal slope to upper bound on the right side was pre-computed. d_upper_left = self.retrieve_from_precompute( self.d_upper_left, -lower - self.sqrt_2, upper) if self.opt_stage in ['opt', 'reuse']: if not hasattr(self, 'alpha'): # Raise an error if alpha is not created. 
self._no_bound_parameters() ns = self._start # Clipping is done here rather than after `opt.step()` call # because it depends on pre-activation bounds self.alpha[ns].data[0:2] = torch.max( torch.min(self.alpha[ns][0:2], upper), lower) self.alpha[ns].data[2:4] = torch.max( torch.min(self.alpha[ns][2:4], upper), lower) self.alpha[ns].data[4:6] = torch.max( torch.min(self.alpha[ns][4:6], d_lower_right), lower) self.alpha[ns].data[6:8] = torch.max( self.alpha[ns][6:8], d_upper_right) self.alpha[ns].data[8:10] = torch.min( torch.max(self.alpha[ns][8:10], d_lower_left), upper) self.alpha[ns].data[10:12] = torch.min( self.alpha[ns][10:12], d_upper_left) self.alpha[ns].data[12:14] = torch.min( torch.max(self.alpha[ns][12:14], d_lower_left), d_lower_right) # shape [2, out_c, n, c, h, w]. tp_pos = self.alpha[ns][0:2] # For upper bound relaxation tp_neg = self.alpha[ns][2:4] # For lower bound relaxation tp_right_lower = self.alpha[ns][4:6] tp_right_upper = self.alpha[ns][6:8] tp_left_lower = self.alpha[ns][8:10] tp_left_upper = self.alpha[ns][10:12] tp_both_lower = self.alpha[ns][12:14] # No need to use tangent line, when the tangent point is at the left # side of the preactivation lower bound. Simply connect the two sides. mask_direct = torch.logical_and(self.mask_right, k_direct < dfunc(lower)) self.add_linear_relaxation( mask=mask_direct, type='lower', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_or(self.mask_right_3, torch.logical_xor(self.mask_right, mask_direct)), type='lower', k=dfunc(tp_right_lower), x0=tp_right_lower) mask_direct = torch.logical_and(self.mask_left, k_direct > dfunc(upper)) self.add_linear_relaxation( mask=mask_direct, type='lower', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_or(self.mask_left_3, torch.logical_xor(self.mask_left, mask_direct)), type='lower', k=dfunc(tp_left_lower), x0=tp_left_lower) mask_direct = torch.logical_and(self.mask_right, k_direct < dfunc(upper)) self.add_linear_relaxation( mask=mask_direct, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_right, mask_direct), type='upper', k=dfunc(tp_right_upper), x0=tp_right_upper) mask_direct = torch.logical_and(self.mask_left, k_direct > dfunc(lower)) self.add_linear_relaxation( mask=mask_direct, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_left, mask_direct), type='upper', k=dfunc(tp_left_upper), x0=tp_left_upper) self.add_linear_relaxation( mask=self.mask_4, type='lower', k=dfunc(tp_both_lower), x0=tp_both_lower) self.add_linear_relaxation( mask=torch.logical_or(torch.logical_or(self.mask_left_pos, self.mask_right_neg), self.mask_2), type='lower', k=dfunc(tp_neg), x0=tp_neg) self.add_linear_relaxation( mask=torch.logical_or(self.mask_right_pos, self.mask_left_neg), type='upper', k=dfunc(tp_pos), x0=tp_pos) else: if self.opt_stage == 'init': # Initialize optimizable slope. tp_right_lower_init = d_lower_right.detach() tp_right_upper_init = d_upper_right.detach() tp_left_lower_init = d_lower_left.detach() tp_left_upper_init = d_upper_left.detach() tp_both_lower_init = d_lower_right.detach() ns = self._start self.tp_right_lower_init[ns] = tp_right_lower_init self.tp_right_upper_init[ns] = tp_right_upper_init self.tp_left_lower_init[ns] = tp_left_lower_init self.tp_left_upper_init[ns] = tp_left_upper_init self.tp_both_lower_init[ns] = tp_both_lower_init # Not optimized (vanilla CROWN bound). 
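# Added worked example (hand-computed sketch, not from the original source; phi/Phi
# denote the standard normal pdf/cdf): take pre-activation bounds l = -1, u = 1,
# which fall into mask_2 since -sqrt(2) <= l < 0 < u <= sqrt(2). GELU is convex on
# this whole segment because gelu''(x) = phi(x) * (2 - x**2) >= 0 for |x| <= sqrt(2), so
#   * the upper bound is the chord through the endpoints:
#       k_direct = (gelu(1) - gelu(-1)) / 2 ~= (0.8413 + 0.1587) / 2 = 0.5,
#       i.e. gelu(x) <= 0.5 * (x + 1) - 0.1587 on [-1, 1];
#   * the (unoptimized) lower bound below is the tangent at the midpoint m = 0:
#       gelu'(0) = Phi(0) = 0.5 and gelu(0) = 0, i.e. gelu(x) >= 0.5 * x.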
# Use the middle point slope as the lower/upper bound. Not optimized. m = (lower + upper) / 2 y_m = func(m) k = dfunc(m) # Lower bound is the middle point slope for the case input upper bound <= 0. # Note that the upper bound in this case is the direct line between # (lower, func(lower)) and (upper, func(upper)). self.add_linear_relaxation( mask=torch.logical_or( torch.logical_or(self.mask_left_pos, self.mask_right_neg), self.mask_2 ), type='lower', k=k, x0=m, y0=y_m) # Upper bound is the middle point slope for the case input lower bound >= 0. # Note that the lower bound in this case is the direct line between # (lower, func(lower)) and (upper, func(upper)). self.add_linear_relaxation(mask=torch.logical_or(self.mask_right_pos, self.mask_left_neg), type='upper', k=k, x0=m, y0=y_m) # Now handle the case where input lower bound <=0 and upper bound >= 0. # A tangent line starting at d_lower is guaranteed to be a lower bound # given the input upper bound. mask_direct = torch.logical_and(self.mask_right, k_direct < dfunc(lower)) self.add_linear_relaxation(mask=mask_direct, type='lower', k=k_direct, x0=lower, y0=y_l) # Otherwise we do not use the direct line, we use the d_lower slope. self.add_linear_relaxation( mask=torch.logical_or(torch.logical_or(self.mask_right_3, self.mask_4), torch.logical_xor(self.mask_right, mask_direct)), type='lower', k=dfunc(d_lower_right), x0=d_lower_right) mask_direct = torch.logical_and(self.mask_left, k_direct > dfunc(upper)) self.add_linear_relaxation(mask=mask_direct, type='lower', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_or(self.mask_left_3, torch.logical_xor(self.mask_left, mask_direct)), type='lower', k=dfunc(d_lower_left), x0=d_lower_left) mask_direct = torch.logical_and(self.mask_right, k_direct < dfunc(upper)) self.add_linear_relaxation( mask=mask_direct, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_right, mask_direct), type='upper', k=dfunc(d_upper_right), x0=d_upper_right) mask_direct = torch.logical_and(self.mask_left, k_direct > dfunc(lower)) self.add_linear_relaxation( mask=mask_direct, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_left, mask_direct), type='upper', k=dfunc(d_upper_left), x0=d_upper_left) def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) self.bound_relax_impl(x, self.act_func, self.d_act_func) def interval_propagate(self, *v): pl, pu = self.forward(v[0][0]), self.forward(v[0][1]) pl, pu = torch.min(pl, pu), torch.max(pl, pu) min_global = self.forward(torch.tensor(-0.7517916)) pl, pu = torch.min(min_global, torch.min(pl, pu)), torch.max(pl, pu) return pl, pu class GELUOp(torch.autograd.Function): sqrt_2 = math.sqrt(2) sqrt_2pi = math.sqrt(2 * math.pi) @staticmethod def symbolic(g, x): return g.op('custom::Gelu', x) @staticmethod def forward(ctx, x): ctx.save_for_backward(x) return torch.nn.functional.gelu(x) @staticmethod def backward(ctx, grad_output): x, = ctx.saved_tensors grad_input = grad_output.clone() grad = (0.5 * (1 + torch.erf(x / GELUOp.sqrt_2)) + x * torch.exp(-0.5 * x ** 2) / GELUOp.sqrt_2pi) return grad_input * grad class GELU(nn.Module): def forward(self, x): return GELUOp.apply(x) ================================================ FILE: auto_LiRPA/operators/indexing.py ================================================ ######################################################################### ## This file is part of the 
auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .base import * from ..patches import Patches, patches_to_matrix from torch.nn import Module class BoundGather(Bound): def __init__(self, attr, x, output_index, options): super().__init__(attr, x, output_index, options) self.axis = attr['axis'] if 'axis' in attr else 0 def forward(self, x, indices): self.indices = indices if self.axis == -1: self.axis = len(x.shape) - 1 # BoundShape.shape() will return values on cpu only x = x.to(self.indices.device) if indices.ndim == 0: if indices == -1: self.indices = x.shape[self.axis] + indices return torch.index_select(x, dim=self.axis, index=self.indices).squeeze(self.axis) elif indices.ndim == 1: if self.axis == 0: assert not self.perturbed # `index_select` requires `indices` to be a 1-D tensor return torch.index_select(x, dim=self.axis, index=indices) raise ValueError('Unsupported shapes in Gather: ' f'data {x.shape}, indices {indices.shape}, ' f'axis {self.axis}') def bound_backward(self, last_lA, last_uA, *args, **kwargs): assert self.from_input def _expand_A_with_zeros(A, axis, idx, max_axis_size): # Need to recreate A with three parts: before the gathered element, gathered element, and after gathered element. tensors = [] if idx < 0: idx = max_axis_size + idx if idx > 0: shape_pre = list(A.shape) shape_pre[axis] *= idx # Create the same shape as A, except for the dimension to be gathered. tensors.append(torch.zeros(shape_pre, device=A.device)) # The gathered element itself, in the middle. tensors.append(A) if max_axis_size - idx - 1 > 0: shape_next = list(A.shape) shape_next[axis] *= max_axis_size - idx - 1 # Create the rest part of A. tensors.append(torch.zeros(shape_next, device=A.device)) # Concatenate all three parts together. return torch.cat(tensors, dim=axis) def _bound_oneside(A): if A is None: return None if isinstance(A, torch.Tensor): if self.indices.ndim == 0: A = A.unsqueeze(self.axis + 1) idx = int(self.indices) return _expand_A_with_zeros(A, self.axis + 1, idx, self.input_shape[self.axis]) else: shape = list(A.shape) final_A = torch.zeros(*shape[:self.axis + 1], self.input_shape[self.axis], *shape[self.axis + 2:], device=A.device) idx = self.indices.view([*[1]*(self.axis+1), -1, *[1]*len(shape[self.axis + 2:])]) idx = idx.repeat([*A.shape[:self.axis+1], 1, *A.shape[self.axis+2:]]) final_A.scatter_add_(dim=self.axis+1, index=idx, src=A) return final_A elif isinstance(A, Patches): if self.indices.ndim == 0: idx = int(self.indices) assert len(self.input_shape) == 4 and self.axis == 1, "Gather is only supported on the channel dimension for Patches mode." # For gather in the channel dimension, we only need to deal with the in_c dimension (-3) in patches. patches = A.patches # -3 is the in_c dimension. 
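# Added shape sketch (illustrative; the exact sizes depend on the caller): with an
# input of shape (N, C, H, W) and a scalar channel index `idx`, the patches at this
# point carry a size-1 channel slot at dim -3, e.g. (out_c, batch, out_h, out_w, 1,
# h, w). _expand_A_with_zeros below concatenates idx zero-channels before it and
# C - idx - 1 zero-channels after it along dim -3, so the padded patches line up
# with all C channels of the pre-Gather layer again.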
new_patches = _expand_A_with_zeros(patches, axis=-3, idx=idx, max_axis_size=self.input_shape[self.axis]) return A.create_similar(new_patches) else: raise NotImplementedError else: raise ValueError(f'Unknown last_A type {type(A)}') return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def bound_forward(self, dim_in, x, indices): assert self.indices.numel() == 1 and self.indices.ndim <= 1 and (self.indices >= 0).all() if isinstance(x, torch.Size): lw = uw = torch.zeros(dim_in, device=self.device) lb = ub = torch.index_select( torch.tensor(x, device=self.device), dim=self.axis, index=self.indices).squeeze(self.axis) else: axis = self.axis + 1 lw = torch.index_select(x.lw, dim=self.axis + 1, index=self.indices) uw = torch.index_select(x.uw, dim=self.axis + 1, index=self.indices) lb = torch.index_select(x.lb, dim=self.axis, index=self.indices) ub = torch.index_select(x.ub, dim=self.axis, index=self.indices) if self.indices.ndim == 0: lw = lw.squeeze(axis) uw = uw.squeeze(axis) lb = lb.squeeze(self.axis) ub = ub.squeeze(self.axis) return LinearBound(lw, lb, uw, ub) def interval_propagate(self, *v): assert not self.is_input_perturbed(1) return self.forward(v[0][0], v[1][0]), self.forward(v[0][1], v[1][0]) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(v[0], v[1]) def build_gradient_node(self, grad_upstream): return [(GatherGrad(self.axis, self.indices, self.input_shape), (grad_upstream,), []), None] class GatherGrad(Module): def __init__(self, axis, indices, input_shape): super().__init__() self.axis = axis self.indices = indices self.input_shape = input_shape def forward(self, grad_last): # TODO: It's better to use scatter_add_ instead of cat. # This is a workaround for the fact that scatter_add_ does not support negative indices. # Scalar indices case (ndim == 0) if self.indices.ndim == 0: grad_unsq = grad_last.unsqueeze(self.axis) # Get the scalar index and adjust if negative. idx = int(self.indices) if idx < 0: idx = self.input_shape[self.axis] + idx # Build the gradient by concatenating three parts along self.axis: tensors = [] # 1. Zeros block before the gathered element (if idx > 0) if idx > 0: shape_pre = list(grad_unsq.shape) shape_pre[self.axis] = idx # pre-block has size idx along self.axis zeros_pre = torch.zeros(shape_pre, dtype=grad_last.dtype, device=grad_last.device) tensors.append(zeros_pre) # 2. The gathered gradient slice (already in grad_unsq) tensors.append(grad_unsq) # 3. Zeros block after the gathered element num_after = self.input_shape[self.axis] - idx - 1 if num_after > 0: shape_post = list(grad_unsq.shape) shape_post[self.axis] = num_after zeros_post = torch.zeros(shape_post, dtype=grad_last.dtype, device=grad_last.device) tensors.append(zeros_post) # Concatenate all parts along self.axis to form the full gradient tensor. grad_input = torch.cat(tensors, dim=self.axis) return grad_input # 1-D indices case (ndim == 1) elif self.indices.ndim == 1: grad_slices = [] # Iterate over each position in the original input along self.axis. for i in range(self.input_shape[self.axis]): # matching: tensor of indices (in grad_last) where the gathered index equals i. matching = (self.indices == i).nonzero(as_tuple=False).squeeze(-1) if matching.numel() == 0: # No matching index: create a zeros slice with the same shape as one slice of grad_last. 
slice_shape = list(grad_last.shape) slice_shape[self.axis] = 1 # single slice along self.axis grad_slice = torch.zeros(slice_shape, dtype=grad_last.dtype, device=grad_last.device) else: # There are one or more matching positions. # For each matching index j, extract the corresponding slice from grad_last. slice_list = [] for j in matching.tolist(): # Build slicing object:select all elements, but at self.axis take index j. slicer = [slice(None)] * grad_last.dim() slicer[self.axis] = j # Extract the slice and add back the missing dimension. slice_j = grad_last[tuple(slicer)].unsqueeze(self.axis) slice_list.append(slice_j) # Concatenate all slices along self.axis; if there are duplicates, sum them. cat_slices = torch.cat(slice_list, dim=self.axis) # Sum along self.axis to accumulate contributions from duplicate indices. grad_slice = cat_slices.sum(dim=self.axis, keepdim=True) # Append the slice corresponding to position i. grad_slices.append(grad_slice) # Concatenate all slices in order along self.axis to form the final gradient tensor. grad_input = torch.cat(grad_slices, dim=self.axis) return grad_input else: raise ValueError("Unsupported indices dimensions in gradient for Gather") class BoundGatherElements(Bound): def __init__(self, attr, input, output_index, options): super().__init__(attr, input, output_index, options) self.axis = attr['axis'] def forward(self, x, index): self.index = index return torch.gather(x, dim=self.axis, index=index) def bound_backward(self, last_lA, last_uA, x, index, **kwargs): assert self.from_input dim = self._get_dim() def _bound_oneside(last_A): if last_A is None: return None A = torch.zeros( last_A.shape[0], last_A.shape[1], *x.output_shape[1:], device=last_A.device) A.scatter_( dim=dim + 1, index=self.index.unsqueeze(0).repeat(A.shape[0], *([1] * (A.ndim - 1))), src=last_A) return A return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def interval_propagate(self, *v): assert not self.is_input_perturbed(1) return self.forward(v[0][0], v[1][0]), \ self.forward(v[0][1], v[1][1]) def bound_forward(self, dim_in, x, index): assert self.axis != 0 dim = self._get_dim() return LinearBound( torch.gather(x.lw, dim=dim + 1, index=self.index.unsqueeze(1).repeat(1, dim_in, 1)), torch.gather(x.lb, dim=dim, index=self.index), torch.gather(x.uw, dim=dim + 1, index=self.index.unsqueeze(1).repeat(1, dim_in, 1)), torch.gather(x.ub, dim=dim, index=self.index)) def _get_dim(self): dim = self.axis if dim < 0: dim = len(self.output_shape) + dim return dim ================================================ FILE: auto_LiRPA/operators/jacobian.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import torch from torch.nn import Module from .base import Bound from ..utils import prod class JacobianOP(torch.autograd.Function): @staticmethod def symbolic(g, output, input): return g.op('grad::jacobian', output, input).setType(output.type()) @staticmethod def forward(ctx, output, input): output_ = output.flatten(1) return torch.zeros( output.shape[0], output_.shape[-1], *input.shape[1:], device=output.device) class BoundJacobianOP(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) def forward(self, output, input): return JacobianOP.apply(output, input) class BoundJacobianInit(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.never_perturbed = True def forward(self, x): dim = prod(x.shape[1:]) eye = torch.eye(dim, device=x.device, requires_grad=x.requires_grad) eye = eye.unsqueeze(0).expand( x.shape[0], -1, -1 ).view(x.shape[0], dim, *x.shape[1:]) return eye class GradNorm(Module): def __init__(self, norm=1): super().__init__() self.norm = norm def forward(self, grad): grad = grad.view(grad.size(0), -1) if self.norm == 1: # torch.norm is not supported in auto_LiRPA yet # use simpler operators for now return grad.abs().sum(dim=-1, keepdim=True) elif self.norm == 2: return (grad * grad).sum(dim=-1) else: raise NotImplementedError(self.norm) ================================================ FILE: auto_LiRPA/operators/leaf.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Leaf nodes (indepedent nodes in the auto_LiRPA paper). Including input, parameter, buffer, etc.""" from itertools import chain from .base import * class BoundInput(Bound): def __init__(self, ori_name, value, perturbation=None, input_index=None, options=None, attr=None): super().__init__(options=options, attr=attr) self.ori_name = ori_name self.value = value self.perturbation = perturbation self.from_input = True self.input_index = input_index self.no_jacobian = True def __setattr__(self, key, value): super().__setattr__(key, value) # Update perturbed property based on the perturbation set. 
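# Added usage sketch (hedged; PerturbationLpNorm is the perturbation class defined
# elsewhere in this library): the hook below is what marks an input as perturbed,
#   node.perturbation = PerturbationLpNorm(norm=float('inf'), eps=0.1)  # -> node.perturbed == True
#   node.perturbation = None                                            # -> node.perturbed == False
# and downstream bound computation uses this flag to decide whether the node needs
# relaxation or can be treated as a constant.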
if key == "perturbation": if self.perturbation is not None: self.perturbed = True else: self.perturbed = False def forward(self): return self.value def bound_forward(self, dim_in): assert 0 def bound_backward(self, last_lA, last_uA, **kwargs): raise ValueError('{} is a BoundInput node and should not be visited here'.format( self.name)) def interval_propagate(self, *v): raise ValueError('{} is a BoundInput node and should not be visited here'.format( self.name)) class BoundParams(BoundInput): def __init__(self, ori_name, value, perturbation=None, options=None, attr=None): super().__init__(ori_name, None, perturbation, attr=attr) self.register_parameter('param', value) if options is None: options = {} self.auto_requires_grad = options.get("param", {}).get("auto_requires_grad", True) self.from_input = False def register_parameter(self, name, param): """Override register_parameter() hook to register only needed parameters.""" if name == 'param': return super().register_parameter(name, param) else: # Just register it as a normal property of class. object.__setattr__(self, name, param) def init(self, initializing=False): self.initializing = initializing def forward(self): param = self.param if self.auto_requires_grad: param = param.requires_grad_(self.training) return param class BoundBuffers(BoundInput): def __init__(self, ori_name, value, perturbation=None, options=None, attr=None): super().__init__(ori_name, None, perturbation, attr=attr) self.register_buffer('buffer', value.clone().detach()) # BoundBuffers are like constants and they are by default not from inputs. # The "has_batchdim" was a hack that will forcibly set BoundBuffer to be # from inputs, to workaround buffers with a batch size dimension. This is # not needed in most cases now. if 'buffers' in options and 'has_batchdim' in options['buffers']: warnings.warn('The "has_batchdim" option for BoundBuffers is deprecated.' ' It may be removed from the next release.') self.from_input = options.get('buffers', {}).get('has_batchdim', False) def forward(self): return self.buffer ================================================ FILE: auto_LiRPA/operators/linear.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """ Linear (possibly with weight perturbation) or Dot product layers """ from torch import Tensor from torch.nn import Module from typing import Tuple, List from .activation_base import BoundOptimizableActivation from .base import * from .bivariate import BoundMul, MulHelper from .leaf import BoundParams, BoundBuffers from ..patches import Patches, inplace_unfold from .solver_utils import grb from .clampmult import multiply_by_A_signs EPS = 1e-2 class BoundLinear(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): # Gemm: # A = A if transA == 0 else A.T # B = B if transB == 0 else B.T # C = C if C is not None else np.array(0) # Y = alpha * np.dot(A, B) + beta * C # return Y super().__init__(attr, inputs, output_index, options) # Defaults in ONNX self.transA = 0 self.transB = 0 self.alpha_linear = 1.0 self.beta_linear = 1.0 if attr is not None: self.transA = attr['transA'] if 'transA' in attr else self.transA self.transB = attr['transB'] if 'transB' in attr else self.transB self.alpha_linear = attr['alpha'] if 'alpha' in attr else self.alpha_linear self.beta_linear = attr['beta'] if 'beta' in attr else self.beta_linear options = options or {} self.opt_matmul = options.get('matmul') self.splittable = False self.mul_helper = MulHelper() self.use_seperate_weights_for_lower_and_upper_bounds = False self.batched_weight_and_bias = False self.share_alphas = options.get('matmul', {}).get('share_alphas', False) self.mul_middle = options.get('mul', {}).get('middle', False) # For MatMul, it's possible that only the second input is perturbed. # In this case, we swap the roles of x and weight. self.swap_x_and_weight = False def _preprocess(self, a, b, c=None): """Handle tranpose and linear coefficients.""" if self.transA and isinstance(a, Tensor): a = a.transpose(-2,-1) if self.alpha_linear != 1.0: a = self.alpha_linear * a if not self.transB and isinstance(b, Tensor): # our code assumes B is transposed (common case), so we transpose B # only when it is not transposed in gemm. b = b.transpose(-2, -1) if c is not None: if self.beta_linear != 1.0: c = self.beta_linear * c return a, b, c def init_opt_parameters(self, start_nodes): shared_alpha_dims = [] if self.share_alphas: # TODO Temporarily an adhoc check for alpha sharing. count_matmul = len([item for item in self._all_optimizable_activations if isinstance(item, BoundLinear)]) if count_matmul >= 6: shared_alpha_dims = [1, 2, 3] elif count_matmul >= 4: shared_alpha_dims = [1, 2] input_lb = [xi.lower for xi in self.inputs] input_ub = [xi.upper for xi in self.inputs] input_lb = self._preprocess(*input_lb) input_ub = self._preprocess(*input_ub) x_l, x_u, y_l, y_u = self._reshape(input_lb[0], input_ub[0], input_lb[1], input_ub[1]) assert x_l.ndim == y_l.ndim shape = [1 if i in shared_alpha_dims else max(x_l.shape[i], y_l.shape[i]) for i in range(x_l.ndim)] for start_node in start_nodes: ns, size_s = start_node[:2] # start_node[3] == False means that this start node is not the final node # if not start_node[3]: # # NOTE Experimental code. Please check how it will impact the results. 
# size_s = 1 if isinstance(size_s, torch.Size): # TODO do not give torch.Size size_s = prod(size_s) elif isinstance(size_s, (list, tuple)): size_s = size_s[0] self.alpha[ns] = torch.ones(4, size_s, *shape, device=x_l.device) def forward(self, x, w, b=None): x, w, b = self._preprocess(x, w, b) self.input_shape = self.x_shape = x.shape self.y_shape = w.t().shape res = x.matmul(w.t()) if b is not None: res += b return res def onehot_mult(self, weight, bias, C, batch_size): """Multiply weight matrix with a diagonal matrix with selected rows.""" if C is None: return None, 0.0 new_weight = None new_bias = 0.0 if C.index.ndim == 2: # Shape is [spec, batch] index = C.index.transpose(0, 1) coeffs = C.coeffs.transpose(0, 1) else: index = C.index coeffs = C.coeffs if C.index.ndim == 1: # Every element in the batch shares the same rows. if weight is not None: new_weight = self.non_deter_index_select( weight, dim=0, index=index ).unsqueeze(1).expand( [-1, batch_size] + [-1] * (weight.ndim - 1)) if bias is not None: new_bias = self.non_deter_index_select( bias, dim=0, index=index ).unsqueeze(1).expand(-1, batch_size) elif C.index.ndim == 2: # Every element in the batch has different rows, but the number of # rows are the same. This essentially needs a batched index_select function. if weight is not None: new_weight = batched_index_select( weight.unsqueeze(0), dim=1, index=index) if bias is not None: new_bias = batched_index_select( bias.unsqueeze(0), dim=1, index=index) if C.coeffs is not None: if weight is not None: new_weight = new_weight * coeffs.unsqueeze(-1) if bias is not None: new_bias = new_bias * coeffs if C.index.ndim == 2: # Eventually, the shape of A is [spec, batch, *node] so need a transpose. new_weight = new_weight.transpose(0, 1) new_bias = new_bias.transpose(0, 1) return new_weight, new_bias def bound_backward(self, last_lA, last_uA, *x, start_node=None, reduce_bias=True, **kwargs): assert len(x) == 2 or len(x) == 3 if start_node is not None: self._start = start_node.name has_bias = len(x) == 3 # x[0]: input node, x[1]: weight, x[2]: bias input_lb = [xi.lower for xi in x] input_ub = [xi.upper for xi in x] if self.swap_x_and_weight: input_lb = [input_lb[1].transpose(-1, -2) if input_lb[1] is not None else None, input_lb[0].transpose(-1, -2) if input_lb[0] is not None else None, input_lb[2:]] input_ub = [input_ub[1].transpose(-1, -2) if input_ub[1] is not None else None, input_ub[0].transpose(-1, -2) if input_ub[0] is not None else None, input_ub[2:]] if last_lA is not None: if isinstance(last_lA, torch.Tensor): last_lA = last_lA.transpose(-1, -2) elif isinstance(last_lA, eyeC): last_lA = last_lA._replace(shape=last_lA.shape[:-2] + (last_lA.shape[-1], last_lA.shape[-2])) else: raise NotImplementedError( f"last_lA's type {type(last_lA)} is not supported for transpose in the case of swapping x and weight.") if last_uA is not None: if isinstance(last_uA, torch.Tensor): last_uA = last_uA.transpose(-1, -2) elif isinstance(last_uA, eyeC): last_uA = last_uA._replace(shape=last_uA.shape[:-2] + (last_uA.shape[-1], last_uA.shape[-2])) else: raise NotImplementedError( f"last_uA's type {type(last_uA)} is not supported for transpose in the case of swapping x and weight.") # transpose and scale each term if necessary. 
input_lb = self._preprocess(*input_lb) input_ub = self._preprocess(*input_ub) lA_y = uA_y = lA_bias = uA_bias = None lbias = ubias = 0 batch_size = last_lA.shape[1] if last_lA is not None else last_uA.shape[1] weight = input_lb[1] bias = input_lb[2] if has_bias else None def _bound_oneside(last_A, weight_override=None): # For most applications, weight_override should be left as None # This will cause used_weight to be set to weight, which is the weight # assigned to input_lb[1]. The only reason provide an override weight # is if this layer has different weights for it's lower and upper bounds. # That is currently only the case for the implementation of output # constraints, where lower and upper bounds use distinct gammas. if weight_override is None: used_weight = weight else: used_weight = weight_override if last_A is None: return None, 0 if isinstance(last_A, torch.Tensor): # Matrix mode. # Just multiply this layer's weight into bound matrices, and produce biases. if self.batched_weight_and_bias: # last_A is the A at the current layer (self) # next_A is the A for the layer consumed by the current (self) one # "next_A" makes sense because we're backpropagating. However, the below shapes # will refer to "prev_layer", which also is the layer that is consumed by # the current (self) one. That's because they should match the documentation in # output_constraints.py, which is written from a "forward facing" point of view. # We have: last_A.shape = (unstable_neurons, batch_size, this_layer_neurons) # We want: next_A.shape = (unstable_neurons, batch_size, prev_layer_neurons) # We also have # used_weight.shape = (batch_size, this_layer_neurons, prev_layer_neurons) mod_last_A = last_A.unsqueeze(2) mod_used_weight = used_weight.unsqueeze(0) # mod_last_A.shape = (unstable_neurons, batch_size, 1, this_layer_neurons) # mod_used_weight.shape = (1, batch_size, this_layer_neurons, prev_layer_neurons) mod_next_A = mod_last_A.to(mod_used_weight).matmul(mod_used_weight) # mod_next_A.shape = (unstable_neurons, batch_size, 1, prev_layer_neurons) next_A = mod_next_A.squeeze(2) # next_A.shape = (unstable_neurons, batch_size, prev_layer_neurons) if has_bias: # bias.shape = (batch_size, this_layer_neurons) mod_bias = bias.unsqueeze(0).unsqueeze(3) # mod_bias.shape = (1, batch_size, this_layer_neurons, 1) # mod_last_A.shape = (unstable_neurons, batch_size, 1, this_layer_neurons) mod_sum_bias = mod_last_A.to(mod_bias).matmul(mod_bias) # mod_sum_bias.shape = (unstable_neurons, batch_size, 1, 1) sum_bias = mod_sum_bias.squeeze(3).squeeze(2) # sum_bias.shape = (unstable_neurons, batch_size) else: next_A = last_A.to(used_weight).matmul(used_weight) sum_bias = (last_A.to(bias).matmul(bias) if has_bias else 0.0) else: assert isinstance(last_A, Patches) assert not self.batched_weight_and_bias # Patches mode. After propagating through this layer, it will become a matrix. # Reshape the weight matrix as a conv image. # Weight was in (linear_output_shape, linear_input_shape) # Reshape it to (linear_input_shape, c, h, w) reshaped_weight = used_weight.transpose(0, 1).view( -1, *last_A.input_shape[1:]) # After unfolding the shape is # (linear_input_shape, output_h, output_w, in_c, patch_h, patch_w) unfolded_weight = inplace_unfold( reshaped_weight, kernel_size=last_A.patches.shape[-2:], stride=last_A.stride, padding=last_A.padding, inserted_zeros=last_A.inserted_zeros, output_padding=last_A.output_padding) if has_bias: # Do the same for the bias. 
reshaped_bias = bias.view(*last_A.input_shape[1:]).unsqueeze(0) # After unfolding the bias shape is (1, output_h, output_w, in_c, patch_h, patch_w) unfolded_bias = inplace_unfold( reshaped_bias, kernel_size=last_A.patches.shape[-2:], stride=last_A.stride, padding=last_A.padding, inserted_zeros=last_A.inserted_zeros, output_padding=last_A.output_padding) if last_A.unstable_idx is not None: # In this case, the last_A shape is (num_unstable, batch, out_c, patch_h, patch_w) # Reshape our weight to (output_h, output_w, 1, in_c, patch_h, patch_w, linear_input_shape), 1 is the inserted batch dim. unfolded_weight_r = unfolded_weight.permute(1, 2, 3, 4, 5, 0).unsqueeze(2) # for sparse patches the shape is (unstable_size, batch, in_c, patch_h, patch_w). Batch size is 1 so no need to select here. # We select in the (output_h, out_w) dimension. selected_weight = unfolded_weight_r[last_A.unstable_idx[1], last_A.unstable_idx[2]] next_A = torch.einsum('sbchw,sbchwi->sbi', last_A.patches, selected_weight) if has_bias: # Reshape our bias to (output_h, output_w, 1, in_c, patch_h, patch_w). We already have the batch dim. unfolded_bias_r = unfolded_bias.permute(1, 2, 0, 3, 4, 5) selected_bias = unfolded_bias_r[last_A.unstable_idx[1], last_A.unstable_idx[2]] sum_bias = torch.einsum('sbchw,sbchw->sb', last_A.patches, selected_bias) else: # Reshape our weight to (1, 1, output_h, output_w, in_c, patch_h, patch_w, linear_input_shape), 1 is the spec and batch. selected_weight = unfolded_weight.permute(1, 2, 3, 4, 5, 0).unsqueeze(0).unsqueeze(0) next_A_r = torch.einsum('sbpqchw,sbpqchwi->spqbi', last_A.patches, selected_weight) # We return a matrix with flattened spec dimension (corresponding to out_c * out_h * out_w). next_A = next_A_r.reshape(-1, next_A_r.size(-2), next_A_r.size(-1)) if has_bias: # Reshape our bias to (1, 1, output_h, output_w, in_c, patch_h, patch_w) selected_bias = unfolded_bias.unsqueeze(0) sum_bias_r = torch.einsum('sbpqchw,sbpqchw->spqb', last_A.patches, selected_bias) sum_bias = sum_bias_r.reshape(-1, sum_bias_r.size(-1)) return next_A, sum_bias if has_bias else 0.0 # Case #1: No weight/bias perturbation, only perturbation on input. if ((not self.is_input_perturbed(0) or not self.is_input_perturbed(1)) and (not has_bias or not self.is_input_perturbed(2))): # If last_lA and last_uA are indentity matrices. # FIXME (12/28): we should check last_lA and last_uA separately. # Same applies to the weight perturbed, bias perturbed settings. def multiply_with_weight(weight, set_l: bool, set_u: bool): lA_x = uA_x = None lbias = ubias = 0. if isinstance(last_lA, eyeC) and isinstance(last_uA, eyeC): # Use this layer's W as the next bound matrices. # Shape of inputs: (B, s_k, s_{k-1}, ..., s_1, m, n) @ (s_l, s_{l-1}, ..., s_1, n, p) # or (B, s_k, s_{k-1}, ..., s_1, m, n) @ (B, s_k, s_{k-1}, ..., s_1, n, p) # Shape of output: (B, s_k, ..., s_1, m, p) # last_lA: (specs, B, s_k, ..., s_1, m, p) # weight: (s_l, ..., s_1, p, n) where l <= k, or (B, s_k, ..., s_1, p, n) if len(last_lA.shape) == 3: # input x is a vector m = 1 p = last_lA.shape[-1] else: # general input shape m, p = last_lA.shape[-2:] n = weight.size(-1) assert last_lA.shape == last_uA.shape # shape of "broadcast dimensions" \prod_{i=1...k} s_i shape_broadcast = last_lA.shape[2:-2] prod_broadcast = prod(shape_broadcast) ndim_broadcast = len(shape_broadcast) assert weight.ndim - 3 <= ndim_broadcast, "Broadcasting on input 'x' is not supported." 
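# Added note (sketch of the simplest case, assuming no extra broadcast dims and a
# 2-D input): for a plain nn.Linear(n, p) applied to x of shape (B, n), an eyeC
# last_lA/last_uA means the backward pass starts from an identity over the p
# outputs, so the propagated coefficient tensor is just this layer's weight
# replicated over the batch,
#   last_lA: eyeC of shape (p, B, p)  ->  lA_x of shape (p, B, n), equal to W,
# and the accumulated bias term is b repeated over the batch, of shape (p, B).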
weight_has_batch = weight.ndim - 3 == ndim_broadcast # A_identity: (s_k, ...s_1, m, 1, s_k, ..., s_1, m, 1) where two 1s are for the two "matmul dimensions" A_identity = torch.eye( prod_broadcast * m, device=weight.device, dtype=weight.dtype ).view(*shape_broadcast, m, 1, *shape_broadcast, m, 1) # Assert specs = {product of shape of output} = \prod s_i * m * p assert last_lA.shape[0] == prod_broadcast * m * p if not weight_has_batch: # Pad the "broadcast dimensions" of weight according to shape of input # (s_l, ..., s_1, p, n) -> (1, ..., 1, s_l, ..., s_1, p, n) where there are (k-l) 1s w_padding = weight.reshape(*[1] * (ndim_broadcast + 2 - len(weight.shape)), *weight.shape) # Duplicate the "broadcast dimensions" to match both sides of A_identity # (*broadcast_dims, p, n) -> (*broadcast_dims, p, *broadcast_dims, n) w_eye_mask = torch.eye(prod_broadcast, device=weight.device, dtype=weight.dtype).reshape(*shape_broadcast, 1, *shape_broadcast, 1) w = w_eye_mask * w_padding.reshape(*w_padding.shape[:-1], *[1] * (len(w_padding.shape) - 2), w_padding.size(-1)) # Add two slots for the "m" dimension in A_identity # (*broadcast_dims, p, *broadcast_dims, n) -> (*broadcast_dims, 1, p, *broadcast_dims, 1, n) w = w.view(*w.shape[:ndim_broadcast], 1, p, *w.shape[:ndim_broadcast], 1, n) w = w * A_identity # (*broadcast_dims, m, p, *broadcast_dims, m, n) # expand the batch_size dim # (*broadcast_dims, m, p, *broadcast_dims, m, n) -> (Prod(broadcast_dims)*m*p, B, *broadcast_dims, m, n) tmp_A_x = w.reshape(last_lA.shape[0], 1, *last_lA.shape[2:-1], weight.size(-1)).expand(last_lA.shape[0], *last_lA.shape[1:-1], weight.size(-1)) else: # There's no need to pad the weight tensor if it has a batch dimension. # Duplicate the "broadcast dimensions" to match both sides of A_identity # (B, *broadcast_dims, p, n) -> (B, *broadcast_dims, p, *broadcast_dims, n) w_eye_mask = torch.eye(prod_broadcast, device=weight.device, dtype=weight.dtype).reshape(*shape_broadcast, 1, *shape_broadcast, 1) w = w_eye_mask * weight.reshape(*weight.shape[:-1], *[1] * (len(weight.shape) - 3), weight.size(-1)) # Add two slots for the "m" dimension in A_identity # (B, *broadcast_dims, p, *broadcast_dims, n) -> (B, *broadcast_dims, 1, p, *broadcast_dims, 1, n) w = w.view(w.shape[0], *w.shape[1:ndim_broadcast+1], 1, p, *w.shape[1:ndim_broadcast+1], 1, n) w = w * A_identity # (B, *broadcast_dims, m, p, *broadcast_dims, m, n) # (B, *broadcast_dims, m, p, *broadcast_dims, m, n) -> (Prod(broadcast_dims)*m*p, B, *broadcast_dims, m, n) tmp_A_x = w.reshape(w.shape[0], last_lA.shape[0], *last_lA.shape[2:-1], weight.size(-1)).transpose(0, 1) if set_l: lA_x = tmp_A_x if set_u: uA_x = tmp_A_x if has_bias: tmp_bias = bias.unsqueeze(1).repeat(1, batch_size) if set_l: lbias = tmp_bias if set_u: ubias = tmp_bias elif isinstance(last_lA, OneHotC) or isinstance(last_uA, OneHotC): # We need to select several rows from the weight matrix # (its shape is output_size * input_size). 
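# Added note (hedged summary of the OneHotC branch below): a OneHotC specification
# stores only the indices of the selected output rows (plus optional per-row
# coefficients) rather than a dense C matrix, so onehot_mult() can index_select()
# the corresponding rows of W and b directly. For example, when bounding a chosen
# subset of this layer's own neurons, each specification touches exactly one row
# of W, and only those rows are ever materialized.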
if set_l: lA_x, lbias = self.onehot_mult(weight, bias, last_lA, batch_size) if last_lA is last_uA and set_l and set_u: uA_x = lA_x ubias = lbias elif set_u: uA_x, ubias = self.onehot_mult(weight, bias, last_uA, batch_size) else: if set_l: lA_x, lbias = _bound_oneside(last_lA, weight_override=weight) if set_u: uA_x, ubias = _bound_oneside(last_uA, weight_override=weight) return lA_x, uA_x, lbias, ubias if self.use_seperate_weights_for_lower_and_upper_bounds: lA_x, _, lbias, _ = multiply_with_weight(input_lb[1], set_l=True, set_u=False) _, uA_x, _, ubias = multiply_with_weight(input_ub[1], set_l=False, set_u=True) else: lA_x, uA_x, lbias, ubias = multiply_with_weight(weight, set_l=True, set_u=True) # Case #2: weight is perturbed. bias may or may not be perturbed. elif self.is_input_perturbed(1): assert not self.use_seperate_weights_for_lower_and_upper_bounds # Obtain relaxations for matrix multiplication. [(lA_x, uA_x), (lA_y, uA_y)], lbias, ubias = self.bound_backward_with_weight( last_lA, last_uA, input_lb, input_ub, x[0], x[1], reduce_bias=reduce_bias, **kwargs) if has_bias: assert reduce_bias if x[2].perturbation is not None: # Bias is also perturbed. Since bias is directly added to the # output, in backward mode it is treated as an input with # last_lA and last_uA as associated bounds matrices. # It's okay if last_lA or last_uA is eyeC, as it will be # handled in the perturbation object. lA_bias = last_lA uA_bias = last_uA else: # Bias not perturbed, so directly adding the bias of this # layer to the final bound bias term. if isinstance(last_lA, eyeC) and isinstance(last_uA, eyeC): # Bias will be directly added to output. lbias += input_lb[2].unsqueeze(1).repeat(1, batch_size) ubias += input_lb[2].unsqueeze(1).repeat(1, batch_size) else: if last_lA is not None: lbias += last_lA.matmul(input_lb[2]) if last_uA is not None: ubias += last_uA.matmul(input_lb[2]) # If not has_bias, no need to compute lA_bias and uA_bias # Case 3: Only bias is perturbed, weight is not perturbed. elif not self.is_input_perturbed(1) and has_bias and self.is_input_perturbed(2): assert not self.use_seperate_weights_for_lower_and_upper_bounds assert reduce_bias if isinstance(last_lA, eyeC) and isinstance(last_uA, eyeC): # Use this layer's W as the next bound matrices. Duplicate the # batch dimension. Other dimensions are kept 1. lA_x = uA_x = input_lb[1].unsqueeze(1).repeat( [1, batch_size] + [1] * (input_lb[1].ndim - 1)) else: lA_x = last_lA.matmul(input_lb[1]) uA_x = last_uA.matmul(input_lb[1]) # It's okay if last_lA or last_uA is eyeC, as it will be handled in the perturbation object. 
lA_bias = last_lA uA_bias = last_uA else: assert not self.use_seperate_weights_for_lower_and_upper_bounds if self.swap_x_and_weight: return [(None, None), (lA_x.transpose(-1, -2) if lA_x is not None else None, uA_x.transpose(-1, -2) if uA_x is not None else None), (lA_bias, uA_bias)], lbias, ubias return [(lA_x, uA_x), (lA_y, uA_y), (lA_bias, uA_bias)], lbias, ubias def _reshape(self, x_l, x_u, y_l, y_u): x_shape, y_shape = self.input_shape, self.y_shape # (x_1, x_2, ..., x_{n-1}, -1, x_n) # FIXME x_l = x_l.unsqueeze(-2) x_u = x_u.unsqueeze(-2) # FIXME merge these two cases if len(x_shape) == len(y_shape): # (x_1, x_2, ..., -1, y_n, y_{n-1}) y_l = y_l.unsqueeze(-3) y_u = y_u.unsqueeze(-3) elif len(y_shape) == 2: # (x_1, x_2, ..., -1, y_2, y_1) y_l = y_l.reshape(*([1] * (len(x_shape) - 2)), *y_shape).unsqueeze(-3) y_u = y_u.reshape(*([1] * (len(x_shape) - 2)), *y_shape).unsqueeze(-3) else: raise ValueError(f'Unsupported shapes: x_shape {x_shape}, y_shape {y_shape}') return x_l, x_u, y_l, y_u @staticmethod # @torch.jit.script def propagate_A_xy(last_A: Tensor, alpha_pos: Tensor, alpha_neg: Tensor, beta_pos: Tensor, beta_neg: Tensor, dim_y: List[int]) -> Tuple[Tensor, Tensor]: # last_uA has size (batch, spec, output) last_A_pos = last_A.clamp(min=0).unsqueeze(-1) last_A_neg = last_A.clamp(max=0).unsqueeze(-1) # alpha_u has size (batch, spec, output, input) # uA_x has size (batch, spec, input). A_x = (alpha_pos.transpose(-1, -2).matmul(last_A_pos) + alpha_neg.transpose(-1, -2).matmul(last_A_neg)).squeeze(-1) # beta_u has size (batch, spec, output, input) # uA_y is for weight matrix, with parameter size (output, input) # uA_y has size (batch, spec, output, input). This is an element-wise multiplication. # TODO (for zhouxing/qirui): generalize multiply_by_A_signs() to calculate A_x, # so last_A_pos and last_A_neg are not needed. This saves memory. A_y, _ = multiply_by_A_signs(last_A.unsqueeze(-1), beta_pos, beta_neg, None, None) if len(dim_y) != 0: A_y = torch.sum(A_y, dim=dim_y) return A_x, A_y def bound_backward_with_weight(self, last_lA, last_uA, input_lb, input_ub, x, y, reduce_bias=True, **kwargs): # FIXME This is nonlinear. Move to `bivariate.py`. # Note: x and y are not tranposed or scaled, and we should avoid using them directly. # Use input_lb and input_ub instead. (alpha_l, beta_l, gamma_l, alpha_u, beta_u, gamma_u) = self.mul_helper.get_relaxation( *self._reshape(input_lb[0], input_ub[0], input_lb[1], input_ub[1]), self.opt_stage, getattr(self, 'alpha', None), getattr(self, '_start', None), middle=self.mul_middle) x_shape = input_lb[0].size() if reduce_bias: gamma_l = torch.sum(gamma_l, dim=-1) gamma_u = torch.sum(gamma_u, dim=-1) if len(x.output_shape) != 2 and len(x.output_shape) == len(y.output_shape): dim_y = [-3] elif len(y.output_shape) == 2: dim_y = list(range(2, 2 + len(x_shape) - 2)) else: raise NotImplementedError def _bound_oneside(last_A, alpha_pos, beta_pos, gamma_pos, alpha_neg, beta_neg, gamma_neg): if last_A is None: return None, None, 0 if isinstance(last_A, eyeC): # FIXME (12/28): Handle the OneHotC case. 
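# Added note (paraphrase of the standard bilinear/McCormick relaxation whose
# coefficients MulHelper.get_relaxation() produced above; the names alpha/beta/gamma
# follow that call): for z = x * y with x in [x_l, x_u] and y in [y_l, y_u],
#   z >= y_l * x + x_l * y - x_l * y_l   (a valid lower plane)
#   z <= y_u * x + x_l * y - x_l * y_u   (a valid upper plane)
# i.e. each plane is linear in x (alpha), linear in y (beta), plus a constant
# (gamma); propagate_A_xy() then routes the positive/negative parts of last_A to
# the appropriate plane.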
#FIXME previous implementation is incorrect # expanding eyeC for now last_A = (torch.eye(last_A.shape[0], device=last_A.device) .view(last_A.shape[0], 1, *last_A.shape[2:]).expand(last_A.shape)) A_x, A_y = BoundLinear.propagate_A_xy( last_A, alpha_pos, alpha_neg, beta_pos, beta_neg, dim_y) if reduce_bias: # last_uA has size (batch, spec, output) # gamma_u has size (batch, output, 1) # ubias has size (batch, spec, 1) if self.opt_stage in ['opt', 'reuse']: bias = (torch.einsum('sb...,sb...->sb', last_A.clamp(min=0), gamma_pos) + torch.einsum('sb...,sb...->sb', last_A.clamp(max=0), gamma_neg)) else: bias = ( self.get_bias(last_A.clamp(min=0), gamma_pos) + self.get_bias(last_A.clamp(max=0), gamma_neg) ) else: assert self.batch_dim == 0 assert self.opt_stage not in ['opt', 'reuse'] assert dim_y == [-3] bias = (last_A.unsqueeze(-1).clamp(min=0) * gamma_pos + last_A.unsqueeze(-1).clamp(max=0) * gamma_neg) bias_x = bias.sum(dim=-2) bias_y = bias.sum(dim=-3) bias = (bias_x, bias_y) return A_x, A_y, bias if self.opt_stage in ['opt', 'reuse']: lA_x, lA_y, lbias = _bound_oneside( last_lA, alpha_l[0], beta_l[0], gamma_l[0], alpha_u[0], beta_u[0], gamma_u[0]) uA_x, uA_y, ubias = _bound_oneside( last_uA, alpha_u[1], beta_u[1], gamma_u[1], alpha_l[1], beta_l[1], gamma_l[1]) else: lA_x, lA_y, lbias = _bound_oneside( last_lA, alpha_l, beta_l, gamma_l, alpha_u, beta_u, gamma_u) uA_x, uA_y, ubias = _bound_oneside( last_uA, alpha_u, beta_u, gamma_u, alpha_l, beta_l, gamma_l) return [(lA_x, uA_x), (lA_y, uA_y)], lbias, ubias @staticmethod def _propagate_Linf(x, w): h_L, h_U = x mid = (h_L + h_U) / 2 diff = (h_U - h_L) / 2 w_abs = w.abs() if mid.ndim == 2 and w.ndim == 3: center = torch.bmm(mid.unsqueeze(1), w.transpose(-1, -2)).squeeze(1) deviation = torch.bmm(diff.unsqueeze(1), w_abs.transpose(-1, -2)).squeeze(1) else: center = mid.matmul(w.transpose(-1, -2)) deviation = diff.matmul(w_abs.transpose(-1, -2)) return center, deviation def interval_propagate(self, *v, C=None, w=None): has_bias = self is not None and len(v) == 3 if self is not None: # This will convert an Interval object to tuple. # We need to add perturbation property later. v_lb, v_ub = zip(*v) v_lb = self._preprocess(*v_lb) v_ub = self._preprocess(*v_ub) # After preprocess the lower and upper bounds, we make them Intervals again. v = [Interval.make_interval(bounds[0], bounds[1], bounds[2]) for bounds in zip(v_lb, v_ub, v)] if w is None and self is None: # Use C as the weight, no bias. w, lb, ub = C, torch.tensor(0., device=C.device), torch.tensor(0., device=C.device) else: if w is None: # No specified weight, use this layer's weight. if self.is_input_perturbed(1): # input index 1 is weight. # w is a perturbed tensor. Use IBP with weight perturbation. # C matrix merging not supported. assert C is None res = self.interval_propagate_with_weight(*v) l, u = res if has_bias: return l + v[2][0], u + v[2][1] else: return l, u else: # Use weight w = v[1][0] if has_bias: lb, ub = v[2] else: lb = ub = 0.0 if C is not None: w = C.matmul(w) lb = C.matmul(lb) if not isinstance(lb, float) else lb ub = C.matmul(ub) if not isinstance(ub, float) else ub # interval_propagate() of the Linear layer may encounter input with different norms. norm, eps = Interval.get_perturbation(v[0])[:2] if norm == torch.inf: interval = BoundLinear._propagate_Linf(v[0], w) center, deviation = interval elif norm > 0: # General Lp norm. norm, eps = Interval.get_perturbation(v[0]) mid = v[0][0] dual_norm = np.float64(1.0) / (1 - 1.0 / norm) if w.ndim == 3: # Extra batch dimension. 
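# Added note (standard Hoelder / dual-norm argument behind the code below): for a
# perturbation with ||delta||_p <= eps around the nominal input `mid`, the extreme
# values of a linear form are
#   max / min over ||delta||_p <= eps of  w . (mid + delta) = w . mid +/- eps * ||w||_q,
# with 1/p + 1/q = 1, which is why `deviation` is computed as the dual (q) norm of
# each weight row times eps and then added to / subtracted from `center`.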
# mid has dimension [batch, input], w has dimension [batch, output, input]. center = w.matmul(mid.unsqueeze(-1)).squeeze(-1) else: # mid has dimension [batch, input], w has dimension [output, input]. center = mid.matmul(w.t()) deviation = w.norm(dual_norm, dim=-1) * eps else: # here we calculate the L0 norm IBP bound of Linear layers, # using the bound proposed in [Certified Defenses for Adversarial Patches, ICLR 2020] norm, eps, ratio = Interval.get_perturbation(v[0]) mid = v[0][0] weight_abs = w.abs() if w.ndim == 3: # Extra batch dimension. # mid has dimension [batch, input], w has dimension [batch, output, input]. center = w.matmul(mid.unsqueeze(-1)).squeeze(-1) else: # mid has dimension [batch, input], w has dimension [output, input]. center = mid.matmul(w.t()) # L0 norm perturbation k = int(eps) deviation = torch.sum(torch.topk(weight_abs, k)[0], dim=1) * ratio lower, upper = center - deviation + lb, center + deviation + ub return (lower, upper) def interval_propagate_with_weight(self, *v): input_norm, input_eps = Interval.get_perturbation(v[0]) weight_norm, weight_eps = Interval.get_perturbation(v[1]) if input_norm == torch.inf and weight_norm == torch.inf: # A memory-efficient implementation without expanding all the elementary multiplications if self.opt_matmul == 'economic': x_l, x_u = v[0][0], v[0][1] y_l, y_u = v[1][0].transpose(-1, -2), v[1][1].transpose(-1, -2) dx, dy = F.relu(x_u - x_l), F.relu(y_u - y_l) base = x_l.matmul(y_l) mask_xp, mask_xn = (x_l > 0).to(x_l.dtype), (x_u < 0).to(x_u.dtype) mask_xpn = 1 - mask_xp - mask_xn mask_yp, mask_yn = (y_l > 0).to(y_l.dtype), (y_u < 0).to(y_u.dtype) mask_ypn = 1 - mask_yp - mask_yn lower, upper = base.clone(), base.clone() lower += dx.matmul(y_l.clamp(max=0)) - (dx * mask_xn).matmul(y_l * mask_ypn) upper += dx.matmul(y_l.clamp(min=0)) + (dx * mask_xp).matmul(y_l * mask_ypn) lower += x_l.clamp(max=0).matmul(dy) - (x_l * mask_xpn).matmul(dy * mask_yn) upper += x_l.clamp(min=0).matmul(dy) + (x_l * mask_xpn).matmul(dy * mask_yp) lower += (dx * mask_xn).matmul(dy * mask_yn) upper += (dx * (mask_xpn + mask_xp)).matmul(dy * (mask_ypn + mask_yp)) else: # Both input data and weight are Linf perturbed (with upper and lower bounds). # We need a x_l, x_u for each row of weight matrix. x_l, x_u = v[0][0].unsqueeze(-2), v[0][1].unsqueeze(-2) y_l, y_u = v[1][0].unsqueeze(-3), v[1][1].unsqueeze(-3) # Reuse the multiplication bounds and sum over results. lower, upper = BoundMul.interval_propagate_both_perturbed(*[(x_l, x_u), (y_l, y_u)]) lower, upper = torch.sum(lower, -1), torch.sum(upper, -1) return lower, upper elif input_norm == torch.inf and weight_norm == 2: # This eps is actually the epsilon per row, as only one row is involved for each output element. eps = weight_eps # Input data and weight are Linf perturbed (with upper and lower bounds). h_L, h_U = v[0] # First, handle non-perturbed weight with Linf perturbed data. center, deviation = BoundLinear._propagate_Linf(v[0], v[1][0]) # Compute the maximal L2 norm of data. Size is [batch, 1]. max_l2 = torch.max(h_L.abs(), h_U.abs()).norm(2, dim=-1).unsqueeze(-1) # Add the L2 eps to bounds. 
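# Why max_l2 * eps is sound (hedged derivation): for any weight perturbation Delta with
# row-wise ||Delta_i||_2 <= eps and any input x inside the box [h_L, h_U], Cauchy-Schwarz
# gives |Delta_i . x| <= eps * ||x||_2 <= eps * || max(|h_L|, |h_U|) ||_2, which is the
# `max_l2` term computed above. Adding/subtracting it on top of the Linf propagation
# through the nominal weight therefore covers both perturbations simultaneously.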
lb, ub = center - deviation - max_l2 * eps, center + deviation + max_l2 * eps return lb, ub else: raise NotImplementedError( "Unsupported perturbation combination: data={}, weight={}".format(input_norm, weight_norm)) @staticmethod @torch.jit.script def bound_forward_mul(x_lw: Tensor, x_lb: Tensor, x_uw: Tensor, x_ub: Tensor, w: Tensor, weight_has_batch: bool = False, swap_x_and_weight: bool = False): w_pos = w.clamp(min=0) w_neg = w.clamp(max=0) if swap_x_and_weight: lw = matmul_maybe_batched(w_pos, x_lw, weight_has_batch) + matmul_maybe_batched(w_neg, x_uw, weight_has_batch) uw = matmul_maybe_batched(w_pos, x_uw, weight_has_batch) + matmul_maybe_batched(w_neg, x_lw, weight_has_batch) lb = matmul_maybe_batched(w_pos, x_lb, weight_has_batch) + matmul_maybe_batched(w_neg, x_ub, weight_has_batch) ub = matmul_maybe_batched(w_pos, x_ub, weight_has_batch) + matmul_maybe_batched(w_neg, x_lb, weight_has_batch) else: lw = matmul_maybe_batched(x_lw, w_pos, weight_has_batch) + matmul_maybe_batched(x_uw, w_neg, weight_has_batch) uw = matmul_maybe_batched(x_uw, w_pos, weight_has_batch) + matmul_maybe_batched(x_lw, w_neg, weight_has_batch) lb = matmul_maybe_batched(x_lb, w_pos, weight_has_batch) + matmul_maybe_batched(x_ub, w_neg, weight_has_batch) ub = matmul_maybe_batched(x_ub, w_pos, weight_has_batch) + matmul_maybe_batched(x_lb, w_neg, weight_has_batch) return lw, lb, uw, ub # w: an optional argument which can be utilized by BoundMatMul def bound_dynamic_forward(self, x, w=None, b=None, C=None, max_dim=None, offset=0): assert not self.transA and self.alpha_linear == 1.0 and self.transB and self.beta_linear == 1.0 assert not self.is_input_perturbed(1) assert not self.is_input_perturbed(2) weight = w.lb bias = b.lb if b is not None else None if C is not None: weight = C.to(weight).matmul(weight).transpose(-1, -2) if bias is not None: bias = C.to(bias).matmul(bias) lb = x.lb.unsqueeze(1) else: weight = weight.transpose(-1, -2) lb = x.lb w_new = x.lw.matmul(weight) b_new = lb.matmul(weight) if C is not None: b_new = b_new.squeeze(1) if bias is not None: b_new += bias return LinearBound(w_new, b_new, w_new, b_new, x_L=x.x_L, x_U=x.x_U, tot_dim=x.tot_dim) # w: an optional argument which can be utilized by BoundMatMul def bound_forward(self, dim_in, x, w=None, b=None, C=None, weight_has_batch=False): has_bias = b is not None #FIXME _preprocess can only be applied to tensors so far but not linear bounds. x, w, b = self._preprocess(x, w, b) # Shape of x: (B, s_k, s_{k-1}, ..., s_1, m, n) # Shape of w: (s_l, s_{l-1}, ..., s_1, p, n) or (B, s_k, s_{k-1}, ..., s_1, p, n) if weight_has_batch # Forward pass: (B, s_k, s_{k-1}, ..., s_1, m, n) @ (s_l, s_{l-1}, ..., s_1, p, n)^T # Here, the transpose of w means transposing the last two dimensions of w. # Case #1: No weight/bias perturbation, only perturbation on input. if ((not self.is_input_perturbed(0) or not self.is_input_perturbed(1)) and (not has_bias or not self.is_input_perturbed(2))): if isinstance(w, LinearBound): w = w.lower if isinstance(b, LinearBound): b = b.lower if C is not None: w = C.to(w).matmul(w).transpose(-1, -2) if b is not None: b = C.to(b).matmul(b) x_lb, x_ub = x.lb.unsqueeze(1), x.ub.unsqueeze(1) else: w = w.transpose(-1, -2) x_lb, x_ub = x.lb, x.ub lw, lb, uw, ub = BoundLinear.bound_forward_mul( x.lw, x_lb, x.uw, x_ub, w, weight_has_batch, swap_x_and_weight=self.is_input_perturbed(1)) if C is not None: lb, ub = lb.squeeze(1), ub.squeeze(1) if b is not None: lb += b ub += b # Case #2: weight is perturbed. bias may or may not be perturbed. 
elif self.is_input_perturbed(1): if C is not None: raise NotImplementedError res = self.bound_forward_with_weight(dim_in, x, w) if has_bias: raise NotImplementedError lw, lb, uw, ub = res.lw, res.lb, res.uw, res.ub # Case 3: Only bias is perturbed, weight is not perturbed. elif not self.is_input_perturbed(1) and has_bias and self.is_input_perturbed(2): raise NotImplementedError return LinearBound(lw, lb, uw, ub) def bound_forward_with_weight(self, dim_in, x, y): # x has shape (B, s_k, s_{k-1}, ..., s_1, m, n) # y has shape (B, s_k, s_{k-1}, ..., s_1, p, n) # We need to reshape x and y to (B, s_k, s_{k-1}, ..., s_1, m, 1, n) # and (B, s_k, s_{k-1}, ..., s_1, 1, p, n) # respectively. # Then we can use the bound_forward_mul function to compute the bounds # for element-wise multiplication and sum over the last dimension. # The result will have shape (B, s_k, s_{k-1}, ..., s_1, m, p) x_unsqueeze = LinearBound( x.lw.unsqueeze(-2), x.lb.unsqueeze(-2), x.uw.unsqueeze(-2), x.ub.unsqueeze(-2), x.lower.unsqueeze(-2), x.upper.unsqueeze(-2), ) y_unsqueeze = LinearBound( y.lw.unsqueeze(-3), y.lb.unsqueeze(-3), y.uw.unsqueeze(-3), y.ub.unsqueeze(-3), y.lower.unsqueeze(-3), y.upper.unsqueeze(-3), ) res_mul = BoundMul.bound_forward_both_perturbed(self, dim_in, x_unsqueeze, y_unsqueeze) return LinearBound( res_mul.lw.sum(dim=-1) if res_mul.lw is not None else None, res_mul.lb.sum(dim=-1), res_mul.uw.sum(dim=-1) if res_mul.uw is not None else None, res_mul.ub.sum(dim=-1) ) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): has_bias = self is not None and len(v) == 3 # Aggregate a batch of bounds by taking minimum/maximum over the batch dimension. out_lbs = self.lower.min(dim=0).values.detach().cpu().numpy() if self.lower is not None else None out_ubs = self.upper.max(dim=0).values.detach().cpu().numpy() if self.upper is not None else None # current layer weight (out_width, in_width) this_layer_weight = v[1] if self.transB == 0: this_layer_weight = this_layer_weight.transpose(1, 0) #### make sure if this is correct for per-label operations if C is not None: # merge specification C into last layer weights # only last layer has C not None this_layer_weight = C.squeeze(0).mm(this_layer_weight) this_layer_weight = this_layer_weight.detach().cpu().numpy() this_layer_shape = this_layer_weight.shape this_layer_bias = None if has_bias: # current layer bias (out_width,) this_layer_bias = v[2] if C is not None: this_layer_bias = C.squeeze(0).mm(this_layer_bias.unsqueeze(-1)).view(-1) this_layer_bias = this_layer_bias.detach().cpu().numpy() new_layer_gurobi_vars = [] for neuron_idx in range(this_layer_shape[0]): out_lb = out_lbs[neuron_idx] if out_lbs is not None else -float('inf') out_ub = out_ubs[neuron_idx] if out_ubs is not None else float('inf') if out_lbs is not None and out_ubs is not None: """ If the inferred lb and ub are too close, it could lead to floating point disagreement between solver's inferred lb and ub constraints and the computed ones from ab-crown. Such disagreement can lead to "infeasible" result from the solver for feasible problem. Also, prevent lb to be larger than ub due to the floating point issue. To avoid so, we relax the box constraints. This should not affect the solver's result correctness, since the tighter lb and ub can be inferred by the solver. 
""" if out_lb != float('-inf') and out_ub != float('inf'): diff = out_ub - out_lb avg = (out_ub + out_lb) / 2.0 condition = (diff < EPS) out_lb = np.where(condition, avg - EPS / 2.0, out_lb) out_ub = np.where(condition, avg + EPS / 2.0, out_ub) lin_expr = 0 if has_bias: lin_expr = this_layer_bias[neuron_idx].item() coeffs = this_layer_weight[neuron_idx, :] if solver_pkg == 'gurobi': lin_expr += grb.LinExpr(coeffs, v[0]) else: # FIXME (01/12/22): This is slow, must be fixed using addRow() or similar. for i in range(len(coeffs)): try: lin_expr += coeffs[i] * v[0][i] except TypeError: lin_expr += coeffs[i] * v[0][i].var var = model.addVar(lb=out_lb, ub=out_ub, obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(lin_expr == var, name=f'lay{self.name}_{neuron_idx}_eq') new_layer_gurobi_vars.append(var) self.solver_vars = new_layer_gurobi_vars model.update() def build_gradient_node(self, grad_upstream): if not self.is_input_perturbed(1): if isinstance(self.inputs[1], BoundParams): w = self.inputs[1].param elif isinstance(self.inputs[1], BoundBuffers): w = self.inputs[1].buffer else: w = self.inputs[1].value if not self.transB: w = w.t() node_grad = LinearGrad(w.detach()) return [(node_grad, (grad_upstream,), [])] else: raise NotImplementedError( "Gradient computation for weight perturbation is not supported yet.") def update_requires_input_bounds(self): self._check_weight_perturbation() class BoundMatMul(BoundLinear): # Reuse most functions from BoundLinear. def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.transA = 0 self.transB = 0 self.splittable = True def forward(self, x, y): self.x_shape = x.shape self.y_shape = y.shape return x.matmul(y) def interval_propagate(self, *v, C=None): lower, upper = super().interval_propagate(*v, C=C) return lower, upper def bound_backward(self, last_lA, last_uA, *x, start_node=None, **kwargs): assert len(x) == 2 # Determine if two inputs should be swapped self.swap_x_and_weight = not self.is_input_perturbed(0) and self.is_input_perturbed(1) idx_weight = 0 if self.swap_x_and_weight else 1 if start_node is not None: self._start = start_node.name results = list(super().bound_backward(last_lA, last_uA, *x, **kwargs)) # Transpose weight-related tensors def transpose_weight(A_weight): return A_weight.transpose(-1, -2) if A_weight is not None else None results[0][idx_weight] = (transpose_weight(results[0][idx_weight][0]), transpose_weight(results[0][idx_weight][1])) if isinstance(results[1], tuple): lbias = (results[1][0], results[1][1].transpose(-1, -2)) else: lbias = results[1] if isinstance(results[2], tuple): ubias = (results[2][0], results[2][1].transpose(-1, -2)) else: ubias = results[2] # Reduce the broadcast dimensions lA_x = self.broadcast_backward(results[0][0][0], x[0]) uA_x = self.broadcast_backward(results[0][0][1], x[0]) lA_y = self.broadcast_backward(results[0][1][0], x[1]) uA_y = self.broadcast_backward(results[0][1][1], x[1]) return [(lA_x, uA_x), (lA_y, uA_y), results[0][2]], lbias, ubias def bound_forward(self, dim_in, x, y): def _bound_forward(x, y, weight_index=1): # We assume that x is perturbed and y is not perturbed (weight). 
weight_has_batch = (self.inputs[weight_index].batch_dim != -1) return super(BoundMatMul, self).bound_forward(dim_in, x, LinearBound( y.lw.transpose(-1, -2) if y.lw is not None else None, y.lb.transpose(-1, -2) if y.lb is not None else None, y.uw.transpose(-1, -2) if y.uw is not None else None, y.ub.transpose(-1, -2) if y.ub is not None else None, y.lower.transpose(-1, -2) if y.lower is not None else None, y.upper.transpose(-1, -2) if y.upper is not None else None ), weight_has_batch=weight_has_batch) # Check if we need to swap x and y if not self.is_input_perturbed(0) and self.is_input_perturbed(1): return _bound_forward(y, x, weight_index=0) else: return _bound_forward(x, y, weight_index=1) def update_requires_input_bounds(self): # If any multiplier is a constant, we do not need input bounds. self.is_linear_op = not self.inputs[1].perturbed or not self.inputs[0].perturbed if self.is_linear_op: # One input is constant; no bounds required. self.requires_input_bounds = [] self.splittable = False else: # Both inputs are perturbed. Need relaxation. self.requires_input_bounds = [0, 1] if not self.force_not_splittable: self.splittable = True class BoundNeg(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.ibp_intermediate = True def forward(self, x): return -x def bound_backward(self, last_lA, last_uA, x, **kwargs): if type(last_lA) == Tensor or type(last_uA) == Tensor: return [(-last_lA if last_lA is not None else None, -last_uA if last_uA is not None else None)], 0, 0 elif type(last_lA) == Patches or type(last_uA) == Patches: if last_lA is not None: lA = Patches(-last_lA.patches, last_lA.stride, last_lA.padding, last_lA.shape, unstable_idx=last_lA.unstable_idx, output_shape=last_lA.output_shape) else: lA = None if last_uA is not None: uA = Patches(-last_uA.patches, last_uA.stride, last_uA.padding, last_uA.shape, unstable_idx=last_uA.unstable_idx, output_shape=last_uA.output_shape) else: uA = None return [(lA, uA)], 0, 0 else: raise NotImplementedError def bound_forward(self, dim_in, x): return LinearBound(-x.uw, -x.ub, -x.lw, -x.lb) def interval_propagate(self, *v): return -v[0][1], -v[0][0] def build_gradient_node(self, grad_upstream): return [(NegGrad(), (grad_upstream,), [])] class NegGrad(Module): def forward(self, grad_last): return -grad_last class BoundCumSum(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True def forward(self, x, axis): self.axis = axis return torch.cumsum(x, axis) class BoundIdentity(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True def forward(self, x): return x def bound_backward(self, last_lA, last_uA, x, **kwargs): return [(last_lA, last_uA)], 0, 0 def bound_forward(self, dim_in, x): return x class LinearGrad(Module): def __init__(self, weight): super().__init__() self.weight = weight def forward(self, grad_last): weight = self.weight.to(grad_last).t() return F.linear(grad_last, weight) class MatMulGrad(Module): def forward(self, grad_last, x): return grad_last.matmul(x.transpose(-1, -2)) ================================================ FILE: auto_LiRPA/operators/logical.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part 
of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Logical operators""" from .base import * class BoundWhere(Bound): def forward(self, condition, x, y): return torch.where(condition.to(torch.bool), x, y) def interval_propagate(self, *v): assert not self.is_input_perturbed(0) condition = v[0][0] return tuple([torch.where(condition, v[1][j], v[2][j]) for j in range(2)]) def bound_backward(self, last_lA, last_uA, condition, x, y, **kwargs): assert torch.allclose(condition.lower.float(), condition.upper.float()) assert self.from_input mask = condition.lower.float() def _bound_oneside(last_A): if last_A is None: return None, None assert last_A.ndim > 1 A_x = self.broadcast_backward(mask.unsqueeze(0) * last_A, x) A_y = self.broadcast_backward((1 - mask).unsqueeze(0) * last_A, y) return A_x, A_y lA_x, lA_y = _bound_oneside(last_lA) uA_x, uA_y = _bound_oneside(last_uA) return [(None, None), (lA_x, uA_x), (lA_y, uA_y)], 0, 0 class BoundNot(Bound): def forward(self, x): return x.logical_not() class BoundEqual(Bound): def forward(self, x, y): return x == y ================================================ FILE: auto_LiRPA/operators/minmax.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import torch from .base import * from .clampmult import multiply_by_A_signs from .activation_base import BoundOptimizableActivation class BoundMinMax(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.options = options self.requires_input_bounds = [0, 1] self.op = None def _init_opt_parameters_impl(self, size_spec, name_start): """Implementation of init_opt_parameters for each start_node.""" l = self.inputs[0].lower # Alpha dimension is (2, output_shape, batch, *shape). shape = [2, size_spec] + list(l.shape) return torch.ones(shape, device=l.device) def clip_alpha(self): # See https://www.overleaf.com/read/jzgrcmqtqpcx#9dbf97 for the math behind this code. 
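# Sketch of the constraint enforced here (hedged, following the cases below): the
# optimizable slopes alpha parameterize valid tangent planes of the max/min relaxation
# and must stay inside per-case feasible intervals. E.g. for op == 'max' with
# overlapping bounds (case 2: l_x < u_y and u_x > u_y), the upper slope must satisfy
#   (u_x - u_y) / (u_x - max(l_x, l_y)) <= alpha_u <= 1,
# and clamping element-wise to [lb, ub] after each optimizer step keeps alpha feasible.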
lb_x = self._cached_lb_x ub_x = self._cached_ub_x lb_y = self._cached_lb_y ub_y = self._cached_ub_y for v in self.alpha.values(): eps = torch.tensor(1e-6).to(lb_x.dtype) if self.op == 'max': # Case 1: l_x >= u_y case1 = (lb_x >= ub_y).requires_grad_(False).to(lb_x.dtype) alpha_u_lb = torch.zeros_like(case1) alpha_u_ub = torch.zeros_like(case1) alpha_l_lb = torch.zeros_like(case1) alpha_l_ub = torch.zeros_like(case1) # Case 2: l_x < u_y && u_x > u_y case2 = ((lb_x < ub_y) * (ub_x > ub_y)).requires_grad_(False).to(lb_x.dtype) alpha_u_lb += case2 * (ub_x - ub_y) / (ub_x - torch.maximum(lb_x, lb_y)) alpha_u_ub += case2 alpha_l_ub += case2 # Case 3: l_x < u_y && u_x == u_y case3 = ((lb_x < ub_y) * (ub_x == ub_y)).requires_grad_(False).to(lb_x.dtype) alpha_u_ub += case3 alpha_l_ub += case3 alpha_u_lb = torch.clamp(alpha_u_lb, min=eps) alpha_u_ub = torch.clamp(alpha_u_ub, min=eps) elif self.op == 'min': # Case 1: l_y >= u_x case1 = (lb_y >= ub_x).requires_grad_(False).to(lb_x.dtype) alpha_u_lb = torch.zeros_like(case1) alpha_u_ub = torch.zeros_like(case1) alpha_l_lb = torch.zeros_like(case1) alpha_l_ub = torch.zeros_like(case1) # Case 2: l_y < u_x && l_y > l_x case2 = ((lb_y < ub_x) * (lb_y > lb_x)).requires_grad_(False).to(lb_x.dtype) alpha_u_ub += case2 alpha_l_lb += case2 * (lb_y - lb_x) / (torch.minimum(ub_x, ub_y) - lb_x) alpha_l_ub += case2 # Case 3: l_y < u_x && l_y == l_x case3 = ((lb_y < ub_x) * (lb_y == lb_x)).requires_grad_(False).to(lb_x.dtype) alpha_u_ub += case3 alpha_l_ub += case3 alpha_l_lb = torch.clamp(alpha_l_lb, min=eps) alpha_l_ub = torch.clamp(alpha_l_ub, min=eps) v.data[0] = torch.clamp(v.data[0], alpha_u_lb, alpha_u_ub) v.data[1] = torch.clamp(v.data[1], alpha_l_lb, alpha_l_ub) def forward(self, x, y): if self.op == 'max': return torch.max(x, y) elif self.op == 'min': return torch.min(x, y) else: raise NotImplementedError def _backward_relaxation(self, x, y, start_node=None): # See https://www.overleaf.com/read/jzgrcmqtqpcx#9dbf97 for the math behind this code. lb_x = x.lower ub_x = x.upper lb_y = y.lower ub_y = y.upper if self.opt_stage in ['opt', 'reuse']: selected_alpha = self.alpha[start_node.name] alpha_u = selected_alpha[0] alpha_l = selected_alpha[1] else: alpha_u = alpha_l = 1 ub_x = ub_x.unsqueeze(0) ub_y = ub_y.unsqueeze(0) lb_x = lb_x.unsqueeze(0) lb_y = lb_y.unsqueeze(0) if self.op == 'max': swapped_inputs = ub_x < ub_y elif self.op == 'min': swapped_inputs = lb_y < lb_x else: raise NotImplementedError lb_x, lb_y = torch.where(swapped_inputs, lb_y, lb_x), torch.where(swapped_inputs, lb_x, lb_y) ub_x, ub_y = torch.where(swapped_inputs, ub_y, ub_x), torch.where(swapped_inputs, ub_x, ub_y) self._cached_lb_x = lb_x.detach() self._cached_ub_x = ub_x.detach() self._cached_lb_y = lb_y.detach() self._cached_ub_y = ub_y.detach() epsilon = 1e-6 ub_x = torch.max(ub_x, lb_x + epsilon) ub_y = torch.max(ub_y, lb_y + epsilon) # Ideally, if x or y are constant, this layer should be replaced by a ReLU # max{x, c} = max{x − c, 0} + c # min{x, c} = −max{−x, −c} = −(max{−x + c, 0} − c) = −max{−x + c, 0} + c if torch.any(lb_x + 1e-4 >= ub_x) or torch.any(lb_y + 1e-4 >= ub_y): print("Warning: MinMax layer (often used for clamping) received at " "least one input with lower bound almost equal to the upper " "bound. This can happen e.g. if x or y are constants. Consider " "replacing this layer with a ReLU for higher efficieny.") assert torch.all(ub_x != lb_x) and torch.all(ub_y != lb_y), ( 'Lower/upper bounds are too close and epsilon was rounded away. 
' 'To fix this, increase epsilon.' ) if isinstance(alpha_u, torch.Tensor): assert alpha_u.shape[1:] == ub_x.shape[1:] shape = alpha_u.shape else: shape = ub_x.shape upper_dx = torch.zeros(shape, device=ub_x.device) upper_dy = torch.zeros(shape, device=ub_x.device) lower_dx = torch.zeros(shape, device=ub_x.device) lower_dy = torch.zeros(shape, device=ub_x.device) upper_b = torch.zeros(shape, device=ub_x.device) lower_b = torch.zeros(shape, device=ub_x.device) if self.op == 'max': # Case 1: l_x >= u_y case1 = (lb_x >= ub_y).requires_grad_(False).to(lb_x.dtype) upper_dx += case1 lower_dx += case1 # Case 2: l_x < u_y && u_x > u_y case2 = ((lb_x < ub_y) * (ub_x > ub_y)).requires_grad_(False).to(lb_x.dtype) upper_dx = upper_dx + case2 * (ub_y - ub_x) / (alpha_u * (lb_x - ub_x)) upper_dy = upper_dy + case2 * (alpha_u - 1) * (ub_y - ub_x) / (alpha_u * (ub_y - lb_y)) upper_b = upper_b + case2 * (ub_x - (ub_x * (ub_y - ub_x)) / (alpha_u * (lb_x - ub_x)) - ((alpha_u - 1) * (ub_y - ub_x) * lb_y) / (alpha_u * (ub_y - lb_y))) lower_dx = lower_dx + case2 * (1 - alpha_l) lower_dy = lower_dy + case2 * alpha_l # Case 3: l_x < u_y && u_x == u_y case3 = ((lb_x < ub_y) * (ub_x == ub_y)).requires_grad_(False).to(lb_x.dtype) upper_dx = upper_dx + case3 * alpha_u * (ub_x - torch.maximum(lb_x, lb_y)) / (ub_x - lb_x) upper_dy = upper_dy + case3 * alpha_u * (ub_x - torch.maximum(lb_x, lb_y)) / (ub_y - lb_y) upper_b = upper_b + case3 * (ub_x - (alpha_u * (ub_x - torch.maximum(lb_x, lb_y)) * lb_x) / (ub_x - lb_x) - (alpha_u * (ub_x - torch.maximum(lb_x, lb_y)) * ub_y) / (ub_y - lb_y)) lower_dx = lower_dx + case3 * (1 - alpha_l) lower_dy = lower_dy + case3 * alpha_l elif self.op == 'min': # Case 1: l_y >= u_x case1 = (lb_y >= ub_x).requires_grad_(False).to(lb_x.dtype) upper_dx = case1.clone() lower_dx = case1.clone() upper_dy = torch.zeros_like(case1) lower_dy = torch.zeros_like(case1) upper_b = torch.zeros_like(case1) lower_b = torch.zeros_like(case1) # Case 2: l_y < u_x && l_y > l_x case2 = ((lb_y < ub_x) * (lb_y > lb_x)).requires_grad_(False).to(lb_x.dtype) upper_dx = upper_dx + case2 * (1 - alpha_u) upper_dy = upper_dy + case2 * alpha_u lower_dx = lower_dx + case2 * (lb_x - lb_y) / (alpha_l * (lb_x - ub_x)) lower_dy = lower_dy + case2 * (alpha_l - 1) * (lb_x - lb_y) / (alpha_l * (ub_y - lb_y)) lower_b = lower_b + case2 * (lb_y - (ub_x * (lb_x - lb_y)) / (alpha_l * (lb_x - ub_x)) - ((alpha_l - 1) * (lb_x - lb_y) * lb_y) / (alpha_l * (ub_y - lb_y))) # Case 3: l_y < u_x && l_y == l_x case3 = ((lb_y < ub_x) * (lb_y == lb_x)).requires_grad_(False).to(lb_x.dtype) upper_dx = upper_dx + case3 * (1 - alpha_u) upper_dy = upper_dy + case3 * alpha_u lower_dx = lower_dx + case3 * alpha_l * (torch.minimum(ub_x, ub_y) - lb_x) / (ub_x - lb_x) lower_dy = lower_dy + case3 * alpha_l * (torch.minimum(ub_x, ub_y) - lb_x) / (ub_y - lb_y) lower_b = lower_b + case3 * (lb_x - (alpha_l * (torch.minimum(ub_x, ub_y) - lb_x) * lb_x) / (ub_x - lb_x) - (alpha_l * (torch.minimum(ub_x, ub_y) - lb_x) * ub_y) / (ub_y - lb_y)) else: raise NotImplementedError lower_dx, lower_dy = torch.where(swapped_inputs, lower_dy, lower_dx), torch.where(swapped_inputs, lower_dx, lower_dy) upper_dx, upper_dy = torch.where(swapped_inputs, upper_dy, upper_dx), torch.where(swapped_inputs, upper_dx, upper_dy) return upper_dx, upper_dy, upper_b, lower_dx, lower_dy, lower_b def bound_backward(self, last_lA, last_uA, x=None, y=None, start_shape=None, start_node=None, **kwargs): # Get element-wise CROWN linear relaxations. 
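# Shape of the relaxation (hedged summary): each output element z = op(x, y) gets two
# planes, an upper one z <= u_dx * x + u_dy * y + u_b and a lower one
# z >= l_dx * x + l_dy * y + l_b, valid on the box [l_x, u_x] x [l_y, u_y].
# multiply_by_A_signs() then picks, per entry of last_A, the upper plane where the
# coefficient is positive and the lower plane where it is negative (and vice versa for
# the lower bound), which is the standard CROWN backward substitution rule.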
upper_dx, upper_dy, upper_b, lower_dx, lower_dy, lower_b = \ self._backward_relaxation(x, y, start_node) # Choose upper or lower bounds based on the sign of last_A def _bound_oneside(last_A, d_pos, d_neg, b_pos, b_neg): if last_A is None: return None, 0 # Obtain the new linear relaxation coefficients based on the signs in last_A. _A, _bias = multiply_by_A_signs(last_A, d_pos, d_neg, b_pos, b_neg) if isinstance(last_A, Patches): # Save the patch size, which will be used in init_slope() to determine the number of optimizable parameters. A_prod = _A.patches if start_node is not None: # Regular patches. self.patch_size[start_node.name] = A_prod.size() return _A, _bias # In patches mode we might need an unfold. # lower_dx, lower_dy, upper_dx, upper_dy, lower_b, upper_b: 1, batch, current_c, current_w, current_h or None # In _backward_relaxation, the lb_x etc. potentially got swapped. This may cause the memory to become # non-contiguous. This is not a problem if the spec_size is 1, e.g. if alphas are shared. upper_dx = upper_dx.contiguous() upper_dy = upper_dy.contiguous() lower_dx = lower_dx.contiguous() lower_dy = lower_dy.contiguous() upper_b = upper_b.contiguous() lower_b = lower_b.contiguous() upper_dx = maybe_unfold_patches(upper_dx, last_lA if last_lA is not None else last_uA) upper_dy = maybe_unfold_patches(upper_dy, last_lA if last_lA is not None else last_uA) lower_dx = maybe_unfold_patches(lower_dx, last_lA if last_lA is not None else last_uA) lower_dy = maybe_unfold_patches(lower_dy, last_lA if last_lA is not None else last_uA) upper_b = maybe_unfold_patches(upper_b, last_lA if last_lA is not None else last_uA) lower_b = maybe_unfold_patches(lower_b, last_lA if last_lA is not None else last_uA) uAx, ubias = _bound_oneside(last_uA, upper_dx, lower_dx, upper_b, lower_b) uAy, ubias2 = _bound_oneside(last_uA, upper_dy, lower_dy, upper_b, lower_b) if isinstance(ubias, torch.Tensor): assert isinstance(ubias2, torch.Tensor) assert torch.all(ubias == ubias2) else: assert ubias == ubias2 == 0 lAx, lbias = _bound_oneside(last_lA, lower_dx, upper_dx, lower_b, upper_b) lAy, lbias2 = _bound_oneside(last_lA, lower_dy, upper_dy, lower_b, upper_b) if isinstance(lbias, torch.Tensor): assert isinstance(lbias2, torch.Tensor) assert torch.all(lbias == lbias2) else: assert lbias == lbias2 == 0 return [(lAx, uAx), (lAy, uAy)], lbias, ubias def interval_propagate(self, *v): h_Lx, h_Ux = v[0][0], v[0][1] h_Ly, h_Uy = v[1][0], v[1][1] return self.forward(h_Lx, h_Ly), self.forward(h_Ux, h_Uy) class BoundMax(BoundMinMax): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.op = 'max' class BoundMin(BoundMinMax): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.op = 'min' ================================================ FILE: auto_LiRPA/operators/normalization.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """ Normalization operators""" import copy import torch import torch.nn as nn from .base import * from .constant import BoundConstant from .leaf import BoundParams from .solver_utils import grb class BoundBatchNormalization(Bound): def __init__(self, attr, inputs, output_index, options, training): super().__init__(attr, inputs, output_index, options) self.eps = attr['epsilon'] self.momentum = round(1 - attr['momentum'], 5) # take care! self.options = options.get("bn", {}) # modes: # - forward: use mean and variance estimated from clean forward pass # - ibp: use mean and variance estimated from ibp self.bn_mode = self.options.get("mode", "forward") self.use_mean = self.options.get("mean", True) self.use_var = self.options.get("var", True) self.use_affine = self.options.get("affine", True) self.training = training self.patches_start = True self.mode = options.get("conv_mode", "matrix") if not self.use_mean or not self.use_var: logger.info(f'Batch normalization node {self.name}: use_mean {self.use_mean}, use_var {self.use_var}') def _check_unused_mean_or_var(self): # Check if either mean or var is opted out if not self.use_mean: self.current_mean = torch.zeros_like(self.current_mean) if not self.use_var: self.current_var = torch.ones_like(self.current_var) def forward(self, x, w, b, m, v): if len(x.shape) == 2: self.patches_start = False if self.training: dim = [0] + list(range(2, x.ndim)) self.current_mean = x.mean(dim) self.current_var = x.var(dim, unbiased=False) else: self.current_mean = m.data self.current_var = v.data self._check_unused_mean_or_var() if not self.use_affine: w = torch.ones_like(w) b = torch.zeros_like(b) result = F.batch_norm(x, m, v, w, b, self.training, self.momentum, self.eps) if not self.use_mean or not self.use_var: # If mean or variance is disabled, recompute the output from self.current_mean # and self.current_var instead of using standard F.batch_norm. w = w / torch.sqrt(self.current_var + self.eps) b = b - self.current_mean * w shape = (1, -1) + (1,) * (x.ndim - 2) result = w.view(*shape) * x + b.view(*shape) return result def bound_forward(self, dim_in, *x): inp = x[0] assert (x[1].lower == x[1].upper).all(), "unsupported forward bound with perturbed mean" assert (x[2].lower == x[2].upper).all(), "unsupported forward bound with perturbed var" weight, bias = x[1].lower, x[2].lower if not self.training: assert (x[3].lower == x[3].upper).all(), "unsupported forward bound with perturbed mean" assert (x[4].lower == x[4].upper).all(), "unsupported forward bound with perturbed var" self.current_mean = x[3].lower self.current_var = x[4].lower self._check_unused_mean_or_var() if not self.use_affine: weight = torch.ones_like(weight) bias = torch.zeros_like(bias) tmp_bias = bias - self.current_mean / torch.sqrt(self.current_var + self.eps) * weight tmp_weight = weight / torch.sqrt(self.current_var + self.eps) tmp_weight = tmp_weight.view(*((1, 1, -1) + (1,) * (inp.lw.ndim - 3))) new_lw = torch.clamp(tmp_weight, min=0.) * inp.lw + torch.clamp(tmp_weight, max=0.) * inp.uw new_uw = torch.clamp(tmp_weight, min=0.) * inp.uw + torch.clamp(tmp_weight, max=0.) * inp.lw tmp_weight = tmp_weight.view(*((1, -1) + (1,) * (inp.lb.ndim - 2))) tmp_bias = tmp_bias.view(*((1, -1) + (1,) * (inp.lb.ndim - 2))) new_lb = torch.clamp(tmp_weight, min=0.) * inp.lb + torch.clamp(tmp_weight, max=0.) * inp.ub + tmp_bias new_ub = torch.clamp(tmp_weight, min=0.) * inp.ub + torch.clamp(tmp_weight, max=0.) 
* inp.lb + tmp_bias return LinearBound( lw = new_lw, lb = new_lb, uw = new_uw, ub = new_ub) def bound_backward(self, last_lA, last_uA, *x, **kwargs): assert not self.is_input_perturbed(1) and not self.is_input_perturbed(2), \ 'Weight perturbation is not supported for BoundBatchNormalization' def get_param(p): if isinstance(p, BoundConstant): # When affine is disabled in BN return p.value elif isinstance(p, BoundParams): return p.param else: raise TypeError(p) # x[0]: input, x[1]: weight, x[2]: bias, x[3]: running_mean, x[4]: running_var weight = get_param(x[1]) bias = get_param(x[2]) if not self.training: self.current_mean = x[3].value self.current_var = x[4].value self._check_unused_mean_or_var() if not self.use_affine: weight = torch.ones_like(weight) bias = torch.zeros_like(bias) tmp_bias = bias - self.current_mean / torch.sqrt(self.current_var + self.eps) * weight tmp_weight = weight / torch.sqrt(self.current_var + self.eps) def _bound_oneside(last_A): if last_A is None: return None, 0 if type(last_A) == Tensor: next_A = last_A * tmp_weight.view(*((1, 1, -1) + (1,) * (last_A.ndim - 3))) if last_A.ndim > 3: sum_bias = (last_A.sum(tuple(range(3, last_A.ndim))) * tmp_bias).sum(2) else: sum_bias = (last_A * tmp_bias).sum(2) elif type(last_A) == Patches: # TODO Only 4-dim BN supported in the Patches mode if last_A.identity == 0: # FIXME (09/17): Need to check if it has already been padding. # Patch has dimension (out_c, batch, out_h, out_w, c, h, w) or (unstable_size, batch, c, h, w) patches = last_A.patches # tmp_weight has shape (c,), it will be applied on the (c,) dimension. patches = patches * tmp_weight.view(*([1] * (patches.ndim - 3)), -1, 1, 1) # Match with sparse or non-sparse patches. next_A = last_A.create_similar(patches) # bias to size (c,), need expansion before unfold. bias = tmp_bias.view(-1,1,1).expand(self.input_shape[1:]).unsqueeze(0) # Unfolded bias has shape (1, out_h, out_w, in_c, H, W). bias_unfolded = inplace_unfold(bias, kernel_size=last_A.patches.shape[-2:], padding=last_A.padding, stride=last_A.stride, inserted_zeros=last_A.inserted_zeros, output_padding=last_A.output_padding) if last_A.unstable_idx is not None: # Sparse bias has shape (unstable_size, batch, in_c, H, W). bias_unfolded = bias_unfolded[:, last_A.unstable_idx[1], last_A.unstable_idx[2]] sum_bias = torch.einsum('bschw,sbchw->sb', bias_unfolded, last_A.patches) # Output sum_bias has shape (unstable_size, batch). else: # Patch has dimension (out_c, batch, out_h, out_w, c, h, w). sum_bias = torch.einsum('bijchw,sbijchw->sbij', bias_unfolded, last_A.patches) # Output sum_bias has shape (out_c, batch, out_h, out_w). else: # we should create a real identity Patch num_channel = tmp_weight.numel() # desired Shape is (c, batch, out_w, out_h, c, 1, 1) or (unstable_size, batch, c, 1, 1). patches = (torch.eye(num_channel, device=tmp_weight.device) * tmp_weight.view(-1)).view(num_channel, 1, 1, 1, num_channel, 1, 1) # Expand out_h, out_w dimensions but not for batch dimension. patches = patches.expand(-1, -1, last_A.output_shape[2], last_A.output_shape[3], -1, 1, 1) if last_A.unstable_idx is not None: # Select based on unstable indices. patches = patches[last_A.unstable_idx[0], :, last_A.unstable_idx[1], last_A.unstable_idx[2]] # Expand the batch dimension. patches = patches.expand(-1, last_A.shape[1], *([-1] * (patches.ndim - 2))) next_A = last_A.create_similar(patches, stride=1, padding=0, identity=0) if last_A.unstable_idx is not None: # Need to expand the bias and choose the selected ones. 
bias = tmp_bias.view(-1,1,1,1).expand(-1, 1, last_A.output_shape[2], last_A.output_shape[3]) bias = bias[last_A.unstable_idx[0], :, last_A.unstable_idx[1], last_A.unstable_idx[2]] # Expand the batch dimension, and final output shape is (unstable_size, batch). sum_bias = bias.expand(-1, last_A.shape[1]) else: # Output sum_bias has shape (out_c, batch, out_h, out_w). sum_bias = tmp_bias.view(-1, 1, 1, 1).expand(-1, *last_A.shape[1:4]) else: raise NotImplementedError() return next_A, sum_bias lA, lbias = _bound_oneside(last_lA) uA, ubias = _bound_oneside(last_uA) return [(lA, uA), (None, None), (None, None), (None, None), (None, None)], lbias, ubias def interval_propagate(self, *v): assert not self.is_input_perturbed(1) and not self.is_input_perturbed(2), \ 'Weight perturbation is not supported for BoundBatchNormalization' h_L, h_U = v[0] weight, bias = v[1][0], v[2][0] mid = (h_U + h_L) / 2.0 diff = (h_U - h_L) / 2.0 # Use `mid` in IBP to compute mean and variance for BN. # In this case, `forward` should not have been called. if self.bn_mode == 'ibp' and not hasattr(self, 'forward_value'): m, v, w, b = tuple(self.inputs[i].forward() for i in range(1, 5)) self.forward(mid, m, v, w, b) if not self.training: assert not (self.is_input_perturbed(3) or self.is_input_perturbed(4)) self.current_mean = v[3][0] self.current_var = v[4][0] self._check_unused_mean_or_var() if not self.use_affine: weight = torch.ones_like(weight) bias = torch.zeros_like(bias) tmp_weight = weight / torch.sqrt(self.current_var + self.eps) tmp_weight_abs = tmp_weight.abs() tmp_bias = bias - self.current_mean * tmp_weight shape = (1, -1) + (1,) * (mid.ndim - 2) # interval_propagate() of the Linear layer may encounter input with different norms. norm, eps = Interval.get_perturbation(v[0])[:2] if norm == torch.inf: center = tmp_weight.view(*shape) * mid + tmp_bias.view(*shape) deviation = tmp_weight_abs.view(*shape) * diff elif norm > 0: mid = v[0][0] center = tmp_weight.view(*shape) * mid + tmp_bias.view(*shape) if norm == 2: ptb = copy.deepcopy(v[0].ptb) ptb.eps = eps * tmp_weight_abs.max() return Interval(center, center, ptb=ptb) else: # General Lp norm. 
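# Soundness of the Linf replacement (hedged): for any p >= 1 and ||delta||_p <= eps,
# every coordinate satisfies |delta_i| <= eps, so an Linf ball of the same radius
# contains the Lp ball. Since batch norm in eval mode is an element-wise affine map
# y_i = w~_i * x_i + b~_i, propagating the looser Linf ball coordinate-wise with
# deviation |w~_i| * eps is a valid (if conservative) over-approximation.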
center = tmp_weight.view(*shape) * mid deviation = tmp_weight_abs.view(*shape) * eps # use a Linf ball to replace Lp norm else: raise NotImplementedError lower, upper = center - deviation, center + deviation return lower, upper def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): # e.g., last layer input gurobi vars (3,32,32) gvars_array = np.array(v[0]) # pre_layer_shape (1,3,32,32) pre_layer_shape = np.expand_dims(gvars_array, axis=0).shape # this layer shape (1,8,16,16) this_layer_shape = self.output_shape weight, bias = v[1], v[2] self.current_mean = v[3] self.current_var = v[4] self._check_unused_mean_or_var() if not self.use_affine: weight = torch.ones_like(weight) bias = torch.zeros_like(bias) tmp_bias = bias - self.current_mean / torch.sqrt(self.current_var + self.eps) * weight tmp_weight = weight / torch.sqrt(self.current_var + self.eps) new_layer_gurobi_vars = [] neuron_idx = 0 for out_chan_idx in range(this_layer_shape[1]): out_chan_vars = [] for out_row_idx in range(this_layer_shape[2]): out_row_vars = [] for out_col_idx in range(this_layer_shape[3]): # print(this_layer_bias.shape, out_chan_idx, out_lbs.size(1)) lin_expr = tmp_bias[out_chan_idx].item() + tmp_weight[out_chan_idx].item() * gvars_array[out_chan_idx, out_row_idx, out_col_idx] var = model.addVar(lb=-float('inf'), ub=float('inf'), obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(lin_expr == var, name=f'lay{self.name}_{neuron_idx}_eq') neuron_idx += 1 out_row_vars.append(var) out_chan_vars.append(out_row_vars) new_layer_gurobi_vars.append(out_chan_vars) self.solver_vars = new_layer_gurobi_vars model.update() def update_requires_input_bounds(self): self._check_weight_perturbation() class LayerNormImpl(nn.Module): def __init__(self, axis, epsilon): super().__init__() self.axis = axis self.epsilon = epsilon def forward(self, x, scale, bias): mean = x.mean(self.axis, keepdim=True) d = x - mean dd = d**2 var = dd.mean(self.axis, keepdim=True) var_eps = var + self.epsilon std_dev = torch.sqrt(var_eps) inv_std_dev = torch.reciprocal(std_dev) normalized = d * inv_std_dev normalized_scaled = normalized * scale + bias return normalized_scaled class BoundLayerNormalization(Bound): def __init__(self, attr, inputs, output_index, options): super().__init__(attr, inputs, output_index, options) self.complex = True self.model = LayerNormImpl(self.attr['axis'], self.attr['epsilon']) def forward(self, x, scale, bias): self.input = (x, scale, bias) return self.model(x, scale, bias) ================================================ FILE: auto_LiRPA/operators/pooling.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """Pooling operators.""" from collections import OrderedDict from .base import * from .activation_base import BoundOptimizableActivation import numpy as np from .solver_utils import grb class BoundMaxPool(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) assert ('pads' not in attr) or (attr['pads'][0] == attr['pads'][2]) assert ('pads' not in attr) or (attr['pads'][1] == attr['pads'][3]) self.requires_input_bounds = [0] self.kernel_size = attr['kernel_shape'] self.stride = attr['strides'] self.padding = [attr['pads'][0], attr['pads'][1]] self.ceil_mode = False self.use_default_ibp = True self.alpha = {} self.init = {} def forward(self, x): output, _ = F.max_pool2d(x, self.kernel_size, self.stride, self.padding, return_indices=True, ceil_mode=self.ceil_mode) return output def project_simplex(self, patches): sorted = torch.flatten(patches, -2) sorted, _ = torch.sort(sorted, -1, descending=True) rho_sum = torch.cumsum(sorted, -1) rho_value = 1 - rho_sum rho_value = (sorted + rho_value/torch.tensor( range(1, sorted.size(-1)+1), dtype=torch.float, device=sorted.device)) > 0 _, rho_index = torch.max(torch.cumsum(rho_value, -1), -1) rho_sum = torch.gather(rho_sum, -1, rho_index.unsqueeze(-1)).squeeze(-1) lbd = 1/(rho_index+1)* (1-rho_sum) return torch.clamp(patches + lbd.unsqueeze(-1).unsqueeze(-1), min=0) def _init_opt_parameters_impl(self, size_spec, name_start): if name_start == '_forward': warnings.warn("MaxPool's optimization is not supported for forward mode") return None ref = self.inputs[0].lower # a reference variable for getting the shape alpha = torch.empty( [1, size_spec, self.input_shape[0], self.input_shape[1], self.output_shape[-2], self.output_shape[-1], self.kernel_size[0], self.kernel_size[1]], dtype=torch.float, device=ref.device, requires_grad=True) self.init[name_start] = False return alpha @staticmethod @torch.jit.script def jit_mutiply(Apos, Aneg, pos, neg): return pos.contiguous() * Apos + neg.contiguous() * Aneg def bound_backward(self, last_lA, last_uA, x, start_node=None, unstable_idx=None, **kwargs): # self.padding is a tuple of two elements: (height dimension padding, width dimension padding). paddings = tuple((self.padding[0], self.padding[0], self.padding[1], self.padding[1])) if self.stride[0] != self.kernel_size[0]: raise ValueError("self.stride ({}) != self.kernel_size ({})".format(self.stride, self.kernel_size)) shape = self.input_shape batch_size = x.lower.shape[0] shape = list(shape[:-2]) + [a + 2*b for a, b in zip(self.input_shape[-2:], self.padding)] shape[0] = batch_size # Lower and upper D matrices. They have size (batch_size, input_c, x, y) which will be multiplied on enlarges the A matrices via F.interpolate. upper_d = torch.zeros(shape, device=x.device) lower_d = None # Size of upper_b and lower_b: (batch_size, output_c, h, w). upper_b = torch.zeros(batch_size, *self.output_shape[1:], device=x.device) lower_b = torch.zeros(batch_size, *self.output_shape[1:], device=x.device) # Find the maxpool neuron whose input bounds satisfy l_i > max_j u_j for all j != i. In this case, the maxpool neuron is linear, and we can set upper_d = lower_d = 1. # We first find which indices has the largest lower bound. 
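# Tiny worked example (illustrative numbers): for a 2-element window with bounds
# l = [2, 0], u = [3, 1], we have l_1 = 2 >= max_{j != 1} u_j = 1, so the pooled output
# always equals x_1 and both relaxations can use slope 1 on x_1 with zero bias.
# If instead l = [0, 0], u = [3, 1], no input dominates; the upper bound then falls back
# to the constant concrete upper bound (3 here), while the lower bound uses the single
# input with the largest lower bound.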
max_lower, max_lower_index = F.max_pool2d( x.lower, self.kernel_size, self.stride, self.padding, return_indices=True, ceil_mode=self.ceil_mode) # Set the upper bound of the i-th input to -inf so it will not be selected as the max. if paddings == (0,0,0,0): delete_upper = torch.scatter( torch.flatten(x.upper, -2), -1, torch.flatten(max_lower_index, -2), -torch.inf).view(upper_d.shape) else: delete_upper = torch.scatter( torch.flatten(F.pad(x.upper, paddings), -2), -1, torch.flatten(max_lower_index, -2), -torch.inf).view(upper_d.shape) # Find the the max upper bound over the remaining ones. max_upper, _ = F.max_pool2d( delete_upper, self.kernel_size, self.stride, 0, return_indices=True, ceil_mode=self.ceil_mode) # The upper bound slope for maxpool is either 1 on input satisfies l_i > max_j u_j (linear), or 0 everywhere. Upper bound is not optimized. values = torch.zeros_like(max_lower) values[max_lower >= max_upper] = 1.0 upper_d = torch.scatter( torch.flatten(upper_d, -2), -1, torch.flatten(max_lower_index, -2), torch.flatten(values, -2)).view(upper_d.shape) if self.opt_stage == 'opt': if unstable_idx is not None and self.alpha[start_node.name].size(1) != 1: if isinstance(unstable_idx, tuple): raise NotImplementedError('Please use --conv_mode matrix') elif unstable_idx.ndim == 1: # Only unstable neurons of the start_node neurons are used. alpha = self.non_deter_index_select( self.alpha[start_node.name], index=unstable_idx, dim=1) elif unstable_idx.ndim == 2: # Each element in the batch selects different neurons. alpha = batched_index_select( self.alpha[start_node.name], index=unstable_idx, dim=1) else: raise ValueError else: alpha = self.alpha[start_node.name] if not self.init[start_node.name]: lower_d = torch.zeros((shape), device=x.device) # [batch, C, H, W] lower_d = torch.scatter( torch.flatten(lower_d, -2), -1, torch.flatten(max_lower_index, -2), 1.0).view(upper_d.shape) # shape [batch, C*k*k, L] lower_d_unfold = F.unfold( lower_d, self.kernel_size, 1, stride=self.stride) # [batch, C, k, k, out_H, out_W] alpha_data = lower_d_unfold.view( lower_d.shape[0], lower_d.shape[1], self.kernel_size[0], self.kernel_size[1], self.output_shape[-2], self.output_shape[-1]) # [batch, C, out_H, out_W, k, k] alpha.data.copy_(alpha_data.permute((0,1,4,5,2,3)).clone().detach()) self.init[start_node.name] = True # In optimization mode, we use the same lower_d once builded. if self.padding[0] > 0 or self.padding[1] > 0: lower_d = lower_d[...,self.padding[0]:-self.padding[0], self.padding[1]:-self.padding[1]] # The lower bound coefficients must be positive and projected to an unit simplex. alpha.data = self.project_simplex(alpha.data).clone().detach() # TODO: don't do this, never re-assign the .data property. Use copy_ instead. # permute the last 6 dimensions of alpha to [batch, C, k, k, out_H, out_W], which prepares for the unfold operation. alpha = alpha.permute((0,1,2,3,6,7,4,5)) alpha_shape = alpha.shape alpha = alpha.reshape((alpha_shape[0]*alpha_shape[1]*alpha_shape[2], -1, alpha_shape[-2]*alpha_shape[-1])) lower_d = F.fold(alpha, self.input_shape[-2:], self.kernel_size, 1, self.padding, self.stride) lower_d = lower_d.view(alpha_shape[0], alpha_shape[1], alpha_shape[2], *lower_d.shape[1:]) lower_d = lower_d.squeeze(0) else: lower_d = torch.zeros((shape), device=x.device) # Not optimizable bounds. We simply set \hat{z} >= z_i where i is the input element with largest lower bound. 
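# This choice is always sound because max_j z_j >= z_i for every i; picking
# i = argmax_j l_j simply makes this single-variable lower plane as tight as possible
# among such choices (e.g. with window lower bounds l = [0, -1], the pooled output is
# lower-bounded by z_1).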
lower_d = torch.scatter(torch.flatten(lower_d, -2), -1, torch.flatten(max_lower_index, -2), 1.0).view(upper_d.shape) if self.padding[0] > 0 or self.padding[1] > 0: lower_d = lower_d[...,self.padding[0]:-self.padding[0], self.padding[1]:-self.padding[1]] # For the upper bound, we set the bias term to concrete upper bounds for maxpool neurons that are not linear. max_upper_, _ = F.max_pool2d(x.upper, self.kernel_size, self.stride, self.padding, return_indices=True, ceil_mode=self.ceil_mode) upper_b[max_upper > max_lower] = max_upper_[max_upper > max_lower] def _bound_oneside(last_A, d_pos, d_neg, b_pos, b_neg): if last_A is None: return None, 0 bias = 0 if isinstance(last_A, torch.Tensor): pos_A = last_A.clamp(min=0) neg_A = last_A.clamp(max=0) if b_pos is not None: # This is matrix mode, and padding is considered in the previous layers bias = bias + self.get_bias(pos_A, b_pos) if b_neg is not None: bias = bias + self.get_bias(neg_A, b_neg) # Here we should comfirm that the maxpool patches are not overlapped. shape = last_A.size() padding = [self.padding[0], self.padding[0], self.padding[1], self.padding[1]] d_pos = F.pad(d_pos, padding) d_neg = F.pad(d_neg, padding) pos_A = F.interpolate( pos_A.view(shape[0] * shape[1], *shape[2:]), scale_factor=self.kernel_size) if d_pos.shape[-2] > pos_A.shape[-2] or d_pos.shape[-1] > pos_A.shape[-1]: if not (d_pos.shape[-2] > pos_A.shape[-2] and d_pos.shape[-1] > pos_A.shape[-1]): raise NotImplementedError( "Asymmetric padding of maxpool not implemented.") pos_A = F.pad(pos_A, (0, d_pos.shape[-2] - pos_A.shape[-2], 0, d_pos.shape[-1] - pos_A.shape[-1])) else: d_pos = F.pad(d_pos, (0, pos_A.shape[-2] - d_pos.shape[-2], 0, pos_A.shape[-1] - d_pos.shape[-1])) pos_A = pos_A.view(shape[0], shape[1], *pos_A.shape[1:]) neg_A = F.interpolate(neg_A.view(shape[0] * shape[1], *shape[2:]), scale_factor=self.kernel_size) if d_neg.shape[-2] > neg_A.shape[-2] or d_neg.shape[-1] > neg_A.shape[-1]: if not (d_neg.shape[-2] > neg_A.shape[-2] and d_neg.shape[-1] > neg_A.shape[-1]): raise NotImplementedError("Asymmetric padding of maxpool not implemented.") neg_A = F.pad(neg_A, (0, d_neg.shape[-2] - neg_A.shape[-2], 0, d_neg.shape[-1] - neg_A.shape[-1])) else: d_neg = F.pad(d_neg, (0, neg_A.shape[-2] - d_neg.shape[-2], 0, neg_A.shape[-1] - d_neg.shape[-1])) neg_A = neg_A.view(shape[0], shape[1], *neg_A.shape[1:]) next_A = self.jit_mutiply(pos_A, neg_A, d_pos, d_neg) if self.padding[0] > 0 or self.padding[1] > 0: next_A = next_A[...,self.padding[0]:-self.padding[0], self.padding[1]:-self.padding[1]] elif isinstance(last_A, Patches): # The last_A.patches was not padded, so we need to pad them here. # If this Conv layer is followed by a ReLU layer, then the padding was already handled there and there is no need to pad again. one_d = torch.ones(tuple(1 for i in self.output_shape[1:]), device=last_A.patches.device, dtype=last_A.patches.dtype).expand(self.output_shape[1:]) # Add batch dimension. one_d = one_d.unsqueeze(0) # After unfolding, the shape is (1, out_h, out_w, in_c, h, w) one_d_unfolded = inplace_unfold( one_d, kernel_size=last_A.patches.shape[-2:], stride=last_A.stride, padding=last_A.padding, inserted_zeros=last_A.inserted_zeros, output_padding=last_A.output_padding) if last_A.unstable_idx is not None: # Move out_h, out_w dimension to the front for easier selection. one_d_unfolded_r = one_d_unfolded.permute(1, 2, 0, 3, 4, 5) # for sparse patches the shape is (unstable_size, batch, in_c, h, w). Batch size is 1 so no need to select here. 
one_d_unfolded_r = one_d_unfolded_r[ last_A.unstable_idx[1], last_A.unstable_idx[2]] else: # Append the spec dimension. one_d_unfolded_r = one_d_unfolded.unsqueeze(0) patches = last_A.patches * one_d_unfolded_r if b_pos is not None: patch_pos = Patches( patches.clamp(min=0), last_A.stride, last_A.padding, last_A.shape, unstable_idx=last_A.unstable_idx, output_shape=last_A.output_shape) bias = bias + self.get_bias(patch_pos, b_pos) if b_neg is not None: patch_neg = Patches( patches.clamp(max=0), last_A.stride, last_A.padding, last_A.shape, unstable_idx=last_A.unstable_idx, output_shape=last_A.output_shape) bias = bias + self.get_bias(patch_neg, b_neg) # bias = bias.transpose(0,1) shape = last_A.shape pos_A = last_A.patches.clamp(min=0) neg_A = last_A.patches.clamp(max=0) def upsample(last_patches, last_A): if last_A.unstable_idx is None: patches = F.interpolate( last_patches.view(shape[0] * shape[1] * shape[2], *shape[3:]), scale_factor=[1,]+self.kernel_size) patches = patches.view(shape[0], shape[1], shape[2], *patches.shape[1:]) else: patches = F.interpolate( last_patches, scale_factor=[1,] + self.kernel_size) return Patches( patches, stride=last_A.stride, padding=last_A.padding, shape=patches.shape, unstable_idx=last_A.unstable_idx, output_shape=last_A.output_shape) pos_A = upsample(pos_A, last_A) neg_A = upsample(neg_A, last_A) padding, stride, output_padding = compute_patches_stride_padding( self.input_shape, last_A.padding, last_A.stride, self.padding, self.stride, last_A.inserted_zeros, last_A.output_padding) pos_A.padding, pos_A.stride, pos_A.output_padding = padding, stride, output_padding neg_A.padding, neg_A.stride, neg_A.output_padding = padding, stride, output_padding # unsqueeze for the spec dimension d_pos = maybe_unfold_patches(d_pos.unsqueeze(0), pos_A) d_neg = maybe_unfold_patches(d_neg.unsqueeze(0), neg_A) next_A_patches = self.jit_mutiply( pos_A.patches, neg_A.patches, d_pos, d_neg) if start_node is not None: self.patch_size[start_node.name] = next_A_patches.size() next_A = Patches( next_A_patches, stride, padding, next_A_patches.shape, unstable_idx=last_A.unstable_idx, output_shape=last_A.output_shape, inserted_zeros=last_A.inserted_zeros, output_padding=output_padding) return next_A, bias if self.padding[0] > 0: upper_d = upper_d[...,self.padding[0]:-self.padding[0], self.padding[0]:-self.padding[0]] uA, ubias = _bound_oneside(last_uA, upper_d, lower_d, upper_b, lower_b) lA, lbias = _bound_oneside(last_lA, lower_d, upper_d, lower_b, upper_b) return [(lA, uA)], lbias, ubias def bound_forward(self, dim_in, x): lower_d, lower_b, upper_d, upper_b = self.bound_relax(x, init=False) def _bound_oneside(w_pos, b_pos, w_neg, b_neg, d, b): d_pos, d_neg = d.clamp(min=0), d.clamp(max=0) w_new = d_pos.unsqueeze(1) * w_pos + d_neg.unsqueeze(1) * w_neg b_new = d_pos * b_pos + d_neg * b_neg if isinstance(self.kernel_size, list) and len(self.kernel_size) == 2: tot_kernel_size = prod(self.kernel_size) elif isinstance(self.kernel_size, int): tot_kernel_size = self.kernel_size ** 2 else: raise ValueError(f'Unsupported kernel size {self.kernel_size}') w_pooled = (F.avg_pool2d(w_new.view(-1, *w_new.shape[2:]), self.kernel_size, self.stride, self.padding, ceil_mode=self.ceil_mode) * tot_kernel_size) w_pooled = w_pooled.reshape(w_new.shape[0], -1, *w_pooled.shape[1:]) b_pooled = F.avg_pool2d(b_new, self.kernel_size, self.stride, self.padding, ceil_mode=self.ceil_mode) * tot_kernel_size + b return w_pooled, b_pooled lw, lb = _bound_oneside(x.lw, x.lb, x.uw, x.ub, lower_d, lower_b) uw, ub = 
_bound_oneside(x.uw, x.ub, x.lw, x.lb, upper_d, upper_b) return LinearBound(lw, lb, uw, ub) def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) # Only used by forward mode paddings = tuple(self.padding + self.padding) self.upper, self.lower = x.upper, x.lower # A_shape = last_lA.shape if last_lA is not None else last_uA.shape # batch_size, input_c, x, y upper_d = torch.zeros_like(x.lower) lower_d = torch.zeros_like(x.lower) upper_d = F.pad(upper_d, paddings) lower_d = F.pad(lower_d, paddings) # batch_size, output_c, x, y upper_b = torch.zeros((list(self.output_shape))).to(x.lower) lower_b = torch.zeros((list(self.output_shape))).to(x.lower) # 1. find the index i where li > uj for all j, then set upper_d = lower_d = 1 max_lower, max_lower_index = F.max_pool2d(x.lower, self.kernel_size, self.stride, self.padding, return_indices=True, ceil_mode=self.ceil_mode) delete_upper = torch.scatter(torch.flatten(F.pad(x.upper, paddings), -2), -1, torch.flatten(max_lower_index, -2), -torch.inf).view(upper_d.shape) max_upper, _ = F.max_pool2d(delete_upper, self.kernel_size, self.stride, 0, return_indices=True, ceil_mode=self.ceil_mode) values = torch.zeros_like(max_lower) values[max_lower >= max_upper] = 1.0 upper_d = torch.scatter(torch.flatten(upper_d, -2), -1, torch.flatten(max_lower_index, -2), torch.flatten(values, -2)).view(upper_d.shape) if self.opt_stage == 'opt': raise NotImplementedError else: lower_d = torch.scatter(torch.flatten(lower_d, -2), -1, torch.flatten(max_lower_index, -2), 1.0).view(upper_d.shape) if self.padding[0] > 0: lower_d = lower_d[...,self.padding[0]:-self.padding[0], self.padding[0]:-self.padding[0]] values[:] = 0.0 max_upper_, _ = F.max_pool2d(x.upper, self.kernel_size, self.stride, self.padding, return_indices=True, ceil_mode=self.ceil_mode) values[max_upper > max_lower] = max_upper_[max_upper > max_lower] upper_b = values if self.padding[0] > 0: upper_d = upper_d[...,self.padding[0]:-self.padding[0], self.padding[0]:-self.padding[0]] return lower_d, lower_b, upper_d, upper_b def dump_alpha(self, device=None, dtype=None, non_blocking=False): ret = {'alpha': self._transfer_alpha(self.alpha, device=device, dtype=dtype, non_blocking=non_blocking, require_grad=False)} ret['init'] = self.init return ret def restore_alpha(self, alpha, device=None, dtype=None, non_blocking=False): self.alpha = self._transfer_alpha(alpha['alpha'], device=device, dtype=dtype, non_blocking=non_blocking, require_grad=True) self.init = alpha['init'] def drop_unused_alpha(self, keep_nodes): for spec_name in list(self.alpha.keys()): if spec_name not in keep_nodes: del self.alpha[spec_name] del self.init[spec_name] def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): # e.g., last layer input gurobi vars (3,32,32) gvars_array = np.array(v[0]) # pre_layer_shape (1,32,27,27) pre_layer_shape = np.expand_dims(gvars_array, axis=0).shape # this layer shape (1,32,6,6) this_layer_shape = self.output_shape assert this_layer_shape[2] == ((2 * self.padding[0] + pre_layer_shape[2] - (self.stride[0] - 1))//self.stride[0]) new_layer_gurobi_vars = [] neuron_idx = 0 pre_ubs = self.forward(self.inputs[0].upper).detach().cpu().numpy() for out_chan_idx in range(this_layer_shape[1]): out_chan_vars = [] for out_row_idx in range(this_layer_shape[2]): out_row_vars = [] for out_col_idx in range(this_layer_shape[3]): a_sum = 0.0 v = model.addVar(lb=-float('inf'), ub=float('inf'), obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') for 
ker_row_idx in range(self.kernel_size[0]): in_row_idx = -self.padding[0] + self.stride[0] * out_row_idx + ker_row_idx if (in_row_idx < 0) or (in_row_idx == len(gvars_array[out_chan_idx][ker_row_idx])): # This is padding -> value of 0 continue for ker_col_idx in range(self.kernel_size[1]): in_col_idx = -self.padding[1] + self.stride[1] * out_col_idx + ker_col_idx if (in_col_idx < 0) or (in_col_idx == pre_layer_shape[3]): # This is padding -> value of 0 continue var = gvars_array[out_chan_idx][in_row_idx][in_col_idx] a = model.addVar(vtype=grb.GRB.BINARY) a_sum += a model.addConstr(v >= var) model.addConstr(v <= var + (1 - a) * pre_ubs[ 0, out_chan_idx, out_row_idx, out_col_idx]) model.addConstr(a_sum == 1, name=f'lay{self.name}_{neuron_idx}_eq') out_row_vars.append(v) out_chan_vars.append(out_row_vars) new_layer_gurobi_vars.append(out_chan_vars) self.solver_vars = new_layer_gurobi_vars model.update() class BoundGlobalAveragePool(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) def forward(self, x): output = nn.AdaptiveAvgPool2d((1, 1)).forward(x) # adaptiveAveragePool with output size (1, 1) return output def bound_backward(self, last_lA, last_uA, x, **kwargs): H, W = self.input_shape[-2], self.input_shape[-1] lA = (last_lA.expand(list(last_lA.shape[:-2]) + [H, W]) / (H * W)) if last_lA is not None else None uA = (last_uA.expand(list(last_uA.shape[:-2]) + [H, W]) / (H * W)) if last_uA is not None else None return [(lA, uA)], 0, 0 def interval_propagate(self, *v): h_L, h_U = v[0] h_L = F.adaptive_avg_pool2d(h_L, (1, 1)) h_U = F.adaptive_avg_pool2d(h_U, (1, 1)) return h_L, h_U class BoundAveragePool(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): # assumptions: ceil_mode=False, count_include_pad=True super().__init__(attr, inputs, output_index, options) assert ('pads' not in attr) or (attr['pads'][0] == attr['pads'][2]) assert ('pads' not in attr) or (attr['pads'][1] == attr['pads'][3]) self.kernel_size = attr['kernel_shape'] assert len(self.kernel_size) == 2 self.stride = attr['strides'] assert len(self.stride) == 2 # FIXME (22/07/02): padding is inconsistently handled. Should use 4-tuple. 
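# Illustrative sketch (not part of this file): the backward rule in BoundGlobalAveragePool
# above treats global average pooling as the linear map x -> mean(x) over (H, W), so an incoming
# last_A is simply expanded over the spatial dimensions and divided by H * W. A quick check of
# that identity against autograd; the shapes below are made up for illustration only.
import torch
import torch.nn.functional as F
x = torch.randn(2, 3, 5, 7, requires_grad=True)
A = torch.randn(2, 3, 1, 1)                      # stand-in for last_A
y = F.adaptive_avg_pool2d(x, (1, 1))
(vjp,) = torch.autograd.grad(y, x, grad_outputs=A)
H, W = x.shape[-2:]
expanded = A.expand(*A.shape[:-2], H, W) / (H * W)
assert torch.allclose(vjp, expanded, atol=1e-6)
# End of sketch.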
if 'pads' not in attr: self.padding = [0, 0] else: self.padding = [attr['pads'][0], attr['pads'][1]] self.ceil_mode = False self.count_include_pad = True self.use_default_ibp = True self.relu_followed = False def forward(self, x): return F.avg_pool2d(x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) def bound_backward(self, last_lA, last_uA, x, **kwargs): def _bound_oneside(last_A): if last_A is None: return None, 0 equal_kernel_stride = (self.kernel_size[0] == self.stride[0] and self.kernel_size[1] == self.stride[1]) if isinstance(last_A, torch.Tensor): shape = last_A.size() if equal_kernel_stride: # propagate A to the next layer, with batch concatenated together next_A = F.interpolate( last_A.reshape(shape[0] * shape[1], *shape[2:]), scale_factor=self.kernel_size ) / (prod(self.kernel_size)) next_A = F.pad( next_A, (0, self.input_shape[-2] - next_A.shape[-2], 0, self.input_shape[-1] - next_A.shape[-1])) next_A = next_A.view(shape[0], shape[1], *next_A.shape[1:]) else: # Treat pooling as a general convolution weight = torch.zeros( self.input_shape[1], self.output_shape[1], *self.kernel_size, dtype=last_A.dtype, device=last_A.device) assert self.input_shape[1] == self.output_shape[1] weight = torch.eye(self.input_shape[1], dtype=last_A.dtype, device=last_A.device) weight = weight / prod(self.kernel_size) weight = weight.view(self.output_shape[1], self.input_shape[1], 1, 1) weight = weight.expand(self.output_shape[1], self.input_shape[1], *self.kernel_size) output_padding0 = ( int(self.input_shape[2]) - (int(self.output_shape[2]) - 1) * self.stride[0] + 2 * self.padding[0] - 1 - (int(weight.size()[2] - 1))) output_padding1 = ( int(self.input_shape[3]) - (int(self.output_shape[3]) - 1) * self.stride[1] + 2 * self.padding[1] - 1 - (int(weight.size()[3] - 1))) next_A = F.conv_transpose2d( last_A.reshape(shape[0] * shape[1], *shape[2:]), weight, None, stride=self.stride, padding=self.padding, output_padding=(output_padding0, output_padding1)) next_A = next_A.view(shape[0], shape[1], *next_A.shape[1:]) elif isinstance(last_A, Patches): patches = last_A.patches shape = patches.size() # When the number of inserted zeros can cancel out the stride, we use a shortcut that can reduce computation. simplify_patch = (equal_kernel_stride and last_A.inserted_zeros + 1 == self.kernel_size[0] and self.kernel_size[0] == self.kernel_size[1]) padding, stride, output_padding = compute_patches_stride_padding( self.input_shape, last_A.padding, last_A.stride, self.padding, self.stride, inserted_zeros=last_A.inserted_zeros, output_padding=last_A.output_padding, simplify=not simplify_patch) inserted_zeros = last_A.inserted_zeros if equal_kernel_stride and last_A.inserted_zeros == 0: # No inserted zeros, can be handled using interpolate. if last_A.unstable_idx is None: # shape is: [out_C, batch, out_H, out_W, in_c, patch_H, patch_W] up_sampled_patches = F.interpolate( patches.reshape(shape[0] * shape[1], shape[2] * shape[3], *shape[4:]), scale_factor=[1,] + self.kernel_size) # The dimension of patch-H and patch_W has changed. up_sampled_patches = up_sampled_patches.reshape( *shape[:-2], up_sampled_patches.size(-2), up_sampled_patches.size(-1)) else: # shape is: [spec, batch, in_c, patch_H, patch_W] up_sampled_patches = F.interpolate( patches, scale_factor=[1,] + self.kernel_size) # Divided by the averaging factor. 
up_sampled_patches = up_sampled_patches / prod(self.kernel_size) elif simplify_patch: padding = tuple(p // s - o for p, s, o in zip(padding, stride, output_padding)) output_padding = (0, 0, 0, 0) stride = 1 # Stride and inserted zero canceled out. No need to insert zeros and add output_padding. inserted_zeros = 0 value = 1. / prod(self.kernel_size) # In the case where the stride and adding_zeros cancel out, we do not need to insert zeros. weight = torch.full( size=(self.input_shape[1], 1, *self.kernel_size), fill_value=value, dtype=patches.dtype, device=patches.device) if last_A.unstable_idx is None: # shape is: [out_C, batch, out_H, out_W, in_c, patch_H, patch_W] up_sampled_patches = F.conv_transpose2d( patches.reshape( shape[0] * shape[1] * shape[2] * shape[3], *shape[4:] ), weight, stride=1, groups=self.input_shape[1]) else: # shape is: [spec, batch, in_c, patch_H, patch_W] up_sampled_patches = F.conv_transpose2d( patches.reshape(shape[0] * shape[1], *shape[2:]), weight, stride=1, groups=self.input_shape[1]) up_sampled_patches = up_sampled_patches.view( *shape[:-2], up_sampled_patches.size(-2), up_sampled_patches.size(-1)) else: # With inserted zeros, must be handled by treating pooling as general convolution. value = 1. / prod(self.kernel_size) weight = torch.full(size=(self.input_shape[1], 1, *self.kernel_size), fill_value=value, dtype=patches.dtype, device=patches.device) if not self.relu_followed: patches = last_A.create_padding(self.output_shape) weight = insert_zeros(weight, last_A.inserted_zeros) if last_A.unstable_idx is None: # shape is: [out_C, batch, out_H, out_W, in_c, patch_H, patch_W] up_sampled_patches = F.conv_transpose2d( patches.reshape(shape[0] * shape[1] * shape[2] * shape[3], *shape[4:]), weight, stride=self.stride, groups=self.input_shape[1]) else: # shape is: [spec, batch, in_c, patch_H, patch_W] up_sampled_patches = F.conv_transpose2d( patches.reshape(shape[0] * shape[1], *shape[2:]), weight, stride=self.stride, groups=self.input_shape[1]) up_sampled_patches = up_sampled_patches.view( *shape[:-2], up_sampled_patches.size(-2), up_sampled_patches.size(-1)) next_A = last_A.create_similar( up_sampled_patches, stride=stride, padding=padding, output_padding=output_padding, inserted_zeros=inserted_zeros) else: raise ValueError(f'last_A has unexpected type {type(last_A)}') return next_A, 0. 
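# Illustrative sketch (not part of this file): the equal_kernel_stride shortcut above relies on
# the fact that, for non-overlapping average pooling (kernel_size == stride, and no padding in
# this toy case), the adjoint of the pooling is nearest-neighbor upsampling of A divided by the
# kernel area. All shapes and values below are made up for illustration only.
import torch
import torch.nn.functional as F
x = torch.randn(1, 3, 8, 8, requires_grad=True)
A = torch.randn(1, 3, 4, 4)                              # stand-in for last_A
y = F.avg_pool2d(x, kernel_size=2, stride=2)
(vjp,) = torch.autograd.grad(y, x, grad_outputs=A)
shortcut = F.interpolate(A, scale_factor=2) / 4.0        # nearest-neighbor upsampling / kernel area
assert torch.allclose(vjp, shortcut, atol=1e-6)
# End of sketch.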
lA, lbias = _bound_oneside(last_lA) uA, ubias = _bound_oneside(last_uA) return [(lA, uA)], lbias, ubias def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): # e.g., last layer input gurobi vars (3,32,32) gvars_array = np.array(v[0]) # pre_layer_shape (1,32,27,27) pre_layer_shape = np.expand_dims(gvars_array, axis=0).shape # this layer shape (1,32,6,6) this_layer_shape = self.output_shape assert this_layer_shape[2] == ( (2 * self.padding[0] + pre_layer_shape[2] - (self.stride[0] - 1) ) // self.stride[0]) value = 1.0/(self.kernel_size[0] * self.kernel_size[1]) new_layer_gurobi_vars = [] neuron_idx = 0 for out_chan_idx in range(this_layer_shape[1]): out_chan_vars = [] for out_row_idx in range(this_layer_shape[2]): out_row_vars = [] for out_col_idx in range(this_layer_shape[3]): # print(self.bias.shape, out_chan_idx, out_lbs.size(1)) lin_expr = 0.0 for ker_row_idx in range(self.kernel_size[0]): in_row_idx = -self.padding[0] + self.stride[0] * out_row_idx + ker_row_idx if (in_row_idx < 0) or (in_row_idx == len(gvars_array[out_chan_idx][ker_row_idx])): # This is padding -> value of 0 continue for ker_col_idx in range(self.kernel_size[1]): in_col_idx = -self.padding[1] + self.stride[1] * out_col_idx + ker_col_idx if (in_col_idx < 0) or (in_col_idx == pre_layer_shape[3]): # This is padding -> value of 0 continue coeff = value lin_expr += coeff * gvars_array[out_chan_idx][in_row_idx][in_col_idx] v = model.addVar(lb=-float('inf'), ub=float('inf'), obj=0, vtype=grb.GRB.CONTINUOUS, name=f'lay{self.name}_{neuron_idx}') model.addConstr(lin_expr == v, name=f'lay{self.name}_{neuron_idx}_eq') neuron_idx += 1 out_row_vars.append(v) out_chan_vars.append(out_row_vars) new_layer_gurobi_vars.append(out_chan_vars) self.solver_vars = new_layer_gurobi_vars model.update() ================================================ FILE: auto_LiRPA/operators/reduce.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Reduce operators""" from .base import * from torch.nn import Module class BoundReduce(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.axis = attr.get('axes', None) self.keepdim = bool(attr['keepdims']) if 'keepdims' in attr else True self.use_default_ibp = True def _parse_input_and_axis(self, *x): if len(x) > 1: assert not self.is_input_perturbed(1) self.axis = tuple(item.item() for item in tuple(x[1])) self.axis = self.make_axis_non_negative(self.axis) return x[0] def _return_bound_backward(self, lA, uA): return [(lA, uA)] + [(None, None)] * (len(self.inputs) - 1), 0, 0 class BoundReduceMax(BoundReduce): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) """Assume that the indexes with the maximum values are not perturbed. 
This generally doesn't hold true, but can still be used for the input shift in Softmax of Transformers.""" self.fixed_max_index = options.get('fixed_reducemax_index', False) def _parse_input_and_axis(self, *x): x = super()._parse_input_and_axis(*x) # for torch.max, `dim` must be an int if isinstance(self.axis, tuple): assert len(self.axis) == 1 self.axis = self.axis[0] return x def forward(self, *x): x = self._parse_input_and_axis(*x) res = torch.max(x, dim=self.axis, keepdim=self.keepdim) self.indices = res.indices return res.values def bound_backward(self, last_lA, last_uA, *args, **kwargs): if self.fixed_max_index: def _bound_oneside(last_A): if last_A is None: return None indices = self.indices.unsqueeze(0) if not self.keepdim: assert (self.from_input) last_A = last_A.unsqueeze(self.axis + 1) indices = indices.unsqueeze(self.axis + 1) shape = list(last_A.shape) shape[self.axis + 1] *= self.input_shape[self.axis] A = torch.zeros(shape, device=last_A.device) indices = indices.expand(*last_A.shape) A.scatter_(dim=self.axis + 1, index=indices, src=last_A) return A return self._return_bound_backward(_bound_oneside(last_lA), _bound_oneside(last_uA)) else: raise NotImplementedError( '`bound_backward` for BoundReduceMax with perturbed maximum ' 'indexes is not implemented.') def build_gradient_node(self, grad_upstream): if self.fixed_max_index: node_grad = ReduceMaxGrad(self.axis, self.keepdim, self.input_shape, self.indices) return [(node_grad, (grad_upstream,), [])] else: raise NotImplementedError( '`build_gradient_node` for BoundReduceMax with perturbed maximum ' 'indexes is not implemented.') class ReduceMaxGrad(Module): def __init__(self, axis, keepdim, input_shape, indices): super().__init__() self.axis = axis self.keepdim = keepdim self.input_shape = input_shape self.indices = indices.unsqueeze(0) def forward(self, grad_last): # Only keep the gradient at the maximum index # The gradient at other indices is 0 # If keepdim is False, add a singleton dimension at the specified axis if not self.keepdim: grad_last = grad_last.unsqueeze(self.axis + 1) indices = self.indices.unsqueeze(self.axis + 1) else: indices = self.indices assert grad_last.shape[self.axis + 1] == 1 # Calculate the target dimension size at axis + 1 new_dim = self.input_shape[self.axis] # Create the output tensor shape new_shape = list(grad_last.shape) new_shape[self.axis + 1] = new_dim ######################################################################## # TODO: The following lines are equivalent to: # # grad = torch.zeros(new_shape, device=grad_last.device) # indices = indices.expand(*grad_last.shape) # grad.scatter_(dim=self.axis + 1, index=indices, src=grad_last) # # But auto_LiRPA does not support scatter_ yet. # So we use a workaround to avoid using scatter_.
######################################################################## # Expand indices to match the target shape, # filling axis + 1 with new_dim indices_expanded = indices.expand( *grad_last.shape[:self.axis + 1], new_dim, *grad_last.shape[self.axis + 2:] ).to(grad_last.device) # Create a coordinate tensor for comparison along axis + 1 coord_shape = [1] * grad_last.dim() coord_shape[self.axis + 1] = new_dim coord = torch.arange(new_dim, device=grad_last.device).view(*coord_shape) # Create a binary mask where 1 indicates the desired position for each gradient mask = (coord == indices_expanded).type_as(grad_last) # Expand grad_last to match the target shape for element-wise multiplication grad_last_expanded = grad_last.expand( *grad_last.shape[:self.axis + 1], new_dim, *grad_last.shape[self.axis + 2:]) # Use the mask to retain values only at the correct positions grad = mask * grad_last_expanded return grad class BoundReduceMin(BoundReduceMax): def forward(self, *x): x = self._parse_input_and_axis(*x) res = torch.min(x, dim=self.axis, keepdim=self.keepdim) self.indices = res.indices return res.values class BoundReduceMean(BoundReduce): def forward(self, *x): x = self._parse_input_and_axis(*x) return torch.mean(x, dim=self.axis, keepdim=self.keepdim) def bound_backward(self, last_lA, last_uA, *args, **kwargs): def _bound_oneside(last_A): if last_A is None: return None if not self.keepdim: assert (self.from_input) for axis in self.axis: if axis > 0: last_A = last_A.unsqueeze(axis + 1) shape = list(last_A.shape) shape[2:] = self.input_shape[1:] # We perform expansion as in BoundReduceSum. # and divide the product of the sizes of the reduced dimensions. last_A = last_A.expand(*shape) / np.prod(np.take(self.input_shape, self.axis)) return last_A return self._return_bound_backward(_bound_oneside(last_lA), _bound_oneside(last_uA)) def bound_forward(self, dim_in, x, *args): assert self.keepdim assert len(self.axis) == 1 axis = self.make_axis_non_negative(self.axis[0]) assert (axis > 0) size = self.input_shape[axis] lw = x.lw.sum(dim=axis + 1, keepdim=True) / size lb = x.lb.sum(dim=axis, keepdim=True) / size uw = x.uw.sum(dim=axis + 1, keepdim=True) / size ub = x.ub.sum(dim=axis, keepdim=True) / size return LinearBound(lw, lb, uw, ub) class BoundReduceSum(BoundReduce): def forward(self, *x): x = self._parse_input_and_axis(*x) if self.axis is not None: return torch.sum(x, dim=self.axis, keepdim=self.keepdim) else: return torch.sum(x) def bound_backward(self, last_lA, last_uA, x, *args, **kwargs): def _bound_oneside(last_A): if last_A is None: return None if not self.keepdim: assert (self.from_input) for axis in self.axis: if axis > 0: last_A = last_A.unsqueeze(axis + 1) # last_A.shape = [num_spec, batch_size, ..., dim_size_1 (1), ...] shape = list(last_A.shape) # self.input_shape = [batch_size_original, ..., dim_size_1_before_reduction, ...] # we expand last_A with keeping its batch_size instead of that from self.input_shape. shape[2:] = self.input_shape[1:] # For reduced dims, their dim_size will be expanded from 1 to the original size. # For non-reduced dims, their dim_size will be unchanged. 
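# Illustrative sketch (not part of this file): the scatter_-free workaround in ReduceMaxGrad above
# builds a 0/1 mask by comparing a coordinate ramp against the max indices and multiplying it with
# the (broadcast) upstream gradient. A toy check that the mask trick reproduces scatter_; every
# shape and index value below is made up for illustration only.
import torch
g = torch.randn(4, 3, 1, 6)                    # upstream gradient, singleton at the reduced dim
idx = torch.randint(0, 5, (1, 3, 1, 6))        # max indices into a restored dim of size 5
ref = torch.zeros(4, 3, 5, 6)                  # reference: scatter_ into the restored dimension
ref.scatter_(dim=2, index=idx.expand_as(g), src=g)
coord = torch.arange(5).view(1, 1, 5, 1)       # coordinate ramp along the restored dimension
mask = (coord == idx).to(g.dtype)              # 1.0 exactly at the max positions
out = mask * g                                 # broadcasting expands g over the restored dim
assert torch.allclose(ref, out)
# End of sketch.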
last_A = last_A.expand(*shape) return last_A return self._return_bound_backward(_bound_oneside(last_lA), _bound_oneside(last_uA)) def bound_forward(self, dim_in, x, *args): # Handle possibly multiple axes axes = [self.make_axis_non_negative(ax) for ax in self.axis] # Ensure all axes are greater than 0 (not batch dimension) assert all(ax > 0 for ax in axes) # For lw/uw, need to shift by 1 due to an extra leading dimension (num_spec) lw = x.lw.sum(dim=[ax + 1 for ax in axes], keepdim=self.keepdim) lb = x.lb.sum(dim=axes, keepdim=self.keepdim) uw = x.uw.sum(dim=[ax + 1 for ax in axes], keepdim=self.keepdim) ub = x.ub.sum(dim=axes, keepdim=self.keepdim) return LinearBound(lw, lb, uw, ub) def build_gradient_node(self, grad_upstream): node_grad = ReduceSumGrad(self.axis, self.keepdim, self.input_shape) return [(node_grad, (grad_upstream,), [])] class ReduceSumGrad(Module): def __init__(self, axis, keepdim, input_shape): super().__init__() self.axis = axis self.keepdim = keepdim self.input_shape = input_shape def forward(self, grad_last): grad_new = grad_last.clone() if not self.keepdim: for axis in self.axis: if axis > 0: grad_new = grad_new.unsqueeze(axis + 1) # For ReduceSum, ∂y/∂x = 1, so we just need to expand the gradient # along each axis that is reduced. shape = list(grad_new.shape) shape[2:] = self.input_shape[1:] grad_new = grad_new.expand(*shape) return grad_new ================================================ FILE: auto_LiRPA/operators/relu.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """BoundRelu.""" from typing import Optional, Tuple import torch from torch import Tensor from torch.nn import Module from torch.autograd import Function from collections import OrderedDict from .base import * from .clampmult import multiply_by_A_signs from .activation_base import BoundActivation, BoundOptimizableActivation from .solver_utils import grb from ..utils import unravel_index, prod class BoundTwoPieceLinear(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if options is None: options = {} self.options = options self.ibp_intermediate = True self.splittable = True self.relu_options = options.get('activation_bound_option', 'adaptive') self.use_sparse_spec_alpha = options.get('sparse_spec_alpha', False) self.use_sparse_features_alpha = options.get('sparse_features_alpha', False) self.alpha_lookup_idx = self.alpha_indices = None self.beta = self.masked_beta = self.sparse_betas = None self.split_beta_used = False self.history_beta_used = False self.flattened_nodes = None self.patch_size = {} self.cut_used = False self.cut_module = None self.gcp_unstable_relu_indicators = None def init_opt_parameters(self, start_nodes): ref = self.inputs[0].lower # a reference variable for getting the shape batch_size = ref.size(0) self.alpha = OrderedDict() self.alpha_lookup_idx = OrderedDict() # For alpha with sparse spec dimention. self.alpha_indices = None # indices of non-zero alphas. verbosity = self.options.get('verbosity', 0) # Alpha can be sparse in both spec dimension, and the C*H*W dimension. # We first deal with the sparse-feature alpha, which is sparse in the # C*H*W dimesnion of this layer. minimum_sparsity = self.options.get('minimum_sparsity', 0.9) if (self.use_sparse_features_alpha and self.inputs[0].is_lower_bound_current() and self.inputs[0].is_upper_bound_current()): # Pre-activation bounds available, we will store the alpha for unstable neurons only. # Since each element in a batch can have different unstable neurons, # for simplicity we find a super-set using any(dim=0). # This can be non-ideal if the x in a batch are very different. self.get_unstable_idx() total_neuron_size = self.inputs[0].lower.numel() // batch_size if self.alpha_indices[0].size(0) <= minimum_sparsity * total_neuron_size: # Shape is the number of unstable neurons in this layer. alpha_shape = [self.alpha_indices[0].size(0)] # Skip the batch, spec dimension, and find the lower slopes for all unstable neurons. if len(self.alpha_indices) == 1: # This layer is after a linear layer. alpha_init = self.init_d[:, :, self.alpha_indices[0]] elif len(self.alpha_indices) == 3: # This layer is after a conv2d layer. alpha_init = self.init_d[ :, :, self.alpha_indices[0], self.alpha_indices[1], self.alpha_indices[2]] elif len(self.alpha_indices) == 2: # This layer is after a conv1d layer. alpha_init = self.init_d[ :, :, self.alpha_indices[0], self.alpha_indices[1]] else: raise ValueError if verbosity > 0: print(f'layer {self.name} using sparse-features alpha with shape {alpha_shape}; unstable size ' f'{self.alpha_indices[0].size(0)}; total size {total_neuron_size} ({list(ref.shape)})') else: alpha_shape = self.shape # Full alpha. 
alpha_init = self.init_d if verbosity > 0: print(f'layer {self.name} using full alpha with shape {alpha_shape}; unstable size ' f'{self.alpha_indices[0].size(0)}; total size {total_neuron_size} ({list(ref.shape)})') self.alpha_indices = None # Use full alpha. else: alpha_shape = self.shape # Full alpha. alpha_init = self.init_d # Now we start to create alphas for all start nodes. # When sparse-spec feature is enabled, alpha is created for only # unstable neurons in start node. for start_node in start_nodes: ns, output_shape, unstable_idx = start_node[:3] if isinstance(output_shape, (list, tuple)): if len(output_shape) > 1: size_s = prod(output_shape) # Conv layers. else: size_s = output_shape[0] else: size_s = output_shape # unstable_idx may be a tensor (dense layer or conv layer # with shared alpha), or tuple of 3-d tensors (conv layer with # non-sharing alpha). sparsity = float('inf') if unstable_idx is None else unstable_idx.size(0) if isinstance(unstable_idx, torch.Tensor) else unstable_idx[0].size(0) if sparsity <= minimum_sparsity * size_s and self.use_sparse_spec_alpha: # For fully connected layer, or conv layer with shared alpha per channel. # shape is (2, sparse_spec, batch, this_layer_shape) # We create sparse specification dimension, where the spec dimension of alpha only includes slopes for unstable neurons in start_node. self.alpha[ns] = torch.empty([self.alpha_size, sparsity + 1, batch_size, *alpha_shape], dtype=torch.float, device=ref.device, requires_grad=True) self.alpha[ns].data.copy_(alpha_init.data) # This will broadcast to (2, sparse_spec) dimensions. if verbosity > 0: print(f'layer {self.name} start_node {ns} using sparse-spec alpha {list(self.alpha[ns].size())}' f' with unstable size {sparsity} total_size {size_s} output_shape {output_shape}') # unstable_idx is a list of used neurons (or channels for BoundConv) for the start_node. assert unstable_idx.ndim == 1 if isinstance(unstable_idx, torch.Tensor) else unstable_idx[0].ndim == 1 # We only need to the alpha for the unstable neurons in start_node. indices = torch.arange(1, sparsity + 1, device=alpha_init.device, dtype=torch.long) if isinstance(output_shape, int) or len(output_shape) == 1: # Fully connected layers, or conv layer in patches mode with partially shared alpha (pixels in the same channel use the same alpha). self.alpha_lookup_idx[ns] = torch.zeros(size_s, dtype=torch.long, device=alpha_init.device) # This lookup table maps the unstable_idx to the actual alpha location in self.alpha[ns]. # Note that self.alpha[ns][:,0] is reserved for any unstable neurons that are not found in the lookup table. This usually should not # happen, unless reference bounds are not properly set. self.alpha_lookup_idx[ns].data[unstable_idx] = indices else: # conv layer in matrix mode, or in patches mode but with non-shared alpha. The lookup table is 3-d. assert len(output_shape) == 3 self.alpha_lookup_idx[ns] = torch.zeros(output_shape, dtype=torch.long, device=alpha_init.device) if isinstance(unstable_idx, torch.Tensor): # Convert the unstable index from flattend 1-d to 3-d. (matrix mode). unstable_idx_3d = unravel_index(unstable_idx, output_shape) else: # Patches mode with non-shared alpha, unstable_idx is already 3d. unstable_idx_3d = unstable_idx # Build look-up table. self.alpha_lookup_idx[ns].data[unstable_idx_3d[0], unstable_idx_3d[1], unstable_idx_3d[2]] = indices else: # alpha shape is (2, spec, batch, this_layer_shape). "this_layer_shape" may still be sparse. 
self.alpha[ns] = torch.empty([self.alpha_size, size_s, batch_size, *alpha_shape], dtype=torch.float, device=ref.device, requires_grad=True) self.alpha[ns].data.copy_(alpha_init.data) # This will broadcast to (2, spec) dimensions if verbosity > 0: print(f'layer {self.name} start_node {ns} using full alpha {list(self.alpha[ns].size())} with unstable ' f'size {sparsity if unstable_idx is not None else None} total_size {size_s} output_shape {output_shape}') # alpha_lookup_idx can be used for checking if sparse alpha is used or not. self.alpha_lookup_idx[ns] = None def select_alpha_by_idx(self, last_lA, last_uA, unstable_idx, start_node): # Each alpha has shape (2, output_shape, batch_size, *relu_node_shape]. # If slope is shared, output_shape will be 1. # The *relu_node_shape might be sparse (sparse-feature alpha), where the non-zero values are indicated by self.alpha_indices. # The out_shape might be sparse (sparse-spec alpha), where the non-zero values are indexed by self.alpha_lookup_idx. if unstable_idx is not None: # print(f'relu layer {self.name}, start_node {start_node}, unstable_idx {type(unstable_idx)} alpha idx {self.alpha_lookup_idx[start_node.name].size()}') if self.alpha_lookup_idx is not None: alpha_lookup_idx = self.alpha_lookup_idx[start_node.name] else: alpha_lookup_idx = None if isinstance(unstable_idx, tuple): # Start node is a conv node. selected_alpha = self.alpha[start_node.name] if isinstance(last_lA, Tensor) or isinstance(last_uA, Tensor): # Start node is a conv node but we received tensors as A matrices. # Patches mode converted to matrix, or matrix mode used. Need to select accross the spec dimension. # For this node, since it is in matrix mode, the spec dimension is out_c * out_h * out_w # Shape is [2, spec, batch, *this_layer_shape] if alpha_lookup_idx is None: if self.options['optimize_bound_args'].get('use_shared_alpha', False): # alpha is shared, and its spec dimension is always 1. In this case we do not need to select. # selected_alpha will have shape [2, 1, batch, *this_layer_shape] pass else: # alpha is not shared, so it has shape [2, spec, batch, *this_layer_shape] # Reshape the spec dimension to c*h*w so we can select used alphas based on unstable index. # Shape becomes [2, out_c, out_h, out_w, batch, *this_layer_shape] selected_alpha = selected_alpha.view(selected_alpha.size(0), *start_node.output_shape[1:], *selected_alpha.shape[2:]) selected_alpha = selected_alpha[:, unstable_idx[0], unstable_idx[1], unstable_idx[2]] else: assert alpha_lookup_idx.ndim == 3 # We only stored some alphas, and A is also sparse, so the unstable_idx must be first translated to real indices. # alpha shape is (2, sparse_spec_shape, batch_size, *relu_node_shape) where relu_node_shape can also be sparse. # We use sparse-spec alphas. Need to convert these unstable_idx[0], unstable_idx[1], unstable_idx[0] using lookup table. _unstable_idx = alpha_lookup_idx[unstable_idx[0], unstable_idx[1], unstable_idx[2]] selected_alpha = self.non_deter_index_select(selected_alpha, index=_unstable_idx, dim=1) else: # Patches mode. Alpha must be selected after unfolding, so cannot be done here. # Selection is deferred to maybe_unfold() using alpha_lookup_idx. # For partially shared alpha, its shape is (2, out_c, batch_size, *relu_node_shape). # For full alpha, its shape is (2, out_c*out_h*out_w, batch_size, *relu_node_shape). # Both the spec dimension and relu_node_shape dimensions can be sparse. pass elif unstable_idx.ndim == 1: # Start node is a FC node. 
# Only unstable neurons of the start_node neurons are used. assert alpha_lookup_idx is None or alpha_lookup_idx.ndim == 1 if self.options['optimize_bound_args'].get('use_shared_alpha', False): # Shared alpha is used, all output specs use the same alpha. No selection is needed. # The spec dim is 1 and will be broadcast. selected_alpha = self.alpha[start_node.name] else: _unstable_idx = alpha_lookup_idx[unstable_idx] if alpha_lookup_idx is not None else unstable_idx selected_alpha = self.non_deter_index_select(self.alpha[start_node.name], index=_unstable_idx, dim=1) elif unstable_idx.ndim == 2: assert alpha_lookup_idx is None, "sparse spec alpha has not been implemented yet." # Each element in the batch selects different neurons. selected_alpha = batched_index_select(self.alpha[start_node.name], index=unstable_idx, dim=1) else: raise ValueError else: # Spec dimension is dense. Alpha must not be created sparsely. assert self.alpha_lookup_idx is None or self.alpha_lookup_idx[start_node.name] is None selected_alpha = self.alpha[start_node.name] alpha_lookup_idx = None return selected_alpha, alpha_lookup_idx def reconstruct_full_alpha(self, sparse_alpha, full_alpha_shape, alpha_indices): full_alpha = torch.zeros(full_alpha_shape, dtype=sparse_alpha.dtype, device=sparse_alpha.device) if len(alpha_indices) == 1: # Relu after a dense layer. full_alpha[:, :, alpha_indices[0]] = sparse_alpha elif len(alpha_indices) == 3: # Relu after a conv2d layer. full_alpha[:, :, alpha_indices[0], alpha_indices[1], alpha_indices[2]] = sparse_alpha elif len(alpha_indices) == 2: # Relu after a conv1d layer. full_alpha[:, :, alpha_indices[0], alpha_indices[1]] = sparse_alpha else: raise ValueError return full_alpha def bound_backward(self, last_lA, last_uA, x=None, start_node=None, unstable_idx=None, reduce_bias=True, **kwargs): """ start_node: the name of the layer where the backward bound propagation starts. Can be the output layer or an intermediate layer. unstable_idx: indices for the unstable neurons, whose bounds need to be computed. Either be a tuple (for patches) or a 1-D tensor. """ lower = x.lower upper = x.upper # Get element-wise CROWN linear relaxations. (upper_d, upper_b, lower_d, lower_b, lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d, lb_upper_b, ub_upper_b, alpha_lookup_idx) = \ self._backward_relaxation(last_lA, last_uA, x, start_node, unstable_idx) # save for calculate babsr score self.d = upper_d self.lA = last_lA # Save for initialization bounds. self.init_d = lower_d # Choose upper or lower bounds based on the sign of last_A def _bound_oneside(last_A, d_pos, d_neg, b_pos, b_neg): if last_A is None: return None, 0 # Obtain the new linear relaxation coefficients based on the signs in last_A. same_slope = True if self.relu_options == "same-slope" else False _A, _bias = multiply_by_A_signs( last_A, d_pos, d_neg, b_pos, b_neg, reduce_bias=reduce_bias, same_slope=same_slope) if isinstance(last_A, Patches): # Save the patch size, which will be used in init_alpha() to determine the number of optimizable parameters. A_prod = _A.patches if start_node is not None: if last_A.unstable_idx is not None: # Sparse patches, we need to construct the full patch size: (out_c, batch, out_h, out_w, c, h, w). self.patch_size[start_node.name] = [ last_A.output_shape[1], A_prod.size(1), last_A.output_shape[2], last_A.output_shape[3], A_prod.size(-3), A_prod.size(-2), A_prod.size(-1)] else: # Regular patches. 
self.patch_size[start_node.name] = A_prod.size() return _A, _bias ######## A problem with patches mode for cut constraint start ########## # There are cases that the node that is in the constraint but not selected by the patches for the output node # trick: only count the small patches that have all the split node coeffs[ci].sum() equal to coeffs_unfolded[ci][out_h, out_w, -1].sum() # we should force these beta to be 0 to disable the effect of these constraints A = last_lA if last_lA is not None else last_uA current_layer_shape = lower.size()[1:] if self.cut_used and type(A) is Patches: self.cut_module.patch_trick(start_node, self.name, A, current_layer_shape) ######## A problem with patches mode for cut constraint end ########## if self.cut_used: if self.leaky_alpha > 0: raise NotImplementedError # propagate postrelu node in cut constraints last_lA, last_uA = self.cut_module.relu_cut( start_node, self.name, last_lA, last_uA, current_layer_shape, unstable_idx, batch_mask=self.inputs[0].alpha_beta_update_mask) # In patches mode we might need an unfold. # lower_d, upper_d, lower_b, upper_b: 1, batch, current_c, current_w, current_h or None upper_d = maybe_unfold_patches(upper_d, last_lA if last_lA is not None else last_uA) lower_d = maybe_unfold_patches(lower_d, last_lA if last_lA is not None else last_uA) upper_b = maybe_unfold_patches(upper_b, last_lA if last_lA is not None else last_uA) lower_b = maybe_unfold_patches(lower_b, last_lA if last_lA is not None else last_uA) # for ReLU it is always None; keeping it here for completeness. # ub_lower_d and lb_lower_d might have sparse spec dimension, so they may need alpha_lookup_idx to convert to actual spec dim. ub_lower_d = maybe_unfold_patches(ub_lower_d, last_uA, alpha_lookup_idx=alpha_lookup_idx) ub_upper_d = maybe_unfold_patches(ub_upper_d, last_uA, alpha_lookup_idx=alpha_lookup_idx) # optimizable slope lb_lower_d: spec (only channels in spec layer), batch, current_c, current_w, current_h # patches mode lb_lower_d after unfold: unstable, batch, in_C, H, W lb_lower_d = maybe_unfold_patches(lb_lower_d, last_lA, alpha_lookup_idx=alpha_lookup_idx) lb_upper_d = maybe_unfold_patches(lb_upper_d, last_lA, alpha_lookup_idx=alpha_lookup_idx) # ub_upper_b and lb_upper_b can also be optimizable variables, just like ub/lb_upper/lower_d. # This is only possible when alpha is optimized in the "same-slope" setting, where we move the linear upper bound together with the lower bound. ub_upper_b = maybe_unfold_patches(ub_upper_b, last_lA, alpha_lookup_idx=alpha_lookup_idx) lb_upper_b = maybe_unfold_patches(lb_upper_b, last_lA, alpha_lookup_idx=alpha_lookup_idx) if self.cut_used: assert reduce_bias # Here, we create a tuple includes 3 masks: # unstable_indicators. unstable neuron mask. # positive_indicators. previous unstable now split on z = 1. # negative_indicators. previous unstable now split on z = 0. 
unstable_indicators = (lower < 0) * (upper > 0) positive_indicators = ~(lower < 0) & self.gcp_unstable_relu_indicators negative_indicators = ~(upper > 0) & self.gcp_unstable_relu_indicators relu_indicators = (unstable_indicators, positive_indicators, negative_indicators) # propagate integer var of relu neuron (arelu) in cut constraints through relu layer lA, uA, lbias, ubias = self.cut_module.arelu_cut( start_node, self.name, last_lA, last_uA, lower_d, upper_d, lower_b, upper_b, lb_lower_d, ub_lower_d, relu_indicators, x, self.patch_size, current_layer_shape, unstable_idx, batch_mask=self.inputs[0].alpha_beta_update_mask) else: uA, ubias = _bound_oneside( last_uA, ub_upper_d if upper_d is None else upper_d, ub_lower_d if lower_d is None else lower_d, ub_upper_b if ub_upper_b is not None else upper_b, lower_b) lA, lbias = _bound_oneside( last_lA, lb_lower_d if lower_d is None else lower_d, lb_upper_d if upper_d is None else upper_d, lower_b, lb_upper_b if lb_upper_b is not None else upper_b) if self.cut_used: # propagate prerelu node in cut constraints lA, uA = self.cut_module.pre_cut( start_node, self.name, lA, uA, current_layer_shape, unstable_idx, batch_mask=self.inputs[0].alpha_beta_update_mask) self.masked_beta_lower = self.masked_beta_upper = None return [(lA, uA)], lbias, ubias def _transfer_alpha_lookup_idx(self, alpha_lookup_idx, device=None, dtype=None, non_blocking=False): if alpha_lookup_idx is None: return None alpha_lookup_idx = {spec_name: transfer(idx, device=device, dtype=dtype, non_blocking=non_blocking) if idx is not None else None for spec_name, idx in alpha_lookup_idx.items()} return alpha_lookup_idx def _transfer_alpha_indices(self, alpha_indices, device=None, dtype=None, non_blocking=False): if alpha_indices is None: return None alpha_indices = [transfer(indices, device=device, dtype=dtype, non_blocking=non_blocking) for indices in alpha_indices] return alpha_indices def dump_alpha(self, device=None, dtype=None, non_blocking=False): ret = {'alpha': self._transfer_alpha(self.alpha, device=device, dtype=dtype, non_blocking=non_blocking, require_grad=False)} if self.use_sparse_spec_alpha: ret['alpha_lookup_idx'] = self._transfer_alpha_lookup_idx(self.alpha_lookup_idx, device=device, dtype=None, non_blocking=non_blocking) if self.use_sparse_features_alpha: ret['alpha_indices'] = self._transfer_alpha_indices(self.alpha_indices, device=device, dtype=None, non_blocking=non_blocking) return ret def restore_alpha(self, alpha, device=None, dtype=None, non_blocking=False): self.alpha = self._transfer_alpha(alpha['alpha'], device=device, dtype=dtype, non_blocking=non_blocking, require_grad=True) if self.use_sparse_spec_alpha: self.alpha_lookup_idx = self._transfer_alpha_lookup_idx(alpha['alpha_lookup_idx'], device=device, dtype=None, non_blocking=non_blocking) if self.use_sparse_features_alpha: self.alpha_indices = self._transfer_alpha_indices(alpha['alpha_indices'], device=device, dtype=None, non_blocking=non_blocking) def drop_unused_alpha(self, keep_nodes): for spec_name in list(self.alpha.keys()): # If the spec_name is not in keep_nodes, we delete it. if spec_name not in keep_nodes: del self.alpha[spec_name] # if use_sparse_spec_alpha is True, we also delete the alpha_lookup_idx if needed. if self.use_sparse_spec_alpha: del self.alpha_lookup_idx[spec_name] # if there is no alpha left and use_sparse_features_alpha is True, # we also delete the alpha_indices. 
if not self.alpha and self.use_sparse_features_alpha: self.alpha_indices = None class BoundRelu(BoundTwoPieceLinear): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) if attr is None: attr = {} self.leaky_alpha = attr.get('alpha', 0) self.alpha_size = 2 # Alpha dimension is (2, output_shape, batch, *shape) for ReLU. def get_unstable_idx(self): self.alpha_indices = torch.logical_and( self.inputs[0].lower < 0, self.inputs[0].upper > 0).any(dim=0).nonzero(as_tuple=True) def clip_alpha(self): for v in self.alpha.values(): v.data = torch.clamp(v.data, self.leaky_alpha, 1.) def forward(self, x): self.shape = x.shape[1:] if self.flattened_nodes is None: self.flattened_nodes = x[0].reshape(-1).shape[0] if self.leaky_alpha > 0: return F.leaky_relu(x, negative_slope=self.leaky_alpha) else: return F.relu(x) def _relu_lower_bound_init(self, upper_k): """Return the initial lower bound without relaxation.""" if self.relu_options == "same-slope": # the same slope for upper and lower lower_k = upper_k elif self.relu_options == "zero-lb": # Always use slope 0 as lower bound. Any value between 0 and 1 is a valid lower bound for CROWN lower_k = torch.zeros_like(upper_k) lower_k = (upper_k >= 1.0).to(upper_k) if self.leaky_alpha > 0: lower_k += (upper_k < 1.0).to(upper_k) * self.leaky_alpha elif self.relu_options == "one-lb": # Always use slope 1 as lower bound lower_k = ((upper_k > self.leaky_alpha).to(upper_k) + (upper_k <= self.leaky_alpha).to(upper_k) * self.leaky_alpha) else: # adaptive if self.leaky_alpha == 0: lower_k = (upper_k > 0.5).to(upper_k) else: # FIXME this may not be optimal for leaky relu lower_k = ((upper_k > 0.5).to(upper_k) + (upper_k <= 0.5).to(upper_k) * self.leaky_alpha) return lower_k def _relu_upper_opt_same_slope(self, lb_lower_d, ub_lower_d, upper_d, lower, upper): """ When the "same-slope" option is enabled in the CROWN-Optimized method, lower_d is obtained directly from the optimizable parameters, so we force upper_d to be the same as lower_d. We want the same-slope upper bound to be as tight as possible, so it should pass through one of the vertices of the triangular convex hull of ReLU. upper_d holds the slopes of the upper bounds computed with the normal triangle relaxation. For a single element: - lb_lower_d > upper_d => The same-slope upper bound should pass through the left endpoint of relu; - lb_lower_d < upper_d => The same-slope upper bound should pass through the right endpoint of relu. """ lower_y = F.relu(lower) upper_y = F.relu(upper) if lb_lower_d is None: lb_upper_d = lb_upper_b = None else: lb_upper_d = lb_lower_d b_left = lower_y - lb_upper_d * lower b_right = upper_y - lb_upper_d * upper use_left_end = (lb_lower_d >= upper_d) lb_upper_b = use_left_end * b_left + ~use_left_end * b_right if ub_lower_d is None: ub_upper_d = ub_upper_b = None else: ub_upper_d = ub_lower_d b_left = lower_y - ub_upper_d * lower b_right = upper_y - ub_upper_d * upper use_left_end = (ub_lower_d >= upper_d) ub_upper_b = use_left_end * b_left + ~use_left_end * b_right return lb_upper_d, lb_upper_b, ub_upper_d, ub_upper_b def _forward_relaxation(self, x): self._init_masks(x) self.mask_pos = self.mask_pos.to(x.lower) self.mask_both = self.mask_both.to(x.lower) upper_k, upper_b = self._relu_upper_bound( x.lower, x.upper, self.leaky_alpha) self.uw = self.mask_pos + self.mask_both * upper_k self.ub = self.mask_both * upper_b if self.opt_stage in ['opt', 'reuse']: # Each actual alpha in the forward mode has shape (batch_size, *relu_node_shape].
# But self.alpha has shape (2, output_shape, batch_size, *relu_node_shape] # and we do not need its first two dimensions. lower_k = self.alpha['_forward'][0, 0] else: lower_k = self._relu_lower_bound_init(upper_k) # NOTE #FIXME Saved for initialization bounds for optimization. # In the backward mode, same-slope bounds are used. # But here it is using adaptive bounds which seem to be better # for nn4sys benchmark with loose input bounds. Need confirmation # for other cases. self.lower_d = lower_k.detach() # saved for initializing optimized bounds self.lw = self.mask_both * lower_k + self.mask_pos def bound_dynamic_forward(self, x, max_dim=None, offset=0): if self.leaky_alpha > 0: raise NotImplementedError if not hasattr(self, 'upper_k'): # x.lower and x.upper remain same all the time, # so the following only need to do once self.upper_k, self.upper_b = self._relu_upper_bound( x.lower, x.upper, self.leaky_alpha) self.upper_b /= 2 self.device = x.lw.device self.batch_size = x.lower.shape[0] self.unstable = torch.logical_and(x.lower < 0, x.upper > 0).view(self.batch_size, -1).to(torch.int) self.tot_dim = x.tot_dim + int(self.unstable.sum(dim=-1).max()) self.b_new = self.upper_k * x.lb + self.upper_b b_new = self.b_new batch_size = self.batch_size device = self.device unstable = self.unstable if x.lw.shape[1]: # Compute only when x.lw is not empty w_new = self.upper_k.unsqueeze(1) * x.lw else: w_new = torch.empty_like(x.lw) if offset + w_new.shape[1] < x.tot_dim: return LinearBound( w_new, b_new, w_new, b_new, x_L=x.x_L, x_U=x.x_U, tot_dim=self.tot_dim) # Create new variables for unstable ReLU index = torch.cumsum(unstable, dim=-1).to(torch.int64) index = (index - (offset + w_new.shape[1] - x.tot_dim)).clamp(min=0) num_new_dim = int(index.max()) num_new_dim_actual = min(num_new_dim, max_dim - w_new.shape[1]) index = index.clamp(max=num_new_dim_actual+1) w_unstable = torch.zeros(batch_size, num_new_dim_actual + 2, unstable.size(-1), device=device) x_L_unstable = -torch.ones(batch_size, num_new_dim_actual, device=device) x_U_unstable = torch.ones(batch_size, num_new_dim_actual, device=device) w_unstable.scatter_(dim=1, index=index.unsqueeze(1), src=self.upper_b.view(batch_size, 1, -1), reduce='add') w_unstable = w_unstable[:, 1:-1].view(batch_size, num_new_dim_actual, *w_new.shape[2:]) w_new = torch.cat([w_new, w_unstable], dim=1) x_L_new = torch.cat([x.x_L, x_L_unstable], dim=-1) x_U_new = torch.cat([x.x_U, x_U_unstable], dim=-1) return LinearBound( w_new, b_new, w_new, b_new, x_L=x_L_new, x_U=x_U_new, tot_dim=self.tot_dim) def bound_forward(self, dim_in, x): self._forward_relaxation(x) lb = self.lw * x.lb ub = self.uw * x.ub + self.ub lw = (self.lw.unsqueeze(1) * x.lw) if x.lw is not None else None uw = (self.uw.unsqueeze(1) * x.uw) if x.uw is not None else None if not lw.requires_grad: del self.mask_both, self.mask_pos del self.lw, self.uw, self.ub return LinearBound(lw, lb, uw, ub) @staticmethod @torch.jit.script def _relu_upper_bound(lb, ub, leaky_alpha: float): """Upper bound slope and intercept according to CROWN relaxation.""" lb_r = lb.clamp(max=0) ub_r = ub.clamp(min=0) ub_r = torch.max(ub_r, lb_r + 1e-8) if leaky_alpha > 0: upper_d = (ub_r - leaky_alpha * lb_r) / (ub_r - lb_r) upper_b = - lb_r * upper_d + leaky_alpha * lb_r else: upper_d = ub_r / (ub_r - lb_r) upper_b = - lb_r * upper_d return upper_d, upper_b @staticmethod def _relu_mask_alpha(lower, upper, lb_lower_d : Optional[Tensor], ub_lower_d : Optional[Tensor], leaky_alpha : float = 0, ) -> Tuple[Optional[Tensor], 
Optional[Tensor], Tensor]: lower_mask = (lower >= 0).requires_grad_(False).to(lower.dtype) upper_mask = (upper <= 0).requires_grad_(False) if leaky_alpha > 0: zero_coeffs = False else: zero_coeffs = upper_mask.all() no_mask = (1. - lower_mask) * (1. - upper_mask.to(upper.dtype)) if lb_lower_d is not None: lb_lower_d = ( torch.clamp(lb_lower_d, min=leaky_alpha, max=1.) * no_mask + lower_mask) if leaky_alpha > 0: lb_lower_d += upper_mask * leaky_alpha if ub_lower_d is not None: ub_lower_d = ( torch.clamp(ub_lower_d, min=leaky_alpha, max=1.) * no_mask + lower_mask) if leaky_alpha > 0: ub_lower_d += upper_mask * leaky_alpha return lb_lower_d, ub_lower_d, zero_coeffs def _backward_relaxation(self, last_lA, last_uA, x, start_node, unstable_idx): # Usage of output constraints requires access to bounds of the previous iteration # (see _clear_and_set_new) if x is not None: lower = x.lower upper = x.upper else: lower = self.lower upper = self.upper # Upper bound slope and intercept according to CROWN relaxation. upper_d, upper_b = self._relu_upper_bound(lower, upper, self.leaky_alpha) flag_expand = False ub_lower_d = lb_lower_d = None ub_upper_d = lb_upper_d = None ub_upper_b = lb_upper_b = None lower_b = None # ReLU does not have lower bound intercept (=0). alpha_lookup_idx = None # For sparse-spec alpha. if self.opt_stage in ['opt', 'reuse']: # Alpha-CROWN. lower_d = None selected_alpha, alpha_lookup_idx = self.select_alpha_by_idx( last_lA, last_uA, unstable_idx, start_node) # The first dimension is lower/upper intermediate bound. if last_lA is not None: lb_lower_d = selected_alpha[0] if last_uA is not None: ub_lower_d = selected_alpha[1] if self.alpha_indices is not None: # Sparse alpha on the hwc dimension. We store slopes for unstable neurons in this layer only. # Recover to full alpha first. sparse_alpha_shape = lb_lower_d.shape if lb_lower_d is not None else ub_lower_d.shape full_alpha_shape = sparse_alpha_shape[:-1] + self.shape if lb_lower_d is not None: lb_lower_d = self.reconstruct_full_alpha( lb_lower_d, full_alpha_shape, self.alpha_indices) if ub_lower_d is not None: ub_lower_d = self.reconstruct_full_alpha( ub_lower_d, full_alpha_shape, self.alpha_indices) lb_lower_d, ub_lower_d, zero_coeffs = self._relu_mask_alpha(lower, upper, lb_lower_d, ub_lower_d, leaky_alpha=self.leaky_alpha) self.zero_backward_coeffs_l = self.zero_backward_coeffs_u = zero_coeffs flag_expand = True # we already have the spec dimension. if self.relu_options == "same-slope": # same-slope with optimized lower_d # We force upper_d to be the same as lower_d, and compute the corresponding upper_b lb_upper_d, lb_upper_b, ub_upper_d, ub_upper_b = self._relu_upper_opt_same_slope(lb_lower_d, ub_lower_d, upper_d, lower, upper) else: # FIXME: the shape can be incorrect if unstable_idx is not None. # This will cause problem if some ReLU layers are optimized, some are not. lower_d = self._relu_lower_bound_init(upper_d) # Upper bound always needs an extra specification dimension, since they only depend on lb and ub. upper_d = upper_d.unsqueeze(0) upper_b = upper_b.unsqueeze(0) if not flag_expand: # FIXME: The following lines seem unused since # flag_expand must be true when self.optstage in ['opt, 'reuse'] if self.opt_stage in ['opt', 'reuse']: # We have different slopes for lower and upper bounds propagation. 
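# Illustrative sketch (not part of this file): the element-wise ReLU relaxation used above
# (see _relu_upper_bound and _relu_lower_bound_init) upper-bounds relu(x) on an unstable interval
# [l, u] (l < 0 < u) by the chord with slope u / (u - l) and intercept -l * u / (u - l), while any
# slope in [0, 1] through the origin is a valid lower bound (the range alpha is clamped to in
# clip_alpha when leaky_alpha == 0). The concrete numbers below are made up for illustration only.
import torch
l = torch.tensor(-1.0)
u = torch.tensor(2.0)
upper_d = u / (u - l)          # chord slope for a plain ReLU (leaky_alpha == 0)
upper_b = -l * upper_d         # chord intercept
xs = torch.linspace(l.item(), u.item(), steps=101)
assert torch.all(upper_d * xs + upper_b >= torch.relu(xs) - 1e-6)
for lower_d in (0.0, 0.5, 1.0):
    assert torch.all(lower_d * xs <= torch.relu(xs) + 1e-6)
# End of sketch.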
lb_lower_d = lb_lower_d.unsqueeze(0) if last_lA is not None else None ub_lower_d = ub_lower_d.unsqueeze(0) if last_uA is not None else None if self.relu_options == "same-slope": upper_d = None lb_upper_d = lb_upper_d.unsqueeze(0) if last_lA is not None else None lb_upper_b = lb_upper_b.unsqueeze(0) if last_lA is not None else None ub_upper_d = ub_upper_d.unsqueeze(0) if last_uA is not None else None ub_upper_b = ub_upper_b.unsqueeze(0) if last_uA is not None else None else: lower_d = lower_d.unsqueeze(0) if self.opt_stage in ['opt', 'reuse'] and self.relu_options == "same-slope": # Remove upper_d and upper_b to avoid confusion later upper_d = None upper_b = None return (upper_d, upper_b, lower_d, lower_b, lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d, lb_upper_b, ub_upper_b, alpha_lookup_idx) def interval_propagate(self, *v): h_L, h_U = v[0][0], v[0][1] return self.forward(h_L), self.forward(h_U) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if self.leaky_alpha > 0: raise NotImplementedError # e.g., last layer input gurobi vars (8,16,16) gvars_array = np.array(v[0]) this_layer_shape = gvars_array.shape assert gvars_array.shape == self.output_shape[1:] pre_lbs = self.inputs[0].lower.cpu().detach().numpy().reshape(-1) pre_ubs = self.inputs[0].upper.cpu().detach().numpy().reshape(-1) new_layer_gurobi_vars = [] relu_integer_vars = [] new_relu_layer_constrs = [] # predefined zero variable shared in the whole solver model zero_var = model.getVarByName("zero") for neuron_idx, pre_var in enumerate(gvars_array.reshape(-1)): pre_ub = pre_ubs[neuron_idx] pre_lb = pre_lbs[neuron_idx] if pre_lb >= 0: # ReLU is always passing var = pre_var elif pre_ub <= 0: var = zero_var else: ub = pre_ub var = model.addVar(ub=ub, lb=0, obj=0, vtype=grb.GRB.CONTINUOUS, name=f'ReLU{self.name}_{neuron_idx}') if model_type == "mip" or model_type == "lp_integer": # binary indicator if model_type == "mip": a = model.addVar(vtype=grb.GRB.BINARY, name=f'aReLU{self.name}_{neuron_idx}') elif model_type == "lp_integer": a = model.addVar(ub=1, lb=0, vtype=grb.GRB.CONTINUOUS, name=f'aReLU{self.name}_{neuron_idx}') relu_integer_vars.append(a) new_relu_layer_constrs.append( model.addConstr(pre_var - pre_lb * (1 - a) >= var, name=f'ReLU{self.name}_{neuron_idx}_a_0')) new_relu_layer_constrs.append( model.addConstr(var >= pre_var, name=f'ReLU{self.name}_{neuron_idx}_a_1')) new_relu_layer_constrs.append( model.addConstr(pre_ub * a >= var, name=f'ReLU{self.name}_{neuron_idx}_a_2')) elif model_type == "lp": new_relu_layer_constrs.append( model.addConstr(var >= pre_var, name=f'ReLU{self.name}_{neuron_idx}_a_0')) new_relu_layer_constrs.append(model.addConstr( pre_ub * pre_var - (pre_ub - pre_lb) * var >= pre_ub * pre_lb, name=f'ReLU{self.name}_{neuron_idx}_a_1')) else: print(f"gurobi model type {model_type} not supported!") new_layer_gurobi_vars.append(var) new_layer_gurobi_vars = np.array(new_layer_gurobi_vars).reshape(this_layer_shape).tolist() if model_type in ["mip", "lp_integer"]: self.integer_vars = relu_integer_vars self.solver_vars = new_layer_gurobi_vars self.solver_constrs = new_relu_layer_constrs model.update() def build_gradient_node(self, grad_upstream): if self.leaky_alpha > 0: raise NotImplementedError node_grad = ReLUGrad() grad_input = (grad_upstream, self.inputs[0].forward_value) # An extra node is needed to consider the state of ReLU activation grad_extra_nodes = [self.inputs[0]] return [(node_grad, grad_input, grad_extra_nodes)] def get_split_mask(self, lower, upper, input_index): 
assert input_index == 0 return torch.logical_and(lower < 0, upper > 0) # Return unstable mask to determine which neuron should use constraints_solving concretization def get_unstable_mask(self, lower, upper): """Return a mask to indicate if each neuron is unstable. 0: Stable (linear) neuron; 1: unstable (nonlinear) neuron. """ return torch.logical_and(lower < 0, upper > 0) # Return heuristic to select which neuron should use constraints_solving concretization def compute_bound_improvement_heuristics(self, lower, upper): """Return a heuristic score for each lower-upper bound pair. It indicates the possible bound improvement for each neuron. We will then choose if a neuron's bound needs further tightened based on the heuristic """ # This heuristic is actually BaBSR-interception-only. return (-lower * upper).clamp(min=0) / (upper - lower + 1e-8).abs() class BoundLeakyRelu(BoundRelu): pass class BoundSign(BoundActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.splittable = True def forward(self, x): return torch.sign(x) def bound_relax(self, x, init=False): if init: self.init_linear_relaxation(x) mask_0 = torch.logical_and(x.lower == 0, x.upper == 0) mask_pos_0 = torch.logical_and(x.lower == 0, x.upper > 0) mask_neg_0 = torch.logical_and(x.lower < 0, x.upper == 0) mask_pos = x.lower > 0 mask_neg = x.upper < 0 mask_both = torch.logical_not(torch.logical_or(torch.logical_or( mask_0, torch.logical_or(mask_pos, mask_pos_0)), torch.logical_or(mask_neg, mask_neg_0))) self.add_linear_relaxation(mask=mask_0, type='lower', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=0) self.add_linear_relaxation(mask=mask_0, type='upper', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=0) self.add_linear_relaxation(mask=mask_pos_0, type='lower', k=1/x.upper.clamp(min=1e-8), x0=torch.zeros_like(x.upper), y0=0) self.add_linear_relaxation(mask=torch.logical_or(mask_pos_0, mask_pos), type='upper', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=1) self.add_linear_relaxation(mask=torch.logical_or(mask_neg_0, mask_neg), type='lower', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=-1) self.add_linear_relaxation(mask=mask_neg_0, type='upper', k=-1/x.lower.clamp(max=-1e-8), x0=torch.zeros_like(x.upper), y0=0) self.add_linear_relaxation(mask=mask_pos, type='lower', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=1) self.add_linear_relaxation(mask=mask_neg, type='upper', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=-1) self.add_linear_relaxation(mask=mask_both, type='lower', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=-1) self.add_linear_relaxation(mask=mask_both, type='upper', k=0, x0=torch.zeros_like(x.upper, requires_grad=True), y0=1) class SignMergeFunction_loose(torch.autograd.Function): # Modified SignMerge operator. # Change its backward function so that the "gradient" can be used for pgd attack @staticmethod def forward(ctx, input): ctx.save_for_backward(input) output = torch.sign(torch.sign(input) + 1e-1) return output @staticmethod def backward(ctx, grad_output): eps = 5 # should be carefully chosen input, = ctx.saved_tensors grad_input = grad_output.clone() grad_input[abs(input) >= eps] = 0 grad_input /= eps return grad_input class SignMergeFunction_tight(torch.autograd.Function): # Modified SignMerge operator. 
# Change its backward function so that the "gradient" can be used for pgd attack @staticmethod def forward(ctx, input): ctx.save_for_backward(input) output = torch.sign(torch.sign(input) + 1e-1) return output @staticmethod def backward(ctx, grad_output): eps = 0.1 # should be carefully chosen input, = ctx.saved_tensors grad_input = grad_output.clone() grad_input[abs(input) >= eps] = 0 grad_input /= eps return grad_input class BoundSignMerge(BoundTwoPieceLinear): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.alpha_size = 4 self.loose_function = SignMergeFunction_loose self.tight_function = SignMergeFunction_tight self.signmergefunction = self.tight_function # default def get_unstable_idx(self): self.alpha_indices = torch.logical_and( self.inputs[0].lower < 0, self.inputs[0].upper >= 0).any(dim=0).nonzero(as_tuple=True) def forward(self, x): self.shape = x.shape[1:] return self.signmergefunction.apply(x) def _mask_alpha(self, lower, upper, lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d): lower_mask = (lower >= 0.).requires_grad_(False).to(lower.dtype) upper_mask = (upper < 0.).requires_grad_(False).to(upper.dtype) no_mask = 1. - (lower_mask + upper_mask) if lb_lower_d is not None: lb_lower_d = torch.min(lb_lower_d, 2/upper.clamp(min=1e-8)) lb_lower_d = torch.clamp(lb_lower_d, min=0) * no_mask lb_upper_d = torch.min(lb_upper_d, -2/lower.clamp(max=-1e-8)) lb_upper_d = torch.clamp(lb_upper_d, min=0) * no_mask if ub_lower_d is not None: ub_lower_d = torch.min(ub_lower_d, 2/upper.clamp(min=1e-8)) ub_lower_d = torch.clamp(ub_lower_d, min=0) * no_mask ub_upper_d = torch.min(ub_upper_d, -2/lower.clamp(max=-1e-8)) ub_upper_d = torch.clamp(ub_upper_d, min=0) * no_mask return lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d def _backward_relaxation(self, last_lA, last_uA, x, start_node, unstable_idx): if x is not None: lower, upper = x.lower, x.upper else: lower, upper = self.lower, self.upper flag_expand = False ub_lower_d = lb_lower_d = lb_upper_d = ub_upper_d = None alpha_lookup_idx = None # For sparse-spec alpha. if self.opt_stage in ['opt', 'reuse']: # Alpha-CROWN. upper_d = lower_d = None selected_alpha, alpha_lookup_idx = self.select_alpha_by_idx( last_lA, last_uA, unstable_idx, start_node) # The first dimension is lower/upper intermediate bound. if last_lA is not None: lb_lower_d = selected_alpha[0] lb_upper_d = selected_alpha[2] if last_uA is not None: ub_lower_d = selected_alpha[1] ub_upper_d = selected_alpha[3] if self.alpha_indices is not None: # Sparse alpha on the hwc dimension. We store slopes for unstable neurons in this layer only. # Recover to full alpha first. sparse_alpha_shape = lb_lower_d.shape if lb_lower_d is not None else ub_lower_d.shape full_alpha_shape = sparse_alpha_shape[:-1] + self.shape if lb_lower_d is not None: lb_lower_d = self.reconstruct_full_alpha( lb_lower_d, full_alpha_shape, self.alpha_indices) lb_upper_d = self.reconstruct_full_alpha( lb_upper_d, full_alpha_shape, self.alpha_indices) if ub_lower_d is not None: ub_lower_d = self.reconstruct_full_alpha( ub_lower_d, full_alpha_shape, self.alpha_indices) ub_upper_d = self.reconstruct_full_alpha( ub_upper_d, full_alpha_shape, self.alpha_indices) lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d = self._mask_alpha(lower, upper, lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d) flag_expand = True # we already have the spec dimension. 
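# Without alpha optimization (the branch below), the relaxation of the +/-1 step
# degenerates to constant bounds: both slopes are zero and the offsets come from the
# stability masks. For example (hypothetical bounds), a neuron with lower >= 0 gets
# lower_b = upper_b = +1, a neuron with upper < 0 gets -1, and an unstable neuron is
# enclosed by the constant interval [-1, +1].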
else: lower_d = torch.zeros_like(upper, requires_grad=True) upper_d = torch.zeros_like(upper, requires_grad=True) mask_pos = (x.lower >= 0.).requires_grad_(False).to(x.lower.dtype) mask_neg = (x.upper < 0.).requires_grad_(False).to(x.upper.dtype) lower_b = (-1 * (1 - mask_pos) + mask_pos).unsqueeze(0) upper_b = (-1 * mask_neg + (1 - mask_neg)).unsqueeze(0) # Upper bound always needs an extra specification dimension, since they only depend on lb and ub. if not flag_expand: if self.opt_stage in ['opt', 'reuse']: # We have different slopes for lower and upper bounds propagation. lb_lower_d = lb_lower_d.unsqueeze(0) if last_lA is not None else None ub_lower_d = ub_lower_d.unsqueeze(0) if last_uA is not None else None lb_upper_d = lb_lower_d.unsqueeze(0) if last_lA is not None else None ub_upper_d = ub_lower_d.unsqueeze(0) if last_uA is not None else None else: lower_d = lower_d.unsqueeze(0) upper_d = upper_d.unsqueeze(0) return (upper_d, upper_b, lower_d, lower_b, lb_lower_d, ub_lower_d, lb_upper_d, ub_upper_d, None, None, alpha_lookup_idx) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): # e.g., last layer input gurobi vars (8,16,16) gvars_array = np.array(v[0]) this_layer_shape = gvars_array.shape assert gvars_array.shape == self.output_shape[1:] pre_lbs = self.inputs[0].lower.cpu().detach().numpy().reshape(-1) pre_ubs = self.inputs[0].upper.cpu().detach().numpy().reshape(-1) new_layer_gurobi_vars = [] integer_vars = [] layer_constrs = [] # predefined zero variable shared in the whole solver model one_var = model.getVarByName("one") neg_one_var = model.getVarByName("neg_one") for neuron_idx, pre_var in enumerate(gvars_array.reshape(-1)): pre_ub = pre_ubs[neuron_idx] pre_lb = pre_lbs[neuron_idx] if pre_lb >= 0: var = one_var elif pre_ub < 0: var = neg_one_var else: ub = pre_ub var = model.addVar(ub=ub, lb=pre_lb, obj=0, vtype=grb.GRB.CONTINUOUS, name=f'Sign{self.name}_{neuron_idx}') a = model.addVar(vtype=grb.GRB.BINARY, name=f'aSign{self.name}_{neuron_idx}') integer_vars.append(a) layer_constrs.append( model.addConstr(pre_lb * a <= pre_var, name=f'Sign{self.name}_{neuron_idx}_a_0')) layer_constrs.append( model.addConstr(pre_ub * (1 - a) >= pre_var, name=f'Sign{self.name}_{neuron_idx}_a_1')) layer_constrs.append( model.addConstr(var == 1 - 2*a, name=f'Sign{self.name}_{neuron_idx}_a_2')) new_layer_gurobi_vars.append(var) new_layer_gurobi_vars = np.array(new_layer_gurobi_vars).reshape(this_layer_shape).tolist() if model_type in ["mip", "lp_integer"]: self.integer_vars = integer_vars self.solver_vars = new_layer_gurobi_vars self.solver_constrs = layer_constrs model.update() def relu_grad(preact): return (preact > 0).float() class ReLUGradOp(Function): """ Local gradient of ReLU. Not including multiplication with gradients from other layers. """ @staticmethod def symbolic(_, g, g_relu, g_relu_rev, preact): return _.op('grad::Relu', g, g_relu, g_relu_rev, preact).setType(g.type()) @staticmethod def forward(ctx, g, g_relu, g_relu_rev, preact): return g * relu_grad(preact) class ReLUGrad(Module): def forward(self, g, preact): g_relu = F.relu(g) g_relu_rev = -F.relu(-g) return ReLUGradOp.apply(g, g_relu, g_relu_rev, preact) # FIXME reuse the function from auto_LiRPA.patches def _maybe_unfold(d_tensor, last_A): if d_tensor is None: return None #[batch, out_dim, in_c, in_H, in_W] d_shape = d_tensor.size() # Reshape to 4-D tensor to unfold. #[batch, out_dim*in_c, in_H, in_W] d_tensor = d_tensor.view(d_shape[0], -1, *d_shape[-2:]) # unfold the slope matrix as patches. 
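# The unfold below extracts the same sliding windows (kernel size, stride, padding)
# that produced last_A's patches, so a dense per-pixel mask or slope tensor can be
# multiplied elementwise onto last_A.patches in patch space.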
# Patch shape is [batch, out_h, out_w, out_dim*in_c, H, W). d_unfolded = inplace_unfold( d_tensor, kernel_size=last_A.patches.shape[-2:], stride=last_A.stride, padding=last_A.padding) # Reshape to [batch, out_H, out_W, out_dim, in_C, H, W] d_unfolded_r = d_unfolded.view( *d_unfolded.shape[:3], d_shape[1], *d_unfolded.shape[-2:]) if last_A.unstable_idx is not None: if len(last_A.unstable_idx) == 4: # [batch, out_H, out_W, out_dim, in_C, H, W] # to [out_H, out_W, batch, out_dim, in_C, H, W] d_unfolded_r = d_unfolded_r.permute(1, 2, 0, 3, 4, 5, 6) d_unfolded_r = d_unfolded_r[ last_A.unstable_idx[2], last_A.unstable_idx[3]] else: raise NotImplementedError # For sparse patches, the shape after unfold is # (unstable_size, batch_size, in_c, H, W). # For regular patches, the shape after unfold is # (spec, batch, out_h, out_w, in_c, H, W). return d_unfolded_r class BoundReluGrad(BoundActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.requires_input_bounds = [3] self.recurjac = options.get('recurjac', False) @staticmethod def relu_grad(preact): return (preact > 0).float() def forward(self, g, g_relu, g_relu_rev, preact): if g.ndim == preact.ndim + 1: preact = preact.unsqueeze(1) return g * relu_grad(preact) def interval_propagate(self, *v): g_lower, g_upper = v[0] preact_lower, preact_upper = v[3] relu_grad_lower = relu_grad(preact_lower) relu_grad_upper = relu_grad(preact_upper) if g_lower.ndim == relu_grad_lower.ndim + 1: relu_grad_lower = relu_grad_lower.unsqueeze(1) relu_grad_upper = relu_grad_upper.unsqueeze(1) lower = torch.min(g_lower * relu_grad_lower, g_lower * relu_grad_upper) upper = torch.max(g_upper * relu_grad_lower, g_upper * relu_grad_upper) return lower, upper def bound_backward(self, last_lA, last_uA, g, g_relu, g_relu_rev, preact, **kwargs): mask_active = (preact.lower > 0).float() mask_inactive = (preact.upper < 0).float() mask_unstable = 1 - mask_active - mask_inactive if self.recurjac and self.inputs[0].perturbed: upper_grad = preact.upper >= 0 lower_interval = self.inputs[0].lower * upper_grad upper_interval = self.inputs[0].upper * upper_grad else: lower_interval = upper_interval = None def _bound_oneside(last_A, pos_interval=None, neg_interval=None): if last_A is None: return None, None, None, 0 if isinstance(last_A, torch.Tensor): if self.recurjac and self.inputs[0].perturbed: mask_unstable_grad = ( (self.inputs[0].lower < 0) * (self.inputs[0].upper > 0)) last_A_unstable = last_A * mask_unstable_grad bias = ( last_A_unstable.clamp(min=0) * pos_interval + last_A_unstable.clamp(max=0) * neg_interval) bias = bias.reshape( bias.shape[0], bias.shape[1], -1).sum(dim=-1) last_A = last_A * torch.logical_not(mask_unstable_grad) else: bias = 0 A = last_A * mask_active A_pos = last_A.clamp(min=0) * mask_unstable A_neg = last_A.clamp(max=0) * mask_unstable return A, A_pos, A_neg, bias elif isinstance(last_A, Patches): last_A_patches = last_A.patches if self.recurjac and self.inputs[0].perturbed: mask_unstable_grad = ( (self.inputs[0].lower < 0) * (self.inputs[0].upper > 0)) mask_unstable_grad_unfold = _maybe_unfold( mask_unstable_grad, last_A) last_A_unstable = ( last_A.to_matrix(mask_unstable_grad.shape) * mask_unstable_grad) bias = ( last_A_unstable.clamp(min=0) * pos_interval + last_A_unstable.clamp(max=0) * neg_interval) # FIXME Clean up patches. This implementation does not seem # to support general shapes. 
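# As in the dense-tensor branch above, coefficients whose upstream gradient interval
# straddles zero are concretized into a bias term via interval arithmetic and removed
# from the patches; the remaining coefficients are split into an "active" part and
# clamped "unstable" parts that are propagated through g_relu and g_relu_rev.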
assert bias.ndim == 5 bias = bias.sum(dim=[-1, -2, -3]).view(-1, 1) last_A_patches = ( last_A_patches * torch.logical_not(mask_unstable_grad_unfold)) else: bias = 0 # need to unfold mask_active and mask_unstable # [batch, 1, in_c, in_H, in_W] mask_active_unfold = _maybe_unfold(mask_active, last_A) mask_unstable_unfold = _maybe_unfold(mask_unstable, last_A) # [spec, batch, 1, in_c, in_H, in_W] mask_active_unfold = mask_active_unfold.expand(last_A.shape) mask_unstable_unfold = mask_unstable_unfold.expand(last_A.shape) A = Patches( last_A_patches * mask_active_unfold, last_A.stride, last_A.padding, last_A.shape, last_A.identity, last_A.unstable_idx, last_A.output_shape) A_pos_patches = last_A_patches.clamp(min=0) * mask_unstable_unfold A_neg_patches = last_A_patches.clamp(max=0) * mask_unstable_unfold A_pos = Patches( A_pos_patches, last_A.stride, last_A.padding, last_A.shape, last_A.identity, last_A.unstable_idx, last_A.output_shape) A_neg = Patches( A_neg_patches, last_A.stride, last_A.padding, last_A.shape, last_A.identity, last_A.unstable_idx, last_A.output_shape) return A, A_pos, A_neg, bias lA, lA_pos, lA_neg, lbias = _bound_oneside( last_lA, pos_interval=lower_interval, neg_interval=upper_interval) uA, uA_pos, uA_neg, ubias = _bound_oneside( last_uA, pos_interval=upper_interval, neg_interval=lower_interval) return ( [(lA, uA), (lA_neg, uA_pos), (lA_pos, uA_neg), (None, None)], lbias, ubias) ================================================ FILE: auto_LiRPA/operators/reshape.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from torch.nn import Module from .base import * from ..patches import Patches, patches_to_matrix from .linear import BoundLinear from .constant import BoundConstant class BoundReshape(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) # It can be set to `view`, so that `view` instead of `reshape` will be used. self.option = options.get('reshape', 'reshape') def forward(self, x, shape): shape = list(shape) for i in range(len(shape)): if shape[i] == -1: shape[i] = prod(x.shape) // int(prod(shape[:i]) * prod(shape[(i + 1):])) self.shape = shape if self.option == 'view': return x.contiguous().view(shape) else: return x.reshape(shape) def bound_backward(self, last_lA, last_uA, x, shape, **kwargs): def _bound_oneside(A): if A is None: return None if type(A) == Patches: # output shape should be [batch, in_c, in_H, in_W] since it's followed by Conv2d assert len(self.output_shape) == 4 if type(self.inputs[0]) == BoundLinear: # Save the shape and it will be converted to matrix in Linear layer. 
return A.create_similar(input_shape=self.output_shape) if A.unstable_idx is None: patches = A.patches # non-sparse: [batch, out_dim, out_c, out_H, out_W, out_dim, in_c, H, W] # [out_dim*out_c, batch, out_H, out_W, out_dim*in_c, H, W] # expected next_A shape [batch, spec, in_c, in_H , in_W]. next_A = patches_to_matrix( pieces=patches, input_shape=self.output_shape, stride=A.stride, padding=A.padding) else: # sparse: [spec, batch, in_c, patch_H, patch_W] (specs depends on the number of unstable neurons). patches = A.patches # expected next_A shape [batch, spec, input_c, in_H, in_W]. next_A = patches_to_matrix( pieces=patches, input_shape=self.output_shape, stride=A.stride, padding=A.padding, output_shape=A.output_shape, unstable_idx=A.unstable_idx) # Reshape it to [spec, batch, *input_shape] (input_shape is the shape before Reshape operation). return next_A.transpose(0, 1).reshape(-1, A.shape[1], *self.input_shape[1:]) else: return A.reshape(A.shape[0], A.shape[1], *self.input_shape[1:]) #FIXME check reshape or view return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def bound_forward(self, dim_in, x, shape): batch_size = x.lw.shape[0] lw = x.lw.reshape(batch_size, dim_in, *self.shape[1:]) uw = x.uw.reshape(batch_size, dim_in, *self.shape[1:]) lb = x.lb.reshape(batch_size, *self.shape[1:]) ub = x.ub.reshape(batch_size, *self.shape[1:]) return LinearBound(lw, lb, uw, ub) def bound_dynamic_forward(self, x, shape, max_dim=None, offset=0): w = x.lw.reshape(x.lw.shape[0], x.lw.shape[1], *self.shape[1:]) b = x.lb.reshape(x.lb.shape[0], *self.shape[1:]) return LinearBound(w, b, w, b, x_L=x.x_L, x_U=x.x_U, tot_dim=x.tot_dim) def interval_propagate(self, *v): return Interval.make_interval( self.forward(v[0][0], v[1][0]), self.forward(v[0][1], v[1][0]), v[0]) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if isinstance(v[0], Tensor): self.solver_vars = self.forward(*v) return gvar_array = np.array(v[0]) gvar_array = gvar_array.reshape(v[1].detach().cpu().numpy())[0] self.solver_vars = gvar_array.tolist() def build_gradient_node(self, grad_upstream): node_grad = ReshapeGrad() grad_input = (grad_upstream, self.inputs[0].forward_value) return [(node_grad, grad_input, [])] class BoundUnsqueeze(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True if 'axes' in attr: self.axes = attr['axes'] assert len(self.axes) == 1 self.axes = self.axes[0] else: self.axes = None def forward(self, *x): data = x[0] if self.axes is not None: axes = self.axes else: axes = x[1].item() self.axes = axes return data.unsqueeze(axes) def bound_backward(self, last_lA, last_uA, *x, **kwargs): if self.axes is not None: axes = self.make_axis_non_negative(self.axes, 'output') else: axes = self.make_axis_non_negative(x[1].value.item(), 'output') if axes == 0: raise ValueError("Unsqueezing with axes == 0 is not allowed") else: def squeeze_A(last_A): if type(last_A) == Patches: return Patches( last_A.patches.squeeze(axes - 5), last_A.stride, last_A.padding, last_A.shape, last_A.identity, last_A.unstable_idx, last_A.output_shape) elif last_A is not None: return last_A.squeeze(axes + 1) else: return None lA = squeeze_A(last_lA) uA = squeeze_A(last_uA) return [(lA, uA), (None, None)], 0, 0 def bound_forward(self, dim_in, *x): axes = self.make_axis_non_negative( self.axes if self.axes is not None else x[1].lb.item(), 'output') x = x[0] if len(self.input_shape) == 0: lw, lb 
= x.lw.unsqueeze(1), x.lb.unsqueeze(0) uw, ub = x.uw.unsqueeze(1), x.ub.unsqueeze(0) else: lw, lb = x.lw.unsqueeze(axes + 1), x.lb.unsqueeze(axes) uw, ub = x.uw.unsqueeze(axes + 1), x.ub.unsqueeze(axes) return LinearBound(lw, lb, uw, ub) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(v[0]) def build_gradient_node(self, grad_upstream): axes = self.make_axis_non_negative(self.axes, 'output') if axes == 0: raise ValueError("Unsqueezing with axes == 0 is not allowed") node_grad = UnsqueezeGrad(axes) return [(node_grad, (grad_upstream,), [])] class UnsqueezeGrad(Module): def __init__(self, axes): super().__init__() self.axes = axes def forward(self, grad_last): return grad_last.squeeze(self.axes + 1) class BoundExpand(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True def forward(self, x, y): y = y.clone() assert y.ndim == 1 n, m = x.ndim, y.shape[0] assert n <= m for i in range(n): if y[m - n + i] == 1: y[m - n + i] = x.shape[i] else: assert x.shape[i] == 1 or x.shape[i] == y[m - n + i] return x.expand(*list(y)) def bound_backward(self, last_lA, last_uA, *x, **kwargs): assert not self.is_input_perturbed(1) # Although torch.expand supports prepending dimensions, # bound computation doesn't since we must always keep # the batch dimension at the beginning assert ( len(x[0].output_shape) == len(self.output_shape) ), "BoundExpand with changed ndim is not supported by bound computation" n = len(self.output_shape) def _bound_oneside(A): if A is None: return None dims_to_sum = [i + 1 for i in range(1, n) if x[0].output_shape[i] == 1 and A.shape[i + 1] > 1] return A.sum(dim=dims_to_sum, keepdim=True) if dims_to_sum else A return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def bound_forward(self, dim_in, *x): # It doesn't support the general Expand operator. # This is just for the Expand operator converted from torch.repeat, and here # it should just be an identical operator. shape = x[1].lb if not (len(x[0].lb.shape) == len(shape) and (shape == 1).all()): raise NotImplementedError("General onnx::Expand is not supported") return x[0] def build_gradient_node(self, grad_upstream): shape = self.inputs[1].forward_value if not (len(self.inputs[0].output_shape) == len(shape) and (shape == 1).all()): raise NotImplementedError("General onnx::Expand is not supported") return [(ExpandGrad(shape), (grad_upstream,), []), None] class ExpandGrad(Module): # It doesn't support the general Expand operator. # This is just for the Expand operator converted from torch.repeat, and here # it should just be an identical operator.
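# For example (hypothetical shapes), an Expand traced from torch.repeat has a target
# shape tensor of all ones: x of shape (1, 3, 4) expanded with [1, 1, 1] is returned
# unchanged, so the gradient module can simply pass the upstream gradient through.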
def __init__(self, shape): super().__init__() self.shape = shape def forward(self, grad_last): return grad_last class BoundSqueeze(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True if 'axes' in attr: self.axes = attr['axes'] assert len(self.axes) == 1 self.axes = self.axes[0] else: self.axes = None def forward(self, *x): data = x[0] if self.axes is not None: axes = self.axes else: axes = x[1].item() return data.squeeze(axes) def bound_backward(self, last_lA, last_uA, *x, **kwargs): if self.axes is not None: axes = self.axes else: axes = self.make_axis_non_negative(x[1].value.item(), 'input') if axes == 0: raise ValueError("Squeezing with axes == 0 is not allowed") return [(last_lA.unsqueeze(axes + 1) if last_lA is not None else None, last_uA.unsqueeze(axes + 1) if last_uA is not None else None), (None, None)], 0, 0 def bound_forward(self, dim_in, *x): if self.axes is not None: axes = self.axes else: axes = self.make_axis_non_negative(x[1].lb.item(), 'input') x = x[0] return LinearBound( x.lw.squeeze(axes + 1), x.lb.squeeze(axes), x.uw.squeeze(axes + 1), x.ub.squeeze(axes) ) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(v[0]) class BoundFlatten(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True self.axis = attr['axis'] def forward(self, x): return torch.flatten(x, self.axis) def bound_backward(self, last_lA, last_uA, x, **kwargs): def _bound_oneside(A): if A is None: return None return A.reshape(A.shape[0], A.shape[1], *self.input_shape[1:]) return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def bound_dynamic_forward(self, x, max_dim=None, offset=0): w = torch.flatten(x.lw, self.axis + 1) b = torch.flatten(x.lb, self.axis) return LinearBound(w, b, w, b, x_L=x.x_L, x_U=x.x_U, tot_dim=x.tot_dim) def bound_forward(self, dim_in, x): self.axis = self.make_axis_non_negative(self.axis) assert self.axis > 0 return LinearBound( torch.flatten(x.lw, self.axis + 1), torch.flatten(x.lb, self.axis), torch.flatten(x.uw, self.axis + 1), torch.flatten(x.ub, self.axis), ) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): # e.g., v[0] input shape (16, 8, 8) => output shape (1024,) self.solver_vars = np.array(v[0]).reshape(-1).tolist() model.update() def build_gradient_node(self, grad_upstream): node_grad = ReshapeGrad() grad_input = (grad_upstream, self.inputs[0].forward_value) return [(node_grad, grad_input, [])] class BoundATenUnflatten(BoundReshape): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) def forward(self, x, dim, sizes): self.dim = dim.item() self.sizes = sizes.tolist() fval = torch.unflatten(x, self.dim, self.sizes) self.shape = fval.shape return fval def bound_backward(self, last_lA, last_uA, *x, **kwargs): A, lbias, ubias = super().bound_backward(last_lA, last_uA, x[0], shape=None, kwargs=kwargs) # One more input for Unflatten A.append((None, None)) return A, lbias, ubias def bound_forward(self, dim_in, *x): return super().bound_forward(dim_in=dim_in, x=x[0], shape=None) def bound_dynamic_forward(self, *x, max_dim=None, offset=0): return super().bound_dynamic_forward(x=x[0], shape=None, max_dim=max_dim, offset=offset) def interval_propagate(self, x, dim, sizes): 
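# IBP through Unflatten only reshapes the two endpoint tensors; dim and sizes are
# constants. For example (hypothetical shapes), bounds of shape (B, 6) unflattened at
# dim 1 with sizes [2, 3] become bounds of shape (B, 2, 3), element for element.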
return Interval.make_interval( self.forward(x[0], dim[0], sizes[0]), self.forward(x[1], dim[0], sizes[0]), x) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): shape = torch.tensor(v[0].shape[0], *self.shape[1:]) return super().build_solver((v[0], shape), model=model, C=C, model_type=model_type, solver_pkg=solver_pkg) class ReshapeGrad(Module): def forward(self, grad_last, inp): if grad_last.numel() == inp.numel(): return grad_last.reshape(grad_last.shape[0], *inp.shape[1:]) else: return grad_last.reshape(*grad_last.shape[:2], *inp.shape[1:]) class BoundTranspose(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.perm = attr['perm'] self.perm_inv_inc_one = [-1] * (len(self.perm) + 1) self.perm_inv_inc_one[0] = 0 for i in range(len(self.perm)): self.perm_inv_inc_one[self.perm[i] + 1] = i + 1 self.use_default_ibp = True self.ibp_intermediate = True def forward(self, x): return x.permute(*self.perm) def bound_backward(self, last_lA, last_uA, x, **kwargs): def _bound_oneside(last_A): if last_A is None: return None return last_A.permute(self.perm_inv_inc_one) return [(_bound_oneside(last_lA), _bound_oneside(last_uA))], 0, 0 def bound_forward(self, dim_in, x): if self.input_shape[0] != 1: perm = [0] + [(p + 1) for p in self.perm] else: assert (self.perm[0] == 0) perm = [0, 1] + [(p + 1) for p in self.perm[1:]] lw, lb = x.lw.permute(*perm), x.lb.permute(self.perm) uw, ub = x.uw.permute(*perm), x.ub.permute(self.perm) return LinearBound(lw, lb, uw, ub) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(*v) def build_gradient_node(self, grad_upstream): node_grad = TransposeGrad(self.perm_inv_inc_one) grad_input = (grad_upstream,) return [(node_grad, grad_input, [])] class TransposeGrad(Module): def __init__(self, perm_inv): super().__init__() self.perm_inv = perm_inv def forward(self, grad_last): return grad_last.permute(*self.perm_inv) ================================================ FILE: auto_LiRPA/operators/resize.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Resize operator """ import torch from .base import * import numpy as np from .solver_utils import grb from ..patches import unify_shape, create_valid_mask, is_shape_used class BoundResize(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) # only support nearest mode for now assert attr["mode"] == "nearest" self.mode = attr["mode"] self.scale_factor = None def forward(self, x, size=None, scale_factor=None): # currently, forwarding size is not supported. assert isinstance(size, torch.Tensor) and len(size.tolist()) == 0 # currently, only support enlarge tensor size by an integer factor. 
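# For example (hypothetical values), a 4-D NCHW input with scale_factor [1, 1, 2, 2]
# doubles H and W by nearest-neighbor upsampling, equivalent to
# F.interpolate(x, scale_factor=(2, 2), mode='nearest'). Non-integer factors, scaling
# on the batch/channel dims, or an explicit target size are rejected by the asserts below.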
assert len(scale_factor.tolist()) == 4 and np.array([tmp.is_integer() and tmp > 0 for tmp in scale_factor.tolist()]).all() assert (scale_factor[0:2].to(torch.long) == 1).all(), 'only support resize on the H and W dim' self.scale_factor = tuple([int(tmp) for tmp in scale_factor][2:]) if x.ndim == 4: final = F.interpolate( x, None, self.scale_factor, mode=self.mode) else: raise NotImplementedError( "Interpolation in 3D or interpolation with parameter size has not been implmented.") return final def interval_propagate(self, *v): l, u = zip(*v) return Interval.make_interval(self.forward(*l), self.forward(*u), v[0]) def bound_forward(self, dim_in, *inp): x = inp[0] lw, lb, uw, ub = x.lw, x.lb, x.uw, x.ub new_lw, new_lb, new_uw, new_ub = \ torch.nn.functional.upsample(lw, scale_factor=([1] * (lw.ndim - 4)) + list(self.scale_factor), mode=self.mode), \ torch.nn.functional.upsample(lb, scale_factor=([1] * (lb.ndim - 4)) + list(self.scale_factor), mode=self.mode), \ torch.nn.functional.upsample(uw, scale_factor=([1] * (uw.ndim - 4)) + list(self.scale_factor), mode=self.mode), \ torch.nn.functional.upsample(ub, scale_factor=([1] * (ub.ndim - 4)) + list(self.scale_factor), mode=self.mode) return LinearBound( lw = new_lw, lb = new_lb, uw = new_uw, ub = new_ub) def bound_backward(self, last_lA, last_uA, *x, **kwargs): def _bound_oneside(last_A): if last_A is None: return None assert type(last_A) is Patches or last_A.ndim == 5 # in case the kernel size cannot be divided by scale_factor, we round up the shape split_shape = tuple((torch.tensor( last_A.shape)[-2:] / torch.tensor(self.scale_factor)).ceil().to(torch.long).tolist()) new_shape = last_A.shape[:-2] + split_shape if not type(last_A) is Patches: # classical mode is simple to handle by # sum the grid elements by using avg_pool2d with divisor_override=1 return torch.nn.functional.avg_pool2d( last_A.reshape(-1, *last_A.shape[-2:]), kernel_size=self.scale_factor, stride=self.scale_factor, divisor_override=1).reshape(new_shape) else: # for patches mode assert type(last_A) is Patches assert self.scale_factor[0] == self.scale_factor[1] if self.scale_factor[0] == 1: # identity upsampling return last_A if isinstance(last_A.padding, int) and last_A.padding % self.scale_factor[0] == 0 and last_A.stride % self.scale_factor[0] == 0 and last_A.inserted_zeros == 0: # an easy case where patch sliding windows coincides with the nearest sampling scaling windows # in this case, we divide each patch to size of scale_factor sub-matrices, # and sum up each sub-matrices respectively # print(last_A.shape) padding = last_A.shape[-1] % self.scale_factor[-1] new_patches = torch.nn.functional.pad(last_A.patches, (0, padding, 0, padding)) new_patches = torch.nn.functional.avg_pool2d( new_patches.reshape(-1, *new_patches.shape[-2:]), kernel_size=self.scale_factor, stride=self.scale_factor, divisor_override=1).reshape(new_shape) return last_A.create_similar(patches=new_patches, stride=last_A.stride//self.scale_factor[0], padding=last_A.padding//self.scale_factor[0], ) else: """ The following part is created and mainly maintained by Linyi Time complexity = O(A.numel * scale_factor + outH * kerH + outW * kerW + A.numel * kerH * kerW) With Python loop complexity = O(outH + outW + kerH * kerW * scale_factor^2) """ # preparation: unify shape if last_A.padding: padding = unify_shape(last_A.padding) else: padding = (0,0,0,0) # padding = (left, right, top, bottom) if last_A.output_padding: output_padding = unify_shape(last_A.output_padding) else: output_padding = (0,0,0,0) # 
output_padding = (left, right, top, bottom) """ Step 0: filter out valid entries that maps to real cells of input Like with inserted zeros = 2, [x 0 0 x 0 0 x]. Only "x" cells are kept Borrowed from one_d generation from Conv patches """ one_d_unfolded_r = create_valid_mask(self.output_shape, last_A.patches.device, last_A.patches.dtype, last_A.patches.shape[-2:], last_A.stride, last_A.inserted_zeros, last_A.padding, last_A.output_padding, last_A.unstable_idx) patches = last_A.patches * one_d_unfolded_r """ Step 1: compute the coordinate mapping from patch coordinates to input coordinates Time complexity: O(outH + outW) note: last_A shape is [outC, batch, outH, outW, inC, kerH, kerW] We create H_idx_map and W_idx_map of shape [outH] and [outW] respectively, recording the start idx of row/column for patches at position [.,.,.,.,.,i,j] in H_idx_map[i] and W_idx_map[j] """ ker_size_h, ker_size_w = last_A.shape[-2], last_A.shape[-1] if last_A.unstable_idx is None: # we can get the real output H and W from shape[2] and shape [3] out_h, out_w = last_A.shape[2], last_A.shape[3] else: # it seems to be stored in output_shape out_h, out_w = last_A.output_shape[-2], last_A.output_shape[-1] h_idx_map = torch.arange(0, out_h) * last_A.stride - padding[-2] + output_padding[-2] * last_A.stride h_idx_map = h_idx_map.to(last_A.device) w_idx_map = torch.arange(0, out_w) * last_A.stride - padding[-4] + output_padding[-4] * last_A.stride w_idx_map = w_idx_map.to(last_A.device) r""" Step 2: compute the compressed patches Time complexity: O(outH * kerH + outW * kerW + A.numel * kerH * kerW) Upsampling needs to sum up A cells in scale_factor * scale_factor sub-blocks Example: when scale factor is 2 [ a b c d e f g h ---\ [ a+b+e+f c+d+g+h i j k l ---/ i+j+m+n k+l+o+p] m n o p] In patches mode, we need to sum up cells in each patch accordingly. The summing mechanism could change at different locations. For each spatial dimension, we create a binary sum_mask tensor [outH, ker_size_h, new_ker_size_h] to select the cells to sum up Example: For [a b c d] -> [a+b c+d], with 3x3 patch covering [0..2] and [2..4]. 
The first patch needs to sum to [a+b c]; the second patch needs to sum to [b c+d] So we have sum_mask [ for patch 1: [[1, 1, 0], (first entry sums up index 0 and 1) [0, 0, 1]]^T, (second entry sums up index 2) for patch 2: [[1, 0, 0], (first entry sums up index 0) [0, 1, 1]]^T (second entry sums up index 1 and 2) ] With the mask, we can now compute the new patches with einsum: [outC, batch, outH, outW, inC, kerH, kerW] * [outH, kerH, new_kerH] -> [outC, batch, outH, outW, inC, new_kerH, kerW] """ tot_scale_fac = ((last_A.inserted_zeros + 1) * self.scale_factor[0], (last_A.inserted_zeros + 1) * self.scale_factor[1]) new_ker_size_h, new_ker_size_w = \ (tot_scale_fac[0] + ker_size_h - 2) // tot_scale_fac[0] + 1, \ (tot_scale_fac[1] + ker_size_w - 2) // tot_scale_fac[1] + 1 min_h_idx, max_h_idx = h_idx_map[0], h_idx_map[-1] + ker_size_h shrank_h_idx = (torch.arange(min_h_idx, max_h_idx) + last_A.inserted_zeros).div(tot_scale_fac[0], rounding_mode='floor') if last_A.unstable_idx is None: # with nonsparse index, create full-sized sum musk for rows ker_h_indexer = torch.arange(0, ker_size_h).to(last_A.device) sum_mask_h = torch.zeros(last_A.shape[2], ker_size_h, new_ker_size_h).to(last_A.device) for i in range(last_A.shape[2]): sum_mask_h[i, ker_h_indexer, \ shrank_h_idx[h_idx_map[i] - min_h_idx: h_idx_map[i] - min_h_idx + ker_size_h] - shrank_h_idx[h_idx_map[i] - min_h_idx]] = 1 # set zero to those in padding area padding_place_mask = (ker_h_indexer + h_idx_map[i] < 0) sum_mask_h[i, padding_place_mask] = 0 else: # with sparse index, create sparse sum musk sum_mask_h = torch.zeros(last_A.shape[0], ker_size_h, new_ker_size_h).to(last_A.device) row_nos = last_A.unstable_idx[1] unstable_loc_indexer = torch.arange(0, row_nos.shape[0]).to(last_A.device) for k in range(ker_size_h): place_in_new_ker = shrank_h_idx[h_idx_map[row_nos] - min_h_idx + k] - shrank_h_idx[h_idx_map[row_nos] - min_h_idx] sum_mask_h[unstable_loc_indexer, k, place_in_new_ker] = 1 # set zero to those in padding area padding_place_mask = (h_idx_map[row_nos] + k < 0) sum_mask_h[padding_place_mask, k] = 0 min_w_idx, max_w_idx = w_idx_map[0], w_idx_map[-1] + ker_size_w shrank_w_idx = (torch.arange(min_w_idx, max_w_idx) + last_A.inserted_zeros).div(tot_scale_fac[1], rounding_mode='floor') if last_A.unstable_idx is None: # with nonsparse index, create full-sized sum musk for columns ker_w_indexer = torch.arange(0, ker_size_w).to(last_A.device) sum_mask_w = torch.zeros(last_A.shape[3], ker_size_w, new_ker_size_w).to(last_A.device) for i in range(last_A.shape[3]): sum_mask_w[i, ker_w_indexer, \ shrank_w_idx[w_idx_map[i] - min_w_idx: w_idx_map[i] - min_w_idx + ker_size_w] - shrank_w_idx[w_idx_map[i] - min_w_idx]] = 1 # set zero to those in padding area padding_place_mask = (ker_w_indexer + w_idx_map[i] < 0) sum_mask_w[i, padding_place_mask] = 0 else: # with sparse index, create sparse sum musk sum_mask_w = torch.zeros(last_A.shape[0], ker_size_w, new_ker_size_w).to(last_A.device) col_nos = last_A.unstable_idx[2] unstable_loc_indexer = torch.arange(0, col_nos.shape[0]).to(last_A.device) for k in range(ker_size_w): place_in_new_ker = shrank_w_idx[w_idx_map[col_nos] - min_w_idx + k] - shrank_w_idx[w_idx_map[col_nos] - min_w_idx] sum_mask_w[unstable_loc_indexer, k, place_in_new_ker] = 1 # set zero to those in padding area padding_place_mask = (w_idx_map[col_nos] + k < 0) sum_mask_w[padding_place_mask, k] = 0 if last_A.unstable_idx is None: # nonsparse aggregation new_patches = torch.einsum("ObhwIij,hix,wjy->ObhwIxy", patches, sum_mask_h, 
sum_mask_w) else: # sparse aggregation new_patches = torch.einsum("NbIij,Nix,Njy->NbIxy", patches, sum_mask_h, sum_mask_w) """ Step 3: broadcasting the new_patches by repeating elements, since later we would need to apply insert_zeros For example, scale_factor = 3, repeat patch [a,b] to [a,a,a,b,b,b] Time complexity: O(A.numel * scale_factor) """ ext_new_ker_size_h, ext_new_ker_size_w = \ new_ker_size_h * tot_scale_fac[0], new_ker_size_w * tot_scale_fac[1] ext_new_patches = torch.zeros(list(new_patches.shape[:-2]) + [ext_new_ker_size_h, ext_new_ker_size_w], device=new_patches.device) for i in range(ext_new_ker_size_h): for j in range(ext_new_ker_size_w): ext_new_patches[..., i, j] = new_patches[..., i // tot_scale_fac[0], j // tot_scale_fac[1]] """ Step 4: compute new padding, stride, shape, insert_zeros, and output_padding """ # stride should be the same after upsampling, stride is an integer # new_stride = last_A.stride # padding can change much, the beginning should extend by (scale - 1) entries, # the ending should extend by (ext_new_ker_size - ker_size) entries # padding = (left, right, top, bottom) new_padding = (padding[0] + (self.scale_factor[1] - 1) * (last_A.inserted_zeros + 1), padding[1] + ext_new_ker_size_w - ker_size_w, padding[2] + (self.scale_factor[0] - 1) * (last_A.inserted_zeros + 1), padding[3] + ext_new_ker_size_h - ker_size_h) if new_padding[0] == new_padding[1] and new_padding[1] == new_padding[2] and new_padding[2] == new_padding[3]: # simplify to an int new_padding = new_padding[0] # only support uniform scaling on H and W now, i.e., self.scale_factor[0] == self.scale_factor[1] inserted_zeros = tot_scale_fac[0] - 1 # output padding seems not to change # new_output_padding = last_A.output_padding """ Package and create """ # sparse tensor doesn't support einsum which is necessary for subsequent computes, so deprecated # if inserted_zeros >= 3: # # mask unused cells # input_shape = list(self.output_shape) # input_shape[-2], input_shape[-1] = input_shape[-2] // self.scale_factor[-2], \ # input_shape[-1] // self.scale_factor[-1] # one_unfolded = create_valid_mask(input_shape, ext_new_patches.device, # ext_new_patches.dtype, ext_new_patches.shape[-2:], # last_A.stride, inserted_zeros, new_padding, # last_A.output_padding, # last_A.unstable_idx if last_A.unstable_idx else None) # ext_new_patches = (ext_new_patches * one_unfolded).to_sparse() # print the shape change after upsampling, if needed # print(f'After upsampling, ' # f'{last_A.patches.shape} (pad={padding}, iz={last_A.inserted_zeros}, s={last_A.stride}) -> ' # f'{ext_new_patches.shape} (pad={new_padding}, iz={inserted_zeros}, s={last_A.stride})') ret_patches_A = last_A.create_similar(patches=ext_new_patches, padding=new_padding, inserted_zeros=inserted_zeros) if self.input_shape[-2] < ret_patches_A.shape[-2] and self.input_shape[-1] < ret_patches_A.shape[-2] \ and not is_shape_used(ret_patches_A.output_padding): # using matrix mode could be more memory efficient ret_matrix_A = ret_patches_A.to_matrix(self.input_shape) # print(f'After upsampling, to_matrix: {ret_matrix_A.shape}') ret_matrix_A = ret_matrix_A.transpose(0, 1) return ret_matrix_A else: return ret_patches_A last_lA = _bound_oneside(last_lA) last_uA = _bound_oneside(last_uA) return [(last_lA, last_uA), (None, None), (None, None)], 0, 0 ================================================ FILE: auto_LiRPA/operators/rnn.py ================================================ ######################################################################### ## This file is 
part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """RNN.""" from .base import * class BoundRNN(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.complex = True self.output_index = output_index raise NotImplementedError( 'torch.nn.RNN is not supported at this time.' 'Please implement your RNN with torch.nn.RNNCell and a manual for-loop.' 'See an example of LSTM:' 'https://github.com/Verified-Intelligence/auto_LiRPA/blob/10a9b30/examples/sequence/lstm.py#L9') def forward(self, x, weight_input, weight_recurrent, bias, sequence_length, initial_h): assert (torch.sum(torch.abs(initial_h)) == 0) self.input_size = x.shape[-1] self.hidden_size = weight_input.shape[-2] class BoundRNNImpl(nn.Module): def __init__(self, input_size, hidden_size, weight_input, weight_recurrent, bias, output_index): super().__init__() self.input_size = input_size self.hidden_size = hidden_size self.cell = torch.nn.RNNCell( input_size=input_size, hidden_size=hidden_size ) self.cell.weight_ih.data.copy_(weight_input.squeeze(0).data) self.cell.weight_hh.data.copy_(weight_recurrent.squeeze(0).data) self.cell.bias_ih.data.copy_((bias.squeeze(0))[:hidden_size].data) self.cell.bias_hh.data.copy_((bias.squeeze(0))[hidden_size:].data) self.output_index = output_index def forward(self, x, hidden): length = x.shape[0] outputs = [] for i in range(length): hidden = self.cell(x[i, :], hidden) outputs.append(hidden.unsqueeze(0)) outputs = torch.cat(outputs, dim=0) if self.output_index == 0: return outputs else: return hidden self.model = BoundRNNImpl( self.input_size, self.hidden_size, weight_input, weight_recurrent, bias, self.output_index) self.input = (x, initial_h) return self.model(*self.input) ================================================ FILE: auto_LiRPA/operators/s_shaped.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """S-shaped base class, activation functions, and relevant ops.""" import torch from torch.nn import Module from torch.autograd import Function from .base import * from .activation_base import BoundOptimizableActivation class BoundSShaped(BoundOptimizableActivation): """ Base class for computing output bounds of globally and partially s-shaped nonlinear functions (e.g., sigmoid, tanh, sin, cos) over given input intervals. 
""" def __init__(self, attr=None, inputs=None, output_index=0, options=None, activation=(None, None, None), precompute=False): super().__init__(attr, inputs, output_index, options) if options is None: options = {} self.splittable = True self.inverse_s_shape = False self.ibp_intermediate = True self.activation = activation self.activation_name = activation[0] self.act_func = activation[1] self.d_act_func = activation[2] self.step_pre = 0.01 if precompute: self.precompute_relaxation(self.act_func, self.d_act_func) self.precompute_dfunc_values(self.act_func, self.d_act_func) # TODO make them configurable when implementing a general nonlinear activation. # Neurons whose gap between pre-activation bounds is smaller than this # threshold will be masked and don't need branching. self.split_min_gap = 1e-2 # 1e-4 # Neurons whose pre-activation bounds don't overlap with this range # are considered as stable (with values either 0 or 1) and don't need # branching. self.split_range = (self.range_l, self.range_u) # The initialization will be adjusted if the pre-activation bounds are too loose. self.loose_threshold = options.get(self.activation_name, {}).get( 'loose_threshold', None) self.convex_concave = None self.activation_bound_option = options.get('activation_bound_option', 'adaptive') self.inflections = [0.] self.extremes = [] self.sigmoid_like_mask = None # FIXME: Smoothness enhancement for s-shaped functions should be enabled by default. # This enhancement makes the linear bounds change smoothly between different cases. # We provide this option only to reproduce results from previous papers. self.disable_smoothness_enhancement = options.get( 's_shaped_disable_smoothness_enhancement', False) def opt_init(self): super().opt_init() self.tp_both_lower_init = {} self.tp_both_upper_init = {} def branch_input_domain(self, lb, ub): # For functions that are only partially s-shaped, such as sin and cos, the non-s-shaped intervals are identified # and masked here. sigmoid_like_mask marks the strictly s-shaped intervals, and branch_mask marks the non-s- # shaped ones. For globally s-shaped functions like tanh and sigmoid, sigmoid_like_mask stores all 1s and # branch_mask stores all 0s. self.sigmoid_like_mask = torch.ones_like(lb, dtype=torch.bool) self.branch_mask = torch.zeros_like(lb, dtype=torch.bool) def _init_opt_parameters_impl(self, size_spec, name_start, num_params=10): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper shape = l.shape # Alpha dimension is (num_params, output_shape, batch, *shape) for the s-shaped activation function. alpha = torch.empty(num_params, size_spec, *shape, device=l.device) alpha.data[:4] = (l + u) / 2 alpha.data[4:6] = self.tp_both_lower_init[name_start] alpha.data[6:8] = self.tp_both_upper_init[name_start] if num_params > 8: alpha.data[8:] = 0 return alpha @torch.no_grad() def precompute_relaxation(self, func, dfunc, x_limit=500): """ This function precomputes the tangent lines that will be used as lower/upper bounds for S-shaped functions centered at 0 along the x-axis. """ self.x_limit = x_limit self.num_points_pre = int(self.x_limit / self.step_pre) max_iter = 100 logger.debug('Precomputing relaxation for %s (pre-activation limit: %f)', self.__class__.__name__, x_limit) def check_lower(upper, d): """Given two points upper, d (d <= upper), check if the slope at d will be less than f(upper) at upper.""" k = dfunc(d) # Return True if the slope is a lower bound. 
return k * (upper - d) + func(d) <= func(upper) def check_upper(lower, d): """Given two points lower, d (d >= lower), check if the slope at d will be greater than f(lower) at lower.""" k = dfunc(d) # Return True if the slope is a upper bound. return k * (lower - d) + func(d) >= func(lower) # Given an upper bound point (>=0), find a line that is guaranteed to be a lower bound of this function. upper = self.step_pre * torch.arange(0, self.num_points_pre + 5, device=self.device) r = torch.zeros_like(upper) # Initial guess, the tangent line is at -1. l = -torch.ones_like(upper) while True: # Check if the tangent line at the guessed point is an lower bound at f(upper). checked = check_lower(upper, l).int() # If the initial guess is not smaller enough, then double it (-2, -4, etc). l = checked * l + (1 - checked) * (l * 2) if checked.sum() == l.numel(): break # Now we have starting point at l, its tangent line is guaranteed to be an lower bound at f(upper). # We want to further tighten this bound by moving it closer to 0. for _ in range(max_iter): # Binary search. m = (l + r) / 2 checked = check_lower(upper, m).int() l = checked * m + (1 - checked) * l r = checked * r + (1 - checked) * m # At upper, a line with slope l is guaranteed to lower bound the function. self.d_lower = l.clone() # Do the same again: # Given an lower bound point (<=0), find a line that is guaranteed to be an upper bound of this function. lower = -self.step_pre * torch.arange(0, self.num_points_pre + 5, device=self.device) l = torch.zeros_like(upper) r = torch.ones_like(upper) while True: checked = check_upper(lower, r).int() r = checked * r + (1 - checked) * (r * 2) if checked.sum() == l.numel(): break for _ in range(max_iter): m = (l + r) / 2 checked = check_upper(lower, m).int() l = (1 - checked) * m + checked * l r = (1 - checked) * r + checked * m self.d_upper = r.clone() logger.debug('Done') def precompute_dfunc_values(self, func, dfunc, x_limit=500): """ This function precomputes a list of values for dfunc. """ upper = self.step_pre * torch.arange(0, self.num_points_pre + 5, device=self.device) self.dfunc_values = dfunc(upper) def forward(self, x): return self.act_func(x) def retrieve_from_precompute(self, precomputed_d, input_bound, default_d): """ precomputed_d: The precomputed tangent points. input_bound: The input bound of the function. default_d: If input bound goes out of precompute range, we will use default_d. All of the inputs should share the same shape. """ # divide input bound into number of steps to the inflection point (at x=0) index = torch.max( torch.zeros(input_bound.numel(), dtype=torch.long, device=input_bound.device), (input_bound / self.step_pre).to(torch.long).reshape(-1) ) + 1 # If precompute range is smaller than input, tangent points will be taken from default. # The default value should be a guaranteed bound if index.max() >= precomputed_d.numel(): warnings.warn(f'Pre-activation bounds are too loose for {self}') return torch.where( (index < precomputed_d.numel()).view(input_bound.shape), torch.index_select( precomputed_d, 0, index.clamp(max=precomputed_d.numel() - 1) ).view(input_bound.shape), default_d, ).view(input_bound.shape) else: return torch.index_select(precomputed_d, 0, index).view(input_bound.shape) def generate_d_lower_upper(self, lower, upper): # Indices of neurons with input upper bound >=0, whose optimal slope to # lower bound the function was pre-computed. # Note that for neurons with also input lower bound >=0, # they will be masked later. 
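# The lookup below maps the input upper bound to a table index (derived from
# upper / step_pre) and returns the precomputed tangent point d_lower <= 0; d_upper is
# obtained symmetrically from -lower. If a bound falls outside the precomputed range
# (x_limit, 500 by default), the corresponding interval endpoint is used as a safe fallback.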
d_lower = self.retrieve_from_precompute(self.d_lower, upper, lower) # Indices of neurons with lower bound <=0, whose optimal slope to upper # bound the function was pre-computed. d_upper = self.retrieve_from_precompute(self.d_upper, -lower, upper) return d_lower, d_upper def retrieve_d_from_k(self, k, func): d_indices = torch.searchsorted(torch.flip(self.dfunc_values, [0]), k, right=False) d_indices = self.num_points_pre - d_indices + 4 d_left = d_indices * self.step_pre d_right = d_left + self.step_pre y_left = func(d_left) y_right = func(d_right) k_left = self.dfunc_values[d_indices] k_right = self.dfunc_values[torch.clamp(d_indices+1, max=self.dfunc_values.shape[0]-1)] # We choose the intersection of two tangent lines d_return = (k_left * d_left - k_right * d_right - y_left + y_right) / (k_left - k_right).clamp(min=1e-8) mask_almost_the_same = abs(k_left - k_right) < 1e-5 d_return[mask_almost_the_same] = d_left[mask_almost_the_same] y_d = k_left * (d_return - d_left) + y_left return d_return, y_d def bound_relax_impl_same_slope(self, x, func, dfunc): lower, upper = x.lower, x.upper y_l, y_u = func(lower), func(upper) # k_direct is the slope of the line directly connect (lower, func(lower)), (upper, func(upper)). k_direct = k = (y_u - y_l) / (upper - lower).clamp(min=1e-8) mask_almost_the_same = abs(upper - lower) < 1e-4 k_direct[mask_almost_the_same] = dfunc(lower)[mask_almost_the_same] mask_direct_lower = k_direct <= dfunc(lower) mask_direct_upper = k_direct <= dfunc(upper) # We now find the tangent line with the same slope of k_direct # In the case of "mask_direct_lower(or upper)", there should be only one possible tangent point # at which we obtain the same slope within the interval [lower, upper] d, y_d = self.retrieve_d_from_k(k_direct, func) d[lower + upper < 0] *= -1 # This is the case "direct upper" y_d[lower + upper < 0] = 2 * func(torch.tensor(0)) - y_d[lower + upper < 0] d_clamped = torch.clamp(d, min=lower, max=upper) y_d[d_clamped != d] = func(d_clamped[d_clamped != d]) self.add_linear_relaxation( mask=mask_direct_lower, type='lower', k=k_direct, x0=lower, y0=y_l ) self.add_linear_relaxation( mask=mask_direct_lower, type='upper', k=k_direct, x0=d_clamped, y0=y_d ) self.add_linear_relaxation( mask=mask_direct_upper, type='upper', k=k_direct, x0=upper, y0=y_u ) self.add_linear_relaxation( mask=mask_direct_upper, type='lower', k=k_direct, x0=d_clamped, y0=y_d ) # Now we turn to the case where no direct line can be used d_lower, d_upper = self.generate_d_lower_upper(lower, upper) mask_both = torch.logical_not(mask_direct_upper + mask_direct_lower) # To make sure upper and lower bounds have the same slope, # we need the two tangents to be symmetrical d_same_slope = torch.max(torch.abs(d_lower), torch.abs(d_upper)) k = dfunc(d_same_slope) y_d_same_slope = func(d_same_slope) y_d_same_slope_opposite = 2*func(torch.tensor(0)) - y_d_same_slope self.add_linear_relaxation( mask=mask_both, type='upper', k=k, x0=d_same_slope, y0=y_d_same_slope ) self.add_linear_relaxation( mask=mask_both, type='lower', k=k, x0=-d_same_slope, y0=y_d_same_slope_opposite ) def bound_relax_impl(self, x, func, dfunc): lower, upper = x.lower, x.upper y_l, y_u = func(lower), func(upper) # k_direct is the slope of the line directly connecting the two endpoints of the function inside the interval: # (lower, func(lower)) and (upper, func(upper)). k_direct = k = (y_u - y_l) / (upper - lower).clamp(min=1e-8) # Fixed bounds that cannot be optimized. 
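# As a concrete example (hypothetical bounds), for sigmoid on [1.0, 3.0] the chord slope
# is k_direct = (sigmoid(3) - sigmoid(1)) / 2 ~ (0.953 - 0.731) / 2 ~ 0.111; the whole
# interval lies in the concave region, so this chord is a valid linear lower bound and a
# tangent line is used for the upper bound.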
# self.mask_neg are the masks for neurons with upper bound <= 0, i.e., the whole input interval lies below 0. # self.mask_pos are the masks for neurons with lower bound >= 0, i.e., the whole input interval lies above 0. # For negative intervals, we can derive the linear upper bound by connecting the two endpoints, # i.e., starting from (lower, func(lower)) and setting the slope to k_direct. self.add_linear_relaxation( mask=self.mask_neg, type='upper', k=k_direct, x0=lower, y0=y_l) # For positive intervals, we connect the two endpoints to find the linear lower bound instead. self.add_linear_relaxation( mask=self.mask_pos, type='lower', k=k_direct, x0=lower, y0=y_l) # Store the x-coordinates of the points of tangencies. # d_lower is the closest value to upper such that the tangent line at (d_lower, func(d_lower)) still lower- # bounds the function in interval (lower, upper). # d_upper is the closest value to lower such that the tangent line at (d_lower, func(d_lower)) still upper- # bounds the function in interval (lower, upper). # d_lower and d_upper can be regarded as the default points of tangencies to draw linear bounds through. d_lower, d_upper = self.generate_d_lower_upper(lower, upper) # self.mask_both is the masks for neurons where lower < 0 < upper, i.e., the input interval contains 0. # mask_direct_lower is the masks for neurons whose input interval contains zero and whose linear lower bound can # be derived by connecting the two endpoints. # mask_direct_upper is the masks for neurons whose input interval contains zero and whose linear upper bound can # be derived by connecting the two endpoints. if self.convex_concave is None: mask_direct_lower = k_direct < dfunc(lower) mask_direct_upper = k_direct < dfunc(upper) else: mask_direct_lower = torch.where( self.convex_concave, k_direct < dfunc(lower), k_direct > dfunc(upper)) mask_direct_upper = torch.where( self.convex_concave, k_direct < dfunc(upper), k_direct > dfunc(lower)) mask_direct_lower = torch.logical_and(mask_direct_lower, self.mask_both) mask_direct_upper = torch.logical_and(mask_direct_upper, self.mask_both) if self.opt_stage in ['opt', 'reuse']: if not hasattr(self, 'alpha'): # Raise an error if alpha is not created. self._no_bound_parameters() ns = self._start # Clamping is done here rather than after `opt.step()` call # because it depends on pre-activation bounds self.alpha[ns].data[0:2] = torch.max( torch.min(self.alpha[ns][0:2], upper), lower) self.alpha[ns].data[2:4] = torch.max( torch.min(self.alpha[ns][2:4], upper), lower) if self.convex_concave is None: self.alpha[ns].data[4:6] = torch.min( self.alpha[ns][4:6], d_lower) self.alpha[ns].data[6:8] = torch.max( self.alpha[ns][6:8], d_upper) else: self.alpha[ns].data[4:6, :] = torch.where( self.convex_concave, torch.max(lower, torch.min(self.alpha[ns][4:6, :], d_lower)), torch.min(upper, torch.max(self.alpha[ns][4:6, :], d_lower)) ) self.alpha[ns].data[6:8, :] = torch.where( self.convex_concave, torch.min(upper, torch.max(self.alpha[ns][6:8, :], d_upper)), torch.max(lower, torch.min(self.alpha[ns][6:8, :], d_upper)) ) # shape [2, out_c, n, c, h, w]. tp_pos = self.alpha[ns][0:2] # For upper bound relaxation tp_neg = self.alpha[ns][2:4] # For lower bound relaxation tp_both_lower = self.alpha[ns][4:6] tp_both_upper = self.alpha[ns][6:8] # No need to use tangent line, when the tangent point is at the left # side of the preactivation lower bound. Simply connect the two sides. 
self.add_linear_relaxation( mask=mask_direct_lower, type='lower', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_both, mask_direct_lower), type='lower', k=dfunc(tp_both_lower), x0=tp_both_lower, y0=func(tp_both_lower)) self.add_linear_relaxation( mask=mask_direct_upper, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_both, mask_direct_upper), type='upper', k=dfunc(tp_both_upper), x0=tp_both_upper, y0=func(tp_both_upper)) self.add_linear_relaxation( mask=self.mask_neg, type='lower', k=dfunc(tp_neg), x0=tp_neg, y0=func(tp_neg)) self.add_linear_relaxation( mask=self.mask_pos, type='upper', k=dfunc(tp_pos), x0=tp_pos, y0=func(tp_pos)) else: if self.opt_stage == 'init': # Initialize optimizable slope. tp_both_lower_init = d_lower.detach() tp_both_upper_init = d_upper.detach() if self.loose_threshold is not None: # We will modify d_lower and d_upper inplace. # So make a copy for these two. tp_both_lower_init = tp_both_lower_init.clone() tp_both_upper_init = tp_both_upper_init.clone() # A different initialization if the pre-activation bounds # are too loose loose = torch.logical_or(lower < -self.loose_threshold, upper > self.loose_threshold) d_lower[loose] = lower[loose] d_upper[loose] = upper[loose] ns = self._start self.tp_both_lower_init[ns] = tp_both_lower_init self.tp_both_upper_init[ns] = tp_both_upper_init # Not optimized (vanilla CROWN bound). # Use the middle point slope as the lower/upper bound. Not optimized. m = (lower + upper) / 2 y_m = func(m) k_m = dfunc(m) # Lower bound is the middle point slope for the case input upper bound <= 0. # Note that the upper bound in this case is the direct line between (lower, func(lower)) and (upper, func(upper)). self.add_linear_relaxation(mask=self.mask_neg, type='lower', k=k_m, x0=m, y0=y_m) # Upper bound is the middle point slope for the case input lower bound >= 0. # Note that the lower bound in this case is the direct line between (lower, func(lower)) and (upper, func(upper)). self.add_linear_relaxation(mask=self.mask_pos, type='upper', k=k_m, x0=m, y0=y_m) # Now handle the case where input lower bound <=0 and upper bound >= 0. # A tangent line starting at d_lower is guaranteed to be a lower bound given the input upper bound. k = dfunc(d_lower) # Another possibility is to use the direct line as the lower bound, when this direct line does not intersect with f. # This is only valid when the slope at the input lower bound has a slope greater than the direct line. self.add_linear_relaxation(mask=mask_direct_lower, type='lower', k=k_direct, x0=lower, y0=y_l) # Otherwise (i.e., when the input interval cross zero and mask_direct_lower is not true), # we do not use the direct line, we use the d_lower slope. self.add_linear_relaxation( mask=torch.logical_xor(self.mask_both, mask_direct_lower), type='lower', k=k, x0=d_lower, y0=func(d_lower)) # Do the same for the upper bound side when input lower bound <=0 and upper bound >= 0. k = dfunc(d_upper) self.add_linear_relaxation( mask=mask_direct_upper, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_xor(self.mask_both, mask_direct_upper), type='upper', k=k, x0=d_upper, y0=func(d_upper)) if self.disable_smoothness_enhancement: return # Partially modify the linear bound computation for intervals that contains 0 so that the linear bound # changes smoothly w.r.t to the input bounds. 
For example, when we fix the input lower bound and drag the # input upper bound, we do not expect the linear bound to change abruptly at any point. # Therefore, under certain conditions, we do not use the above heuristics. Instead, we draw a tangent line # through the middle point (m, func(m)) where m = (lower + upper) / 2 and use it as a linear bound. if self.inverse_s_shape: # When the function has an inverse s-shape (such as pow3), we switch to drawing a tangent line through # the middle point as the lower bound when the default point of tangency is on the left of the middle # point. Otherwise, the lower bound will be too loose on the side of the input upper bound. The change # will make the bound on the other side a little bit looser as a tradeoff for overall tightness. self.add_linear_relaxation( mask=torch.logical_and(self.mask_both, d_lower < m), type='lower', k=k_m, x0=m, y0=y_m) # We make a similar change to the linear upper bound when the default point of tangency is on # the right of the middle point. self.add_linear_relaxation( mask=torch.logical_and(self.mask_both, d_upper >= m), type='upper', k=k_m, x0=m, y0=y_m) elif self.sigmoid_like_mask is not None: # self.sigmoid_like_mask is originally defined for periodic functions like sin and cos. It marks # intervals on the s-shaped or flipped-s-shaped parts of the function. Whether the part is flipped-s- # shaped is determined by comparing func(lower) and func(upper). Currently, some overall s-shaped # function, such as tanh and sigmoid, also has this mask. In the future, we will make it default for # both completely and partially s-shaped functions to reduce branching in the code. y_l = func(lower) y_u = func(upper) # If the input interval is on the s-shaped part of the function, we switch to drawing a tangent line # through the middle point as the lower bound when the default point of tangency is on the right of the # middle point. self.add_linear_relaxation( mask=torch.logical_and(torch.logical_and(self.sigmoid_like_mask, y_l < y_u), d_lower >= m), type='lower', k=k_m, x0=m, y0=y_m) # We switch to drawing a tangent line through the middle point as the upper bound when the default point # of tangency is on the left of the middle point. self.add_linear_relaxation( mask=torch.logical_and(torch.logical_and(self.sigmoid_like_mask, y_l < y_u), d_upper < m), type='upper', k=k_m, x0=m, y0=y_m) # If the input interval is on the flipped-s-shaped part of the function, we flip the condition as well # as whether we change the lower or upper bound. self.add_linear_relaxation( mask=torch.logical_and(torch.logical_and(self.sigmoid_like_mask, y_l >= y_u), d_lower < m), type='lower', k=k_m, x0=m, y0=y_m) self.add_linear_relaxation( mask=torch.logical_and(torch.logical_and(self.sigmoid_like_mask, y_l >= y_u), d_upper >= m), type='upper', k=k_m, x0=m, y0=y_m) else: # Handle simple cases where the function has the most common s shape. Now it serves as a safeguard # against any child operator class whose self.sigmoid_like_mask is uninitialized. Here self.mask_both is # equivalent to self.sigmoid_like_mask & (y_l < y_u) in the case above. self.add_linear_relaxation( mask=torch.logical_and(self.mask_both, d_lower >= m), type='lower', k=k_m, x0=m, y0=y_m) self.add_linear_relaxation( mask=torch.logical_and(self.mask_both, d_upper < m), type='upper', k=k_m, x0=m, y0=y_m) def bound_relax_branch(self, lb, ub): # For functions that are only partially s-shaped, such as sin and cos, the non-s-shaped intervals are re-bounded # here. 
This method returns the linear bound coefficients (lower_slope, lower_bias, upper_slope, upper_bias) of # the non-s-shaped intervals. For globally s-shaped functions like tanh and sigmoid, the method returns 0s. return 0., 0., 0., 0. def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) lb = x.lower ub = x.upper self.branch_input_domain(lb, ub) if self.activation_bound_option == 'same-slope': self.bound_relax_impl_same_slope(x, self.act_func, self.d_act_func) else: self.bound_relax_impl(x, self.act_func, self.d_act_func) lower_slope, lower_bias, upper_slope, upper_bias = self.bound_relax_branch(lb, ub) self.lw = self.lw * self.sigmoid_like_mask + self.branch_mask * lower_slope self.lb = self.lb * self.sigmoid_like_mask + self.branch_mask * lower_bias self.uw = self.uw * self.sigmoid_like_mask + self.branch_mask * upper_slope self.ub = self.ub * self.sigmoid_like_mask + self.branch_mask * upper_bias def get_split_mask(self, lower, upper, input_index): assert input_index == 0 return torch.logical_and( upper - lower >= self.split_min_gap, torch.logical_or(upper >= self.split_range[0], lower <= self.split_range[1]) ) class BoundPow(BoundSShaped): def __init__(self, attr=None, inputs=None, output_index=0, options=None): self.exponent = 2 super().__init__(attr, inputs, output_index, options) self.ibp_intermediate = False self.has_constraint = True def act_func(x): return torch.pow(x, self.exponent) self.act_func = act_func def d_act_func(x): return self.exponent * torch.pow(x, self.exponent - 1) self.d_act_func = d_act_func def d2_act_func(x): return self.exponent * (self.exponent - 1) * torch.pow(x, self.exponent - 2) self.d2_act_func = d2_act_func def generate_d_lower_upper(self, lower, upper): if self.exponent % 2: # Indices of neurons with input upper bound >=0, # whose optimal slope to lower bound the function was pre-computed. # Note that for neurons with also input lower bound >=0, they will be masked later. d_upper = self.retrieve_from_precompute(self.d_upper, upper, lower) # Indices of neurons with lower bound <=0, # whose optimal slope to upper bound the function was pre-computed. d_lower = self.retrieve_from_precompute(self.d_lower, -lower, upper) return d_lower, d_upper else: return torch.zeros_like(upper), torch.zeros_like(upper) def branch_input_domain(self, lb, ub): lower = lb upper = ub num_inflection = torch.zeros_like(lower) inflection_mat = lower for inflection in self.inflections: num_inflection += torch.logical_and( lower <= inflection, upper >= inflection) inflection_mat = torch.where( torch.logical_and(lower <= inflection, upper >= inflection), torch.tensor(inflection, device=lb.device), inflection_mat) inflection_mask = num_inflection <= 1. 
extreme_mask = torch.ones_like(lower) for extreme in self.extremes: extreme_mask *= torch.logical_or(lower >= extreme, upper <= extreme) self.sigmoid_like_mask = torch.logical_and(inflection_mask, extreme_mask) self.branch_mask = torch.logical_xor(torch.ones_like(lower), self.sigmoid_like_mask) self.inflection_mat = torch.where(self.sigmoid_like_mask, inflection_mat, lower) self.mask_neg = torch.logical_and((self.d2_act_func(lower) >= 0), torch.logical_and((self.d2_act_func(upper) >= 0), self.sigmoid_like_mask)) self.mask_pos = torch.logical_and((self.d2_act_func(lower) < 0), torch.logical_and((self.d2_act_func(upper) < 0), self.sigmoid_like_mask)) self.mask_both = torch.logical_xor(self.sigmoid_like_mask, torch.logical_or(self.mask_neg, self.mask_pos)) self.convex_concave = self.d2_act_func(lower) >= 0 @torch.no_grad() def precompute_relaxation(self, func, dfunc, x_limit = 500): """ This function precomputes the tangent lines that will be used as lower/upper bounds for S-shapes functions. """ self.x_limit = x_limit self.num_points_pre = int(self.x_limit / self.step_pre) max_iter = 100 def check_lower(upper, d): """Given two points upper, d (d <= upper), check if the slope at d will be less than f(upper) at upper.""" k = dfunc(d) # Return True if the slope is a lower bound. return k * (upper - d) + func(d) <= func(upper) def check_upper(lower, d): """Given two points lower, d (d >= lower), check if the slope at d will be greater than f(lower) at lower.""" k = dfunc(d) # Return True if the slope is a upper bound. return k * (lower - d) + func(d) >= func(lower) # Given an upper bound point (>=0), find a line that is guaranteed to # be a lower bound of this function. upper = self.step_pre * torch.arange( 0, self.num_points_pre + 5, device=self.device) r = torch.zeros_like(upper) # Initial guess, the tangent line is at -1. l = -torch.ones_like(upper) while True: # Check if the tangent line at the guessed point is an lower bound at f(upper). checked = check_upper(upper, l).int() # If the initial guess is not smaller enough, then double it (-2, -4, etc). l = checked * l + (1 - checked) * (l * 2) if checked.sum() == l.numel(): break # Now we have starting point at l, its tangent line is guaranteed to # be an lower bound at f(upper). # We want to further tighten this bound by moving it closer to 0. for _ in range(max_iter): # Binary search. m = (l + r) / 2 checked = check_upper(upper, m).int() l = checked * m + (1 - checked) * l r = checked * r + (1 - checked) * m # At upper, a line with slope l is guaranteed to lower bound the function. self.d_upper = l.clone() # Do the same again: # Given an lower bound point (<=0), find a line that is guaranteed to # be an upper bound of this function. 
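# --- Illustrative aside (not part of the library source) --------------------
# Before the symmetric search below, a standalone scalar sketch of the
# doubling-plus-bisection scheme both searches use.  torch.tanh stands in for
# the generic `func`, and u is one made-up grid point: we look for the tangent
# point closest to 0 whose tangent line still lower-bounds f at u.
import torch
f = torch.tanh
df = lambda x: 1 - torch.tanh(x) ** 2
u = torch.tensor(2.0)
def tangent_stays_below(d):
    return bool(df(d) * (u - d) + f(d) <= f(u))
l, r = torch.tensor(-1.0), torch.tensor(0.0)
while not tangent_stays_below(l):        # doubling phase: move left until valid
    l = 2 * l
for _ in range(100):                     # bisection: push the point toward 0
    m = (l + r) / 2
    l, r = (m, r) if tangent_stays_below(m) else (l, m)
print(float(l))                          # tightest valid tangent point for this u
# -----------------------------------------------------------------------------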
lower = -self.step_pre * torch.arange( 0, self.num_points_pre + 5, device=self.device) l = torch.zeros_like(upper) r = torch.ones_like(upper) while True: checked = check_lower(lower, r).int() r = checked * r + (1 - checked) * (r * 2) if checked.sum() == l.numel(): break for _ in range(max_iter): m = (l + r) / 2 checked = check_lower(lower, m).int() l = (1 - checked) * m + checked * l r = (1 - checked) * r + checked * m self.d_lower = r.clone() def forward(self, x, y): return torch.pow(x, y) def bound_backward(self, last_lA, last_uA, x, y, start_node=None, start_shape=None, **kwargs): assert not self.is_input_perturbed(1) self._start = start_node.name if start_node is not None else None y = y.value if y == int(y): x.upper = torch.max(x.upper, x.lower + 1e-8) self.exponent = int(y) assert self.exponent >= 2 if self.exponent % 2: self.precompute_relaxation(self.act_func, self.d_act_func) As, lbias, ubias = super().bound_backward( last_lA, last_uA, x, start_node, start_shape, **kwargs) return [As[0], (None, None)], lbias, ubias else: raise NotImplementedError('Exponent is not supported yet') def bound_forward(self, dim_in, x, y): assert y.lower == y.upper == int(y.lower) y = y.lower x.upper = torch.max(x.upper, x.lower + 1e-8) self.exponent = int(y) assert self.exponent >= 2 if self.exponent % 2: self.precompute_relaxation(self.act_func, self.d_act_func) return super().bound_forward(dim_in, x) def bound_relax_branch(self, lb, ub): if self.opt_stage in ['opt', 'reuse']: if not hasattr(self, 'alpha'): # Raise an error if alpha is not created. self._no_bound_parameters() ns = self._start self.alpha[ns].data[8:10] = torch.max( torch.min(self.alpha[ns][8:10], ub), lb) lb_point = self.alpha[ns][8:10] lower_slope = self.d_act_func(lb_point) lower_bias = self.act_func(lb_point) - lower_slope * lb_point else: lower_slope = 0 lower_bias = 0 upper_slope = (self.act_func(ub) - self.act_func(lb)) / (ub - lb).clamp(min=1e-8) upper_bias = self.act_func(ub) - ub * upper_slope return lower_slope, lower_bias, upper_slope, upper_bias def bound_relax(self, x, init=False, dim_opt=None): # For powers with odd exponents, such as x^3, the overall shape is inverse S-like. self.inverse_s_shape = self.exponent % 2 == 1 if self.exponent % 2: self.inflections = [0.] else: self.extremes = [0.] super().bound_relax(x, init, dim_opt) def interval_propagate(self, *v): assert not self.is_input_perturbed(1) exp = v[1][0] assert exp == int(exp) exp = int(exp) pl, pu = torch.pow(v[0][0], exp), torch.pow(v[0][1], exp) if exp % 2 == 1: return pl, pu else: pl, pu = torch.min(pl, pu), torch.max(pl, pu) mask = 1 - ((v[0][0] < 0) * (v[0][1] > 0)).to(pl.dtype) return pl * mask, pu def clamp_interim_bounds(self): if self.exponent % 2 == 0: self.cstr_lower = self.lower.clamp(min=0) self.cstr_upper = self.upper.clamp(min=0) self.cstr_interval = (self.cstr_lower, self.cstr_upper) def dtanh(x): return 1 - torch.tanh(x).pow(2) def dsigmoid(x): return torch.sigmoid(x) * (1 - torch.sigmoid(x)) def darctan(x): return (x.square() + 1.).reciprocal() def d2tanh(x): return -2 * torch.tanh(x) * (1 - torch.tanh(x).pow(2)) def d2sigmoid(x): return dsigmoid(x) * (1 - 2 * torch.sigmoid(x)) class BoundTanh(BoundSShaped): """ BoundTanh is based on the S-shaped BoundSShaped. In the meantime, it works as the base class for other globally S-shaped functions such as Sigmoid and Atan. 
""" def __init__(self, attr=None, inputs=None, output_index=0, options=None, activation=('tanh', torch.tanh, dtanh), precompute=True): super().__init__(attr, inputs, output_index, options, activation, precompute) def _init_opt_parameters_impl(self, size_spec, name_start): """Implementation of init_opt_parameters for each start_node.""" return super()._init_opt_parameters_impl(size_spec, name_start, num_params=8) def build_gradient_node(self, grad_upstream): node_grad = TanhGrad() grad_input = (grad_upstream, self.inputs[0].forward_value) grad_extra_nodes = [self.inputs[0]] return [(node_grad, grad_input, grad_extra_nodes)] class TanhGradOp(Function): @staticmethod def symbolic(_, preact): return _.op('grad::Tanh', preact).setType(preact.type()) @staticmethod def forward(ctx, preact): return 1 - torch.tanh(preact)**2 class TanhGrad(Module): def forward(self, g, preact): return g * TanhGradOp.apply(preact).unsqueeze(1) class BoundTanhGrad(BoundOptimizableActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None, activation=('tanh', dtanh, d2tanh), precompute=True): super().__init__(attr, inputs, output_index, options) self.requires_input_bounds = [0] # The inflection point is where d2f/dx2 = 0. self.inflection_point = 0.6585026 self.func = activation[1] self.dfunc = activation[2] if precompute: self.precompute_relaxation() def forward(self, x): return self.func(x) def interval_propagate(self, *v): lower, upper = v[0] f_lower = self.func(lower) f_upper = self.func(upper) next_lower = torch.min(f_lower, f_upper) next_upper = torch.max(f_lower, f_upper) mask_both = torch.logical_and(lower < 0, upper > 0) next_upper[mask_both] = self.func(torch.tensor(0)) return next_lower, next_upper def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) return self.bound_relax_impl(x) def precompute_relaxation(self, x_limit=500): """ This function precomputes the tangent lines that will be used as the lower/upper bounds for bell-shaped functions. Three tensors are precomputed: - self.precompute_x: The x values of the upper preactivation bound. - self.d_lower: The tangent points of the lower bound. - self.d_upper: The tangent points of the upper bound. """ self.x_limit = x_limit self.step_pre = 0.01 self.num_points_pre = int(self.x_limit / self.step_pre) max_iter = 100 func, dfunc = self.func, self.dfunc logger.debug('Precomputing relaxation for %s (pre-activation limit: %f)', self.__class__.__name__, x_limit) def check_lower(upper, d): """Given two points upper, d (d <= upper), check if the slope at d will be less than f(upper) at upper.""" k = dfunc(d) # Return True if the slope is a lower bound. return k * (upper - d) + func(d) <= func(upper) def check_upper(lower, d): """Given two points lower, d (d <= lower), check if the slope at d will be greater than f(lower) at lower.""" k = dfunc(d) # Return True if the slope is a upper bound. return k * (lower - d) + func(d) >= func(lower) self.precompute_x = torch.arange(-self.x_limit, self.x_limit + self.step_pre, self.step_pre, device=self.device) self.d_lower = torch.zeros_like(self.precompute_x) self.d_upper = torch.zeros_like(self.precompute_x) # upper point that needs lower precomputed tangent line mask_need_d_lower = self.precompute_x >= -self.inflection_point upper = self.precompute_x[mask_need_d_lower] # 1. 
Initial guess, the tangent is at -2*inflection_point (should be between (-inf, -inflection_point)) r = -self.inflection_point * torch.ones_like(upper) l = -2 * self.inflection_point * torch.ones_like(upper) while True: # Check if the tangent line at the guessed point is an lower bound at f(upper). checked = check_lower(upper, l).int() # If the initial guess is not smaller enough, then double it (-2, -4, etc). l = checked * l + (1 - checked) * (l * 2) if checked.sum() == l.numel(): break # Now we have starting point at l, its tangent line is guaranteed to be an lower bound at f(upper). # We want to further tighten this bound by moving it closer to upper. for _ in range(max_iter): # Binary search. m = (l + r) / 2 checked = check_lower(upper, m).int() l = checked * m + (1 - checked) * l r = checked * r + (1 - checked) * m # At upper, a line with slope l is guaranteed to lower bound the function. self.d_lower[mask_need_d_lower] = l.clone() # upper point that needs upper precomputed tangent line mask_need_upper_d = self.precompute_x >= self.inflection_point upper = self.precompute_x[mask_need_upper_d] # 1. Initial guess, the tangent is at inflection_point/2 (should be between (0, inflection_point)) r = self.inflection_point * torch.ones_like(upper) l = self.inflection_point / 2 * torch.ones_like(upper) while True: # Check if the tangent line at the guessed point is an upper bound at f(upper). checked = check_upper(upper, l).int() # If the initial guess is not smaller enough, then reduce it. l = checked * l + (1 - checked) * (l / 2) if checked.sum() == l.numel(): break # Now we have starting point at l, its tangent line is guaranteed to be an upper bound at f(upper). # We want to further tighten this bound by moving it closer to upper. for _ in range(max_iter): # Binary search. m = (l + r) / 2 checked = check_upper(upper, m).int() l = checked * m + (1 - checked) * l r = checked * r + (1 - checked) * m # At upper, a line with slope l is guaranteed to upper bound the function. self.d_upper[mask_need_upper_d] = l.clone() def retrieve_from_precompute(self, x, flip=False): if not flip: if x.max() > self.x_limit: warnings.warn(f'Pre-activation bounds are too loose for {self}') # Take the left endpoint of the interval x_indices = torch.searchsorted(self.precompute_x, x, right=True) - 1 return self.d_lower[x_indices], self.d_upper[x_indices] else: if x.min() < -self.x_limit: warnings.warn(f'Pre-activation bounds are too loose for {self}') # Take the right endpoint of the interval x_indices = torch.searchsorted(self.precompute_x, -x, right=False) return -self.d_lower[x_indices], -self.d_upper[x_indices] def bound_relax_impl(self, x): lower, upper = x.lower, x.upper func, dfunc = self.func, self.dfunc y_l, y_u = func(lower), func(upper) # k_direct is the slope of the line directly connect (lower, func(lower)), (upper, func(upper)). k_direct = (y_u - y_l) / (upper - lower).clamp(min=1e-8) # The tangent line at the midpoint can be a good approximation midpoint = (lower + upper) / 2 k_midpoint = dfunc(midpoint) y_midpoint = func(midpoint) # If -inflection_point <= lower < upper <= inflection_point, # we call it "completely concave" region. 
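# --- Illustrative aside (not part of the library source) --------------------
# A standalone check of where the constant self.inflection_point = 0.6585026
# used by this class comes from.  The bounded function is f(x) = dtanh(x)
# = 1 - tanh(x)^2, whose curvature f''(x) = -2*(1 - tanh(x)^2)*(1 - 3*tanh(x)^2)
# changes sign where tanh(x)^2 = 1/3, i.e. at x = atanh(1/sqrt(3)).  Inside
# [-x*, x*] the function is concave, which is the "completely concave" region
# mentioned above.
import torch
x_star = torch.atanh(torch.tensor(1.0 / 3.0).sqrt())
print(float(x_star))                                                    # ~0.6585026
curv = lambda x: -2 * (1 - torch.tanh(x)**2) * (1 - 3 * torch.tanh(x)**2)
print(float(curv(torch.tensor(0.5))), float(curv(torch.tensor(0.8))))   # sign flip
# -----------------------------------------------------------------------------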
mask_completely_concave = torch.logical_and( lower >= -self.inflection_point, upper <= self.inflection_point ) self.add_linear_relaxation( mask=mask_completely_concave, type='lower', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=mask_completely_concave, type='upper', k=k_midpoint, x0=midpoint, y0=y_midpoint) # From now on, we assume at least one of the bounds is outside the completely concave region. # Without loss of generality, we assume upper > inflection_point (indicated by mask_right). mask_right = lower + upper >= 0 dl, du = self.retrieve_from_precompute(upper, flip=False) dl_, du_ = self.retrieve_from_precompute(lower, flip=True) # Case 1: Similar to a convex function mask_case1 = torch.logical_or( torch.logical_and(mask_right, lower >= self.inflection_point), torch.logical_and(torch.logical_not(mask_right), upper <= -self.inflection_point) ) self.add_linear_relaxation( mask=mask_case1, type='upper', k=k_direct, x0=lower, y0=y_l) self.add_linear_relaxation( mask=mask_case1, type='lower', k=k_midpoint, x0=midpoint, y0=y_midpoint) # Case 2: Similar to a S-shaped function mask_case2_right = torch.logical_and(mask_right, torch.logical_and( upper > self.inflection_point, lower < self.inflection_point)) # The upper tangent point is lineraly interpolated between 0 and du, # given lower ranging between -upper and du. d_mask_case2_right_upper = du * (lower + upper) / (du + upper) k_mask_case2_right_upper = dfunc(d_mask_case2_right_upper) y_mask_case2_right_upper = func(d_mask_case2_right_upper) self.add_linear_relaxation( mask=mask_case2_right, type='upper', k=k_mask_case2_right_upper, x0=d_mask_case2_right_upper, y0=y_mask_case2_right_upper) # The lower tangent point is found based on lower. d_mask_case2_right_lower = (dl_ + upper) / 2 k_mask_case2_right_lower = dfunc(d_mask_case2_right_lower) y_mask_case2_right_lower = func(d_mask_case2_right_lower) self.add_linear_relaxation( mask=torch.logical_and(mask_case2_right, dl_ < upper), type='lower', k=k_mask_case2_right_lower, x0=d_mask_case2_right_lower, y0=y_mask_case2_right_lower) self.add_linear_relaxation( mask=torch.logical_and(mask_case2_right, dl_ >= upper), type='lower', k=k_direct, x0=lower, y0=y_l) mask_case2_left = torch.logical_and(torch.logical_not(mask_right), torch.logical_and( lower < -self.inflection_point, upper > -self.inflection_point)) # The upper tangent point is lineraly interpolated between du_ and 0, # given upper ranging between du_ and -lower. d_mask_case2_left_upper = du_ * (upper + lower) / (du_ + lower) k_mask_case2_left_upper = dfunc(d_mask_case2_left_upper) y_mask_case2_left_upper = func(d_mask_case2_left_upper) self.add_linear_relaxation( mask=mask_case2_left, type='upper', k=k_mask_case2_left_upper, x0=d_mask_case2_left_upper, y0=y_mask_case2_left_upper) # The lower tangent point is found based on upper. d_mask_case2_left_lower = (dl + lower) / 2 k_mask_case2_left_lower = dfunc(d_mask_case2_left_lower) y_mask_case2_left_lower = func(d_mask_case2_left_lower) self.add_linear_relaxation( mask=torch.logical_and(mask_case2_left, dl > lower), type='lower', k=k_mask_case2_left_lower, x0=d_mask_case2_left_lower, y0=y_mask_case2_left_lower) self.add_linear_relaxation( mask=torch.logical_and(mask_case2_left, dl <= lower), type='lower', k=k_direct, x0=upper, y0=y_u) # If the lower and upper bounds are too close, we just use IBP bounds to avoid numerical issues. 
mask_very_close = upper - lower < 1e-6 if mask_very_close.any(): self.add_linear_relaxation( mask=torch.logical_and(mask_very_close, self.mask_neg), type='lower', k=0, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_and(mask_very_close, self.mask_neg), type='upper', k=0, x0=upper, y0=y_u) self.add_linear_relaxation( mask=torch.logical_and(mask_very_close, self.mask_pos), type='lower', k=0, x0=upper, y0=y_u) self.add_linear_relaxation( mask=torch.logical_and(mask_very_close, self.mask_pos), type='upper', k=0, x0=lower, y0=y_l) self.add_linear_relaxation( mask=torch.logical_and(mask_very_close, self.mask_both), type='lower', k=0, x0=lower, y0=torch.min(y_l, y_u)) self.add_linear_relaxation( mask=torch.logical_and(mask_very_close, self.mask_both), type='upper', k=0, x0=upper, y0=torch.full_like(y_l, func(torch.tensor(0)))) class BoundSigmoid(BoundTanh): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options, activation=('sigmoid', torch.sigmoid, dsigmoid)) def build_gradient_node(self, grad_upstream): node_grad = SigmoidGrad() grad_input = (grad_upstream, self.inputs[0].forward_value) grad_extra_nodes = [self.inputs[0]] return [(node_grad, grad_input, grad_extra_nodes)] class SigmoidGradOp(Function): @staticmethod def symbolic(_, preact): return _.op('grad::Sigmoid', preact).setType(preact.type()) @staticmethod def forward(ctx, preact): sigmoid_x = torch.sigmoid(preact) return sigmoid_x * (1 - sigmoid_x) class SigmoidGrad(Module): def forward(self, g, preact): return g * SigmoidGradOp.apply(preact).unsqueeze(1) class BoundSigmoidGrad(BoundTanhGrad): def __init__(self, attr=None, inputs=None, output_index=0, options=None, activation=('sigmoid', dsigmoid, d2sigmoid), precompute=True): super().__init__(attr, inputs, output_index, options, activation, precompute=False) self.inflection_point = 1.3169614 if precompute: self.precompute_relaxation() class BoundAtan(BoundTanh): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options, activation=('arctan', torch.arctan, darctan)) self.split_range = (-torch.inf, torch.inf) def build_gradient_node(self, grad_upstream): node_grad = AtanGrad() grad_input = (grad_upstream, self.inputs[0].forward_value) grad_extra_nodes = [self.inputs[0]] return [(node_grad, grad_input, grad_extra_nodes)] class AtanGrad(Module): def forward(self, g, preact): # arctan'(x) = 1 / (1 + x^2) return g / (1 + preact.square()).unsqueeze(1) class BoundTan(BoundAtan): """ The implementation of BoundTan is based on the S-shaped BoundAtan. We use the bounds from its inverse function and directly convert the bounds of the inverse function to bounds of the original function. This trick allows us to quickly implement bounds on inverse functions. """ def forward(self, x): return torch.tan(x) def _check_bounds(self, lower, upper): # Lower and upper bounds must be within the same [-½π, ½π] region. lower_periods = torch.floor((lower + 0.5 * torch.pi) / torch.pi) upper_periods = torch.floor((upper + 0.5 * torch.pi) / torch.pi) if not torch.allclose(lower_periods, upper_periods): print('Tan preactivation lower bounds:\n', lower) print('Tan preactivation upper bounds:\n', upper) raise ValueError("BoundTan received pre-activation bounds that produce infinity. " "The preactivation bounds are too loose. Try to reduce perturbation region.") # Return the period number for each neuron. 
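# --- Illustrative aside (not part of the library source) --------------------
# What the period number computed above looks like on a few made-up values:
# floor((x + pi/2) / pi) indexes which branch of tan the input lies on.
import torch
x = torch.tensor([0.0, 3.0, -3.0])
print(torch.floor((x + 0.5 * torch.pi) / torch.pi))    # tensor([0., 1., -1.])
# -----------------------------------------------------------------------------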
# Period is 0 => bounds are within [-½π, ½π], # Period is 1 => bounds are within [-½π + π, ½π + π] # Period is -1 => bounds are within [-½π - π, ½π - π] return lower_periods def _init_masks(self, x): # The masks now must consider the periodicity. lower = torch.remainder(x.lower + 0.5 * torch.pi, torch.pi) - 0.5 * torch.pi upper = torch.remainder(x.upper + 0.5 * torch.pi, torch.pi) - 0.5 * torch.pi self.mask_pos = lower >= 0 self.mask_neg = upper <= 0 self.mask_both = torch.logical_not(torch.logical_or(self.mask_pos, self.mask_neg)) def interval_propagate(self, *v): # We need to check if the input lower and upper bounds are within the same period. # Otherwise the bounds become infinity. concrete_lower, concrete_upper = v[0][0], v[0][1] self._check_bounds(concrete_lower, concrete_upper) return super().interval_propagate(*v) def bound_relax(self, x, init=False, dim_opt=None): if init: self.init_linear_relaxation(x, dim_opt) periods = self._check_bounds(x.lower, x.upper) periods = torch.pi * periods # Create a fake x with inversed lower and upper. inverse_x = lambda: None inverse_x.lower = torch.tan(x.lower) inverse_x.upper = torch.tan(x.upper) super().bound_relax(inverse_x, init=init, dim_opt=dim_opt) # Lower slope, lower bias, upper slope and upper bias are saved to # self.lw, self.lb, self.uw, self.ub. We need to reverse them. # E.g., y = self.lw * x + self.lb, now becomes x = 1./self.lw * y - self.lb / self.lw # Additionally, we need to add the missing ½π periods. new_upper_slope = 1. / self.lw new_upper_bias = - self.lb / self.lw - periods / self.lw new_lower_slope = 1. / self.uw new_lower_bias = - self.ub / self.uw - periods / self.uw # NaN can happen if lw=0 or uw=0 when the pre-activation bounds are too close # Replace the bounds with interval bounds. if (self.lw == 0).any(): mask = self.lw == 0 new_upper_slope[mask] = 0 new_upper_bias[mask] = inverse_x.upper[mask] if (self.uw == 0).any(): mask = self.uw == 0 new_lower_slope[mask] = 0 new_lower_bias[mask] = inverse_x.lower[mask] self.lw = new_lower_slope self.lb = new_lower_bias self.uw = new_upper_slope self.ub = new_upper_bias ================================================ FILE: auto_LiRPA/operators/shape.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### """ Shape operators """ from .base import * class BoundShape(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.never_perturbed = True @staticmethod def shape(x): return x.shape if isinstance(x, Tensor) else torch.tensor(x).shape def forward(self, x): self.from_input = False return BoundShape.shape(x) def bound_forward(self, dim_in, x): return self.forward_value def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): if not isinstance(v[0], Tensor): # e.g., v[0] input shape (8, 7, 7) => output its shape (1, 8, 7, 7) gvars_array = np.array(v[0]) self.solver_vars = torch.tensor(np.expand_dims(gvars_array, axis=0).shape).long() else: self.solver_vars = torch.tensor(self.forward(v[0])).long() ================================================ FILE: auto_LiRPA/operators/slice_concat.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Shape operators """ from torch.nn import Module from torch.autograd import Function from .base import * from ..patches import Patches from .constant import BoundConstant class BoundConcat(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.axis = attr['axis'] self.IBP_rets = None self.ibp_intermediate = True def forward(self, *x): # x is a list of tensors x = [(item if isinstance(item, Tensor) else torch.tensor(item)) for item in x] self.input_size = [item.shape[self.axis] for item in x] self.axis = self.make_axis_non_negative(self.axis) return torch.cat(x, dim=int(self.axis)) def interval_propagate(self, *v): norms = [] eps = [] # Collect perturbation information for all inputs. for i, _v in enumerate(v): if self.is_input_perturbed(i): n, e = Interval.get_perturbation(_v) norms.append(n) eps.append(e) else: norms.append(None) eps.append(0.0) eps = np.array(eps) # Supporting two cases: all inputs are Linf norm, or all inputs are L2 norm perturbed. # Some inputs can be constants without perturbations. all_inf = all(map(lambda x: x is None or x == torch.inf, norms)) all_2 = all(map(lambda x: x is None or x == 2, norms)) h_L = [_v[0] for _v in v] h_U = [_v[1] for _v in v] if all_inf: # Simply returns a tuple. Every subtensor has its own lower and upper bounds. return self.forward(*h_L), self.forward(*h_U) elif all_2: # Sum the L2 norm over all subtensors, and use that value as the new L2 norm. # This will be an over-approximation of the original perturbation (we can prove it). max_eps = np.sqrt(np.sum(eps * eps)) # For L2 norm perturbed inputs, lb=ub and for constants lb=ub. Just propagate one object. 
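# --- Illustrative aside (not part of the library source) --------------------
# Why the combined eps above is sound, checked on made-up sizes: if
# ||d1||_2 <= e1 and ||d2||_2 <= e2, then the concatenated perturbation
# satisfies ||(d1, d2)||_2 = sqrt(||d1||^2 + ||d2||^2) <= sqrt(e1^2 + e2^2).
import torch
e1, e2 = 0.3, 0.4
print((e1 ** 2 + e2 ** 2) ** 0.5)                     # 0.5, the combined eps
d1 = torch.randn(5); d1 = e1 * d1 / d1.norm()         # ||d1|| == e1
d2 = torch.randn(7); d2 = e2 * d2 / d2.norm()         # ||d2|| == e2
print(float(torch.cat([d1, d2]).norm()))              # 0.5 up to rounding
# -----------------------------------------------------------------------------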
r = self.forward(*h_L) ptb = PerturbationLpNorm(norm=2, eps=max_eps) return Interval(r, r, ptb=ptb) else: raise RuntimeError(f"BoundConcat does not support inputs with norm {norms}") def bound_backward(self, last_lA, last_uA, *x, **kwargs): self.axis = self.make_axis_non_negative(self.axis, 'output') assert self.axis > 0 def _bound_oneside(last_A): if last_A is None: return None if isinstance(last_A, torch.Tensor): ret = list(torch.split(last_A, self.input_size, dim=self.axis + 1)) # Skip unused input nodes to reduce the cost of computing unused intermediate bounds for i in range(len(ret)): if (ret[i] == 0).all(): ret[i] = None return ret elif isinstance(last_A, Patches): assert len(self.input_shape) == 4 and self.axis == 1, "Split channel dimension is supported; others are unimplemented." # Patches shape can be [out_c, batch, out_h, out_w, in_c, patch_h, patch_w] # Or [spec, batch, in_c, patch_h, patch_w] (sparse) new_patches = torch.split(last_A.patches, self.input_size, dim=-3) # split the in_c dimension is easy. return [last_A.create_similar(p) for p in new_patches] else: raise RuntimeError(f'Unsupported type for last_A: {type(last_A)}') uA = _bound_oneside(last_uA) lA = _bound_oneside(last_lA) if uA is None: return [(lA[i] if lA is not None else None, None) for i in range(len(lA))], 0, 0 if lA is None: return [(None, uA[i] if uA is not None else None) for i in range(len(uA))], 0, 0 # To avoid issues in other parts of the code, we prune unused # lA and uA only when they are both unused. for i in range(len(lA)): if lA[i] is None and uA[i] is not None: lA[i] = torch.zeros_like(uA[i]) elif lA[i] is not None and uA[i] is None: uA[i] = torch.zeros_like(lA[i]) return [(lA[i], uA[i]) for i in range(len(lA))], 0, 0 def bound_forward(self, dim_in, *x): self.axis = self.make_axis_non_negative(self.axis) assert (self.axis == 0 and not self.from_input or self.from_input) # Concatenate each input's bounds along the axis. # If x[i].lw and x[i].uw is None, it means the input is a constant, # so we concatenate a tensor of zeros with the corresponding shape. 
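# --- Illustrative aside (not part of the library source) --------------------
# The reason bound_backward above can simply split last_A along the concat
# axis, checked on made-up shapes: splitting the coefficients is the adjoint
# of concatenating the inputs, so <A, cat(x1, x2)> == <A1, x1> + <A2, x2>.
import torch
x1, x2 = torch.randn(3), torch.randn(2)
A = torch.randn(5)
A1, A2 = torch.split(A, [3, 2])
print(float((A * torch.cat([x1, x2])).sum()),
      float((A1 * x1).sum() + (A2 * x2).sum()))       # equal up to rounding
# -----------------------------------------------------------------------------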
lw = torch.cat([item.lw if item.lw is not None else torch.zeros(item.lb.shape[0], dim_in, *item.lb.shape[1:], device=item.lb.device) for item in x], dim=self.axis + 1) lb = torch.cat([item.lb for item in x], dim=self.axis) uw = torch.cat([item.uw if item.uw is not None else torch.zeros(item.ub.shape[0], dim_in, *item.ub.shape[1:], device=item.ub.device) for item in x], dim=self.axis + 1) ub = torch.cat([item.ub for item in x], dim=self.axis) return LinearBound(lw, lb, uw, ub) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(*v) def build_gradient_node(self, grad_upstream): ret = [] for i in range(len(self.inputs)): node_grad = ConcatGrad(self.axis, i) grad_input = (grad_upstream, ) + tuple(inp.forward_value for inp in self.inputs) ret.append((node_grad, grad_input, [])) return ret BoundConcatFromSequence = BoundConcat class BoundSlice(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.start = attr["starts"][0] if "starts" in attr else None self.end = attr["ends"][0] if "ends" in attr else None self.axes = attr["axes"][0] if "axes" in attr else None self.use_default_ibp = False self.ibp_intermediate = True def __repr__(self): attrs = {} if (len(self.inputs) == 5 and all(isinstance(item, BoundConstant) and item.value.numel() == 1 for item in self.inputs[1:])): attrs['start'] = self.inputs[1].value.item() attrs['end'] = self.inputs[2].value.item() attrs['axes'] = self.inputs[3].value.item() attrs['step'] = self.inputs[4].value.item() return super().__repr__(attrs) def _fixup_params(self, shape, start, end, axes, steps): if start < 0: start += shape[axes] if end < 0: if end == -9223372036854775807: # -inf in ONNX end = 0 # only possible when step == -1 else: end += shape[axes] if steps == -1: start, end = end, start + 1 # TODO: more test more negative step size. end = min(end, shape[axes]) return start, end # Older Pytorch version only passes steps as input. def forward(self, x, start=None, end=None, axes=None, steps=1): start = self.start if start is None else start end = self.end if end is None else end axes = self.axes if axes is None else axes assert (steps == 1 or steps == -1) and axes == int(axes) and start == int(start) and end == int(end) shape = x.shape if isinstance(x, Tensor) else [len(x)] start, end = self._fixup_params(shape, start, end, axes, steps) final = torch.narrow(x, dim=int(axes), start=int(start), length=int(end - start)) if steps == -1: final = torch.flip(final, dims=tuple(axes)) return final def interval_propagate(self, *v): lb = tuple(map(lambda x:x[0],v)) ub = tuple(map(lambda x:x[1],v)) return Interval.make_interval(self.forward(*lb), self.forward(*ub)) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(*v) def bound_backward(self, last_lA, last_uA, *x, **kwargs): def _bound_oneside(A, start, end, axes, steps): if A is None: return None if isinstance(A, torch.Tensor): # Reuse the batch and spec dimension of A, and replace other shapes with input. A_shape = A.shape[:2] + self.input_shape[1:] new_A = torch.zeros(size=A_shape, device=A.device, requires_grad=A.requires_grad) # Fill part of the new_A based on start, end, axes and steps. # Skip the spec dimension at the front (axes + 1). 
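# --- Illustrative aside (not part of the library source) --------------------
# A minimal sketch of the "un-slice" performed below, with made-up shapes: the
# coefficients A refer to the sliced output, so they are written back into a
# zero tensor shaped like the full input, and <A, x[start:end]> == <new_A, x>.
import torch
x = torch.arange(6.0)                                # pretend full input
start, end = 2, 5
A = torch.ones(1, 1, end - start)                    # [spec, batch, sliced_len]
new_A = torch.zeros(1, 1, x.numel())
new_A = torch.index_copy(new_A, dim=2, index=torch.arange(start, end), source=A)
print(float((A * x[start:end]).sum()), float((new_A * x).sum()))   # both 9.0
# -----------------------------------------------------------------------------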
dim = axes if axes < 0 else axes + 1 indices = torch.arange(start, end, device=A.device) new_A = torch.index_copy(new_A, dim=dim, index=indices, source=A) elif isinstance(A, Patches): assert A.unstable_idx is None assert len(self.input_shape) == 4 and axes == 1, "Slice is only supported on channel dimension." patches = A.patches # patches shape is [out_c, batch, out_h, out_w, in_c, patch_h, patch_w]. new_patches_shape = patches.shape[:4] + (self.input_shape[1], ) + patches.shape[-2:] new_patches = torch.zeros( size=new_patches_shape, device=patches.device, requires_grad=patches.requires_grad) indices = torch.arange(start, end, device=patches.device) new_patches = torch.index_copy(new_patches, dim=-3, index=indices, source=patches) # Only the in_c dimension is changed. new_A = A.create_similar(new_patches) else: raise ValueError(f'Unsupport A type {type(A)}') return new_A start, end, axes = x[1].value.item(), x[2].value.item(), x[3].value.item() steps = x[4].value.item() if len(x) == 5 else 1 # If step is not specified, it is 1. # Other step size untested, do not enable for now. assert steps == 1 and axes == int(axes) and start == int(start) and end == int(end) start, end = self._fixup_params(self.input_shape, start, end, axes, steps) # Find the original shape of A. lA = _bound_oneside(last_lA, start, end, axes, steps) uA = _bound_oneside(last_uA, start, end, axes, steps) return [(lA, uA), (None, None), (None, None), (None, None), (None, None)], 0, 0 def bound_forward(self, dim_in, *inputs): assert len(inputs) == 5 or len(inputs) == 4 start = inputs[1].lb.item() end = inputs[2].lb.item() axis = self.make_axis_non_negative(inputs[3].lb.item()) assert axis > 0, "Slicing along the batch dimension is not supported yet" steps = inputs[4].lb.item() if len(inputs) == 5 else 1 # If step is not specified, it is 1. 
assert steps in [1, -1] x = inputs[0] shape = x.lb.shape start, end = self._fixup_params(shape, start, end, axis, steps) lw = torch.narrow(x.lw, dim=axis+1, start=start, length=end - start) uw = torch.narrow(x.uw, dim=axis+1, start=start, length=end - start) lb = torch.narrow(x.lb, dim=axis, start=start, length=end - start) ub = torch.narrow(x.ub, dim=axis, start=start, length=end - start) if steps == -1: lw = torch.flip(lw, dims=tuple(axis+1)) uw = torch.flip(uw, dims=tuple(axis+1)) lb = torch.flip(lb, dims=tuple(axis)) ub = torch.flip(ub, dims=tuple(axis)) return LinearBound(lw, lb, uw, ub) def build_gradient_node(self, grad_upstream): assert len(self.inputs) == 5 start = self.inputs[1].value.item() end = self.inputs[2].value.item() axes = self.inputs[3].value.item() steps = self.inputs[4].value.item() assert steps == 1 node_grad = SliceGrad(start, end, axes, steps) grad_input = (grad_upstream, self.inputs[0].forward_value) return [(node_grad, grad_input, [])] class BoundSplit(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.axis = attr['axis'] self.use_default_ibp = True if 'split' in attr: self.split = attr['split'] else: self.split = None def forward(self, *x): data = x[0] split = self.split if self.split is not None else x[1].tolist() if self.axis == -1: self.axis = len(data.shape) - 1 return torch.split(data, split, dim=self.axis)[self.output_index] def bound_backward(self, last_lA, last_uA, *x, **kwargs): assert self.axis > 0 split = self.split if self.split is not None else x[1].value.tolist() pre = sum(split[:self.output_index]) suc = sum(split[(self.output_index + 1):]) def _bound_oneside(last_A): if last_A is None: return None A = [] if pre > 0: A.append(torch.zeros( *last_A.shape[:(self.axis + 1)], pre, *last_A.shape[(self.axis + 2):], device=last_A.device)) A.append(last_A) if suc > 0: A.append(torch.zeros( *last_A.shape[:(self.axis + 1)], suc, *last_A.shape[(self.axis + 2):], device=last_A.device)) return torch.cat(A, dim=self.axis + 1) return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def bound_forward(self, dim_in, *x): assert self.axis > 0 and self.from_input split = self.split if self.split is not None else x[1].lb.tolist() x = x[0] lw = torch.split(x.lw, split, dim=self.axis + 1)[self.output_index] uw = torch.split(x.uw, split, dim=self.axis + 1)[self.output_index] lb = torch.split(x.lb, split, dim=self.axis)[self.output_index] ub = torch.split(x.ub, split, dim=self.axis)[self.output_index] return LinearBound(lw, lb, uw, ub) def build_solver(self, *v, model, C=None, model_type="mip", solver_pkg="gurobi"): self.solver_vars = self.forward(v[0]) def slice_grad(x, input_shape, start, end, axes, steps): assert steps == 1 assert axes > 0 out = torch.zeros(*x.shape[:2], *input_shape[1:]).to(x) end = min(end, input_shape[axes]) index = torch.arange(start, end, device=x.device) # Make index.ndim == x.ndim index = index.view( *((1,) * (axes + 1)), end - start, *((1,) * (x.ndim - axes - 2))) # Make index.shape == x.shape index = index.repeat( *x.shape[:axes + 1], 1, *x.shape[axes + 2:] ) out.scatter_(axes + 1, index, x) return out class SliceGradOp(Function): """ Local gradient of BoundSlice. Not including multiplication with gradients from other layers. 
""" @staticmethod def symbolic(_, grad_last, input, start=None, end=None, axes=None, steps=1): return _.op( 'grad::Slice', grad_last, input, start_i=start, end_i=end, axes_i=axes, steps_i=steps ).setType(grad_last.type()) @staticmethod def forward(ctx, grad_last, input, start, end, axes, steps): return slice_grad(grad_last, input.shape, start, end, axes, steps) class SliceGrad(Module): def __init__(self, start, end, axes, steps): super().__init__() self.start = start self.end = end self.axes = axes self.steps = steps def forward(self, grad_last, input): return SliceGradOp.apply( grad_last, input, self.start, self.end, self.axes, self.steps) class BoundSliceGrad(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.start = attr['start'] self.end = attr['end'] self.axes = attr['axes'] self.steps = attr['steps'] self.use_default_ibp = True def forward(self, grad_last, input): return slice_grad(grad_last, input.shape, self.start, self.end, self.axes, self.steps) def bound_backward(self, last_lA, last_uA, *args, **kwargs): def _bound_oneside(last_A): if last_A is None: return None assert self.axes > 0 last_A_ = last_A.reshape(-1, *self.inputs[1].output_shape[self.axes:]) last_A_ = last_A_[:, self.start:self.end] last_A = last_A_.reshape( *last_A.shape[:self.axes+2], -1, *self.inputs[1].output_shape[self.axes+1:]) return last_A return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def concat_grad(x, axis, input_index, *inputs): cur = 0 for i in range(input_index): cur += inputs[i].shape[axis] x_ = x.reshape(-1, *x.shape[axis + 1:]) ret = x_[:, cur:cur+inputs[input_index].shape[axis]] ret = ret.reshape(*x.shape[:axis + 1], *ret.shape[1:]) return ret class ConcatGradOp(Function): @staticmethod def symbolic(_, grad_last, axis, input_index, *inputs): return _.op('grad::Concat', grad_last, *inputs, axis_i=axis, input_index_i=input_index).setType(grad_last.type()) @staticmethod def forward(ctx, grad_last, axis, input_index, *inputs): return concat_grad(grad_last, axis, input_index, *inputs) class ConcatGrad(Module): def __init__(self, axis, input_index): super().__init__() self.input_index = input_index self.axis = axis def forward(self, grad_last, *input): return ConcatGradOp.apply(grad_last, self.axis, self.input_index, *input) class BoundConcatGrad(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.axis = attr['axis'] self.input_index = attr['input_index'] self.use_default_ibp = True def forward(self, grad_last, *inputs): return concat_grad(grad_last, self.axis, self.input_index, *inputs) def bound_backward(self, last_lA, last_uA, *args, **kwargs): def _bound_oneside(last_A): if last_A is None: return None assert self.axis > 0 start = sum([self.inputs[i + 1].output_shape[self.axis] for i in range(self.input_index)]) end = start + self.output_shape[self.axis+1] shape_behind = self.inputs[0].output_shape[self.axis+1:] A = torch.zeros(*last_A.shape[:self.axis+2], *shape_behind, device=last_A.device) A = A.view(-1, *shape_behind) A[:, start:end] = last_lA.reshape(-1, *last_A.shape[self.axis+2:]) A = A.view(*last_A.shape[:self.axis+2], *shape_behind) return A return ([(_bound_oneside(last_lA), _bound_oneside(last_uA))] + [(None, None)] * (len(self.inputs) - 1)), 0, 0 ================================================ FILE: auto_LiRPA/operators/softmax.py ================================================ 
######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """ Softmax """ from .base import * class BoundSoftmaxImpl(nn.Module): def __init__(self, axis): super().__init__() self.axis = axis assert self.axis == int(self.axis) def forward(self, x): max_x = torch.max(x, dim=self.axis).values x = torch.exp(x - max_x.unsqueeze(self.axis)) s = torch.sum(x, dim=self.axis, keepdim=True) return x / s # The `option != 'complex'` case is not used in the auto_LiRPA main paper. class BoundSoftmax(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.axis = attr['axis'] self.option = options.get('softmax', 'complex') if self.option == 'complex': self.complex = True else: self.max_input = 30 def forward(self, x): assert self.axis == int(self.axis) if self.option == 'complex': self.input = (x,) self.model = BoundSoftmaxImpl(self.axis) self.model.device = self.device return self.model(x) else: return F.softmax(x, dim=self.axis) def interval_propagate(self, *v): assert self.option != 'complex' assert self.perturbed h_L, h_U = v[0] shift = h_U.max(dim=self.axis, keepdim=True).values exp_L, exp_U = torch.exp(h_L - shift), torch.exp(h_U - shift) lower = exp_L / (torch.sum(exp_U, dim=self.axis, keepdim=True) - exp_U + exp_L + epsilon) upper = exp_U / (torch.sum(exp_L, dim=self.axis, keepdim=True) - exp_L + exp_U + epsilon) return lower, upper ================================================ FILE: auto_LiRPA/operators/solver_utils.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### class DummyGurobipyClass: """A dummy class with error message when gurobi is not installed.""" def __getattr__(self, attr): def _f(*args, **kwargs): raise RuntimeError(f"method {attr} not available because gurobipy module was not built.") return _f try: import gurobipy as grb except ModuleNotFoundError: grb = DummyGurobipyClass() ================================================ FILE: auto_LiRPA/operators/tile.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """BoundTile""" from torch.nn import Module from .base import * class BoundTile(Bound): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.use_default_ibp = True def forward(self, x, repeats): return x.repeat(repeats.tolist()) def bound_backward(self, last_lA, last_uA, *x, **kwargs): assert not self.is_input_perturbed(1) repeats = x[1].value def _bound_oneside(A): if A is None: return None # block_shape: (specs, d1/r1, r1, d2/r2, r2, ..., dn/rn, rn) # Reshaping A to block_shape and sum along the "r" dimensions # is equivalent to summing up all block fragments of A. block_shape = [A.shape[0]] axes_to_sum = [] for i in range(len(repeats)): block_shape.append(A.size(i + 1) // repeats[i].item()) block_shape.append(repeats[i].item()) axes_to_sum.append(2 * i + 2) reshaped_A = A.reshape(*block_shape) next_A = reshaped_A.sum(dim=axes_to_sum) return next_A return [(_bound_oneside(last_lA), _bound_oneside(last_uA)), (None, None)], 0, 0 def bound_forward(self, dim_in, *x): assert (x[1].lb == x[1].ub).all(), "repeats should be constant." repeats = x[1].lb.tolist() assert repeats[0] == 1, "shouldn't repeat on the batch dimension." # lb and ub have the same shape as x, so we repeat then with "repeats" lb = x[0].lb.repeat(repeats) ub = x[0].ub.repeat(repeats) # lw and uw have shape (batch_size, input_dim, *shape_of_the_current_layer) # so we need to repeat them with "repeats" as well, but we need to # insert 1 at the second position to keep the input dimension unchanged. repeats.insert(1, 1) lw = x[0].lw.repeat(repeats) uw = x[0].uw.repeat(repeats) return LinearBound(lw, lb, uw, ub) ================================================ FILE: auto_LiRPA/operators/trigonometric.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. 
## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from types import SimpleNamespace import torch from torch.autograd import Function from .activation_base import BoundActivation from .s_shaped import BoundSShaped class BoundSin(BoundSShaped): # Lookup tables shared by all BoundSin classes. xl_lower_tb = None xl_upper_tb = None xu_lower_tb = None xu_upper_tb = None func, d_func = torch.sin, torch.cos n_table_entries = 1001 def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.ibp_intermediate = True self.act_func = torch.sin self.d_act_func = torch.cos # Bound limits used by IBP. self.ibp_max_point = torch.pi / 2 self.ibp_min_point = torch.pi * 3 / 2 self.all_table_x = torch.linspace( 0, 2 * torch.pi, BoundSin.n_table_entries, device=self.device) self.precompute_relaxation(self.act_func, self.d_act_func, x_limit = torch.pi / 2) if BoundSin.xl_lower_tb is None: # Generate look-up tables. BoundSin.xl_lower_tb = BoundSin.get_lower_left_bound(self.all_table_x) BoundSin.xl_upper_tb = BoundSin.get_upper_left_bound(self.all_table_x) BoundSin.xu_lower_tb = BoundSin.get_lower_right_bound(self.all_table_x) BoundSin.xu_upper_tb = BoundSin.get_upper_right_bound(self.all_table_x) def d2_act_func(self, x): return -torch.sin(x) def _init_opt_parameters_impl(self, size_spec, name_start): """Implementation of init_opt_parameters for each start_node.""" l, u = self.inputs[0].lower, self.inputs[0].upper shape = [size_spec] + list(l.shape) alpha = torch.empty(12, *shape, device=l.device) alpha.data[:4] = ((l + u) / 2).unsqueeze(0).expand(4, *shape) alpha.data[4:6] = self.tp_both_lower_init[name_start].expand(2, *shape) alpha.data[6:8] = self.tp_both_upper_init[name_start].expand(2, *shape) alpha.data[8:10] = self.tp_lower_init[name_start].expand(2, *shape) alpha.data[10:12] = self.tp_upper_init[name_start].expand(2, *shape) return alpha def opt_init(self): super().opt_init() self.tp_both_lower_init = {} self.tp_both_upper_init = {} self.tp_lower_init = {} self.tp_upper_init = {} def branch_input_domain(self, lb, ub): # Map all input lower and upper bounds to the [0, 2*pi] interval. lb_clamped = lb - torch.floor(lb / (2 * torch.pi)) * (2 * torch.pi) ub_clamped = ub - torch.floor(ub / (2 * torch.pi)) * (2 * torch.pi) # Mask the mapped lower and upper bounds according to whether they are in [0, 0.5*pi), [0.5*pi, pi), # [pi, 1.5*pi), or [1.5*pi, 2*pi). 
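# --- Illustrative aside (not part of the library source) --------------------
# What the wrap just above does on a few made-up inputs: each bound is shifted
# by a whole multiple of 2*pi into [0, 2*pi), which leaves sin unchanged, and
# only then is it assigned to one of the four quarter-period masks below.
import torch
x = torch.tensor([-1.0, 4.0, 7.0])
wrapped = x - torch.floor(x / (2 * torch.pi)) * (2 * torch.pi)
print(wrapped)                               # ~[5.283, 4.000, 0.717]
print(torch.sin(x) - torch.sin(wrapped))     # ~0 everywhere
# -----------------------------------------------------------------------------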
mask_lb_1 = torch.logical_and(lb_clamped >= 0, lb_clamped < torch.pi / 2) mask_lb_2 = torch.logical_and(lb_clamped >= torch.pi / 2, lb_clamped < torch.pi) mask_lb_3 = torch.logical_and(lb_clamped >= torch.pi, lb_clamped < 3 * torch.pi / 2) mask_lb_4 = torch.logical_and(lb_clamped >= 3 * torch.pi / 2, lb_clamped < 2 * torch.pi) mask_ub_1 = torch.logical_and(ub_clamped >= 0, ub_clamped < torch.pi / 2) mask_ub_2 = torch.logical_and(ub_clamped >= torch.pi / 2, ub_clamped < torch.pi) mask_ub_3 = torch.logical_and(ub_clamped >= torch.pi, ub_clamped < 3 * torch.pi / 2) mask_ub_4 = torch.logical_and(ub_clamped >= 3 * torch.pi / 2, ub_clamped < 2 * torch.pi) self.sigmoid_like_mask = torch.logical_and( ub - lb <= torch.pi, torch.logical_or( torch.logical_and( torch.logical_or(mask_lb_2, mask_lb_3), torch.logical_or(mask_ub_2, mask_ub_3) ), torch.logical_and( torch.logical_or(mask_lb_1, mask_lb_4), torch.logical_or(mask_ub_1, mask_ub_4) ) ) ) self.branch_mask = torch.logical_not(self.sigmoid_like_mask) self.mask_neg = torch.logical_and(torch.logical_or(mask_lb_3, mask_lb_4), torch.logical_and(torch.logical_or(mask_ub_3, mask_ub_4), self.sigmoid_like_mask)) self.mask_pos = torch.logical_and(torch.logical_or(mask_lb_1, mask_lb_2), torch.logical_and(torch.logical_or(mask_ub_1, mask_ub_2), self.sigmoid_like_mask)) self.mask_both = torch.logical_xor(self.sigmoid_like_mask, torch.logical_or(self.mask_neg, self.mask_pos)) self.convex_concave = self.d2_act_func(lb) >= 0 def generate_d_lower_upper(self, lower, upper): # Indices of neurons with input upper bound >=0, whose optimal slope to lower bound the function was pre-computed. # Note that for neurons with also input lower bound >=0, they will be masked later. k_tensor = torch.floor(upper / (2 * torch.pi)) upper_clamped = upper - k_tensor * (2 * torch.pi) case1_mask = torch.logical_and(upper_clamped >= 0, upper_clamped <= torch.pi / 2) upper_clamped_new = upper_clamped.clamp(min=0, max=torch.pi / 2) index = torch.max( torch.zeros(upper.numel(), dtype=torch.long, device=upper.device), (upper_clamped_new / self.step_pre).to(torch.long).reshape(-1) ) + 1 # Lookup the lower bound slope from the pre-computed table. d_lower = (torch.index_select(self.d_lower, 0, index).view(lower.shape) + k_tensor * 2 * torch.pi) * case1_mask case2_mask = torch.logical_and(upper_clamped >= torch.pi, upper_clamped <= 3 * torch.pi / 2) upper_clamped_new = upper_clamped.clamp(min=torch.pi, max=3 * torch.pi / 2) index = torch.max( torch.zeros(upper.numel(), dtype=torch.long, device=upper.device), ((torch.pi - upper_clamped_new) / -self.step_pre).to(torch.long).reshape(-1) ) + 1 # Lookup the lower bound slope from the pre-computed table. d_upper = (torch.pi - torch.index_select(self.d_upper, 0, index).view(lower.shape) + k_tensor * 2 * torch.pi) * case2_mask # Indices of neurons with lower bound <=0, whose optimal slope to upper bound the function was pre-computed. 
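A roughly equivalent way to read the sigmoid_like_mask computed above: up to boundary cases, sin is monotone on [lb, ub] exactly when the interval is no wider than pi and contains no critical point pi/2 + k*pi in its interior, and only then can the S-shaped style relaxation be used. A hypothetical helper (not part of the library) expressing that check directly:

import torch

def sin_is_monotone(lb: torch.Tensor, ub: torch.Tensor) -> torch.Tensor:
    """Hypothetical check: True where sin is monotone on [lb, ub]."""
    # First critical point pi/2 + k*pi that is >= lb.
    k = torch.ceil((lb - torch.pi / 2) / torch.pi)
    first_crit = torch.pi / 2 + k * torch.pi
    return (ub - lb <= torch.pi) & ~((first_crit > lb) & (first_crit < ub))

lb = torch.tensor([0.1, 2.0, 4.0])
ub = torch.tensor([1.0, 3.0, 5.5])
print(sin_is_monotone(lb, ub))   # tensor([ True,  True, False])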
k_tensor = torch.floor(lower / (2 * torch.pi)) lower_clamped = lower - k_tensor * (2 * torch.pi) case3_mask = torch.logical_and(lower_clamped >= 3 * torch.pi / 2, lower_clamped <= 2 * torch.pi) lower_clamped_new = lower_clamped.clamp(min=(3 * torch.pi / 2), max=2 * torch.pi) index = torch.max( torch.zeros(lower.numel(), dtype=torch.long, device=lower.device), ((lower_clamped_new - 2 * torch.pi) / -self.step_pre).to(torch.long).reshape(-1) ) + 1 d_upper += (torch.index_select(self.d_upper, 0, index).view(upper.shape) + (k_tensor + 1) * 2 * torch.pi) * case3_mask case4_mask = torch.logical_and(lower_clamped >= torch.pi / 2, lower_clamped <= torch.pi) lower_clamped_new = lower_clamped.clamp(min=(torch.pi / 2), max=3 * torch.pi) index = torch.max( torch.zeros(lower.numel(), dtype=torch.long, device=lower.device), ((torch.pi - lower_clamped_new) / self.step_pre).to(torch.long).reshape(-1) ) + 1 d_lower += (torch.pi - torch.index_select(self.d_lower, 0, index).view(upper.shape) + k_tensor * 2 * torch.pi) * case4_mask return d_lower, d_upper @staticmethod def arcsin(c): """Arcsin with gradient fixes. arcsin(-1) and arcsin(1) have pathological gradients and should be avoided. """ if c.min() > -1 and c.max() < 1: return torch.arcsin(c) c_ = c.clone() mask_neg = c == -1 mask_pos = c == 1 c_[mask_neg] = 0 c_[mask_pos] = 0 ret = torch.arcsin(c_) ret[mask_neg] = -torch.pi / 2 ret[mask_pos] = torch.pi / 2 return ret @staticmethod def get_intersection(start, end, c, theta=0.): """Get the number of intersections between y = sin(x + theta) and y = c between start and end.""" # Use arcsine to find the first 2 intersections. crossing1 = BoundSin.arcsin(c) - theta crossing2 = torch.pi - crossing1 - 2 * theta # Problematic at exact 1/2 pi, but ok in our case (happens only when lb=ub). return BoundSin.n_crossing(start, end, crossing1) + BoundSin.n_crossing(start, end, crossing2) @staticmethod def n_crossing(start, end, s): """Check how many times we will encounter value s + k*2*pi within start and end for any integer k.""" cycles = torch.floor((end - start) / (2 * torch.pi)) # Number of 2pi cycles. # Move s and end to the same 2 * pi cycle as start. dist = torch.floor((s - start) / (2 * torch.pi)) real_s = s - dist * 2 * torch.pi real_end = end - cycles * 2 * torch.pi return (real_s >= start).to(s) * (real_s <= real_end).to(s) + cycles @staticmethod def check_bound(tangent_point, x): """Check whether the tangent line at tangent_point is a valid lower/upper bound for x.""" # evaluate the value of the tangent line at x and see it is >= 0 or <=0. d = BoundSin.d_func(tangent_point) val = d * (x - tangent_point) + BoundSin.func(tangent_point) # We want a positive margin when finding a lower line, but as close to 0 as possible. # We want a negative margin when finding a upper line, but as close to 0 as possible. margin = BoundSin.func(x) - val return margin @staticmethod @torch.no_grad() def get_lower_left_bound(xl, steps=20): """Get a global lower bound given lower bound on x. Return slope and intercept.""" dtype = xl.dtype # Constrain xl into the -0.5 pi to 1.5 pi region. cycles = torch.floor((xl + 0.5 * torch.pi) / (2 * torch.pi)) * (2 * torch.pi) xl = xl - cycles use_tangent_line = (xl >= torch.pi).to(dtype) # Case 1: xl > pi, Lower tangent line is the only possible lower bound. # Case 2: Binary search needed. Testing from another tangent endpoint in [pi, 1.5*pi]. It must be in this region. 
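The counting in n_crossing above can also be written down directly: the number of points congruent to s modulo 2*pi inside [start, end] is floor((end - s) / (2*pi)) - ceil((start - s) / (2*pi)) + 1, clamped at zero. A brute-force reference formulation (illustrative only, not the library's implementation):

import torch

def n_crossing_ref(start, end, s):
    # Count integers k with start <= s + 2*pi*k <= end.
    k_min = torch.ceil((start - s) / (2 * torch.pi))
    k_max = torch.floor((end - s) / (2 * torch.pi))
    return torch.clamp(k_max - k_min + 1, min=0)

start, end, s = torch.tensor(1.0), torch.tensor(14.0), torch.tensor(0.5)
# Points 0.5 + 2*pi*k inside [1, 14]: approximately 6.78 and 13.07, so 2 crossings.
print(n_crossing_ref(start, end, s))   # tensor(2.)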
left = torch.pi * torch.ones_like(xl) # The right end guarantees the margin > 0 because it is basically a IBP lower bound (-1). right = (1.5 * torch.pi) * torch.ones_like(xl) last_right = right.clone() for _ in range(steps): mid = (left + right) / 2. margin = BoundSin.check_bound(mid, xl) pos_mask = (margin > 0).to(dtype) # We want to margin > 0 but at small as possible. neg_mask = 1.0 - pos_mask right = mid * pos_mask + right * neg_mask # We have positive margin, reduce right hand side. last_right = mid * pos_mask + last_right * neg_mask # Always sound, since the margin is positive. left = mid * neg_mask + left * pos_mask d = xl * use_tangent_line + last_right * (1. - use_tangent_line) # Return slope and bias. return [d, cycles] @staticmethod @torch.no_grad() def get_upper_left_bound(xl, steps=20): """Get a global upper bound given lower bound on x. Return slope and intercept.""" dtype = xl.dtype # Constrain xl into the -0.5 pi to 1.5 pi region. cycles = torch.floor((xl - 0.5 * torch.pi) / (2 * torch.pi)) * (2 * torch.pi) xl = xl - cycles use_tangent_line = (xl >= 2.0 * torch.pi).to(dtype) # Case 1: xl > pi, Lower tangent line is the only possible lower bound. # Case 2: Binary search needed. Testing from another tangent endpoint in [pi, 1.5*pi]. It must be in this region. left = (2.0 * torch.pi) * torch.ones_like(xl) # The right end guarantees the margin > 0 because it is basically a IBP lower bound (-1). right = (2.5 * torch.pi) * torch.ones_like(xl) last_right = right.clone() for _ in range(steps): mid = (left + right) / 2. margin = BoundSin.check_bound(mid, xl) pos_mask = (margin > 0).to(dtype) # We want to margin < 0 but at small as possible. neg_mask = 1.0 - pos_mask right = mid * neg_mask + right * pos_mask # We have positive margin, reduce right hand side. last_right = mid * neg_mask + last_right * pos_mask # Always sound, since the margin is positive. left = mid * pos_mask + left * neg_mask d = xl * use_tangent_line + last_right * (1. - use_tangent_line) # Return slope and bias. return [d, cycles] @staticmethod @torch.no_grad() def get_lower_right_bound(xu, steps=20): """Get a global lower bound given upper bound on x. Return slope and intercept.""" # Constrain xu into the -0.5 pi to 1.5 pi region. cycles = torch.floor((xu + 0.5 * torch.pi) / (2 * torch.pi)) * (2 * torch.pi) xu = xu - cycles d, _ = BoundSin.get_lower_left_bound(torch.pi - xu, steps) return [3 * torch.pi - d, cycles - 2 * torch.pi] @staticmethod @torch.no_grad() def get_upper_right_bound(xu, steps=20): """Get a global upper bound given upper bound on x. Return slope and intercept.""" # Constrain xu into the 0.5 pi to 2.5 pi region. cycles = torch.floor((xu - 0.5 * torch.pi) / (2 * torch.pi)) * (2 * torch.pi) xu = xu - cycles d, _ = BoundSin.get_upper_left_bound(3 * torch.pi - xu, steps) return [5 * torch.pi - d, cycles - 2 * torch.pi] def get_bound_tb(self, lb, ub): """Find lower or upper bounds from lookup table.""" lower, upper = lb, ub step = 2 * torch.pi / (BoundSin.n_table_entries - 1) # Move to 0 to 2 pi region. lb_cycles = torch.floor(lb / (2 * torch.pi)) * (2 * torch.pi) lb = torch.clamp(lb - lb_cycles, min=0, max=2 * torch.pi) ub_cycles = torch.floor(ub / (2 * torch.pi)) * (2 * torch.pi) ub = torch.clamp(ub - ub_cycles, min=0, max=2 * torch.pi) # Find the indice within the lookup table from 0 - 2pi. 
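The binary search above can be re-derived in a few lines. A simplified sketch (not the library code, and skipping the cycle-shift bookkeeping): for a lower limit xl below pi, search for the smallest tangent point t in [pi, 1.5*pi] whose tangent line still passes below (xl, sin(xl)); by convexity of sin on [pi, 2*pi], that line is then a sound lower bound for every x >= xl.

import torch

xl = torch.tensor(2.0)                 # lower limit on x, already inside [-0.5*pi, 1.5*pi]
left = torch.tensor(torch.pi)          # search interval for the tangent point
right = torch.tensor(1.5 * torch.pi)   # tangent at 1.5*pi is the flat line y = -1, always sound
t = right.clone()
for _ in range(30):
    mid = (left + right) / 2
    # Margin of the tangent line at `mid`, evaluated at xl; >= 0 means sound at xl.
    margin = torch.sin(xl) - (torch.sin(mid) + torch.cos(mid) * (xl - mid))
    if margin > 0:
        t, right = mid, mid            # sound: remember it and try a tighter tangent point
    else:
        left = mid                     # unsound: move away from pi
xs = torch.linspace(float(xl), float(xl) + 4 * torch.pi, 2001)
tangent_line = torch.sin(t) + torch.cos(t) * (xs - t)
assert (torch.sin(xs) - tangent_line >= -1e-5).all()   # sound on the sampled grid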
indices_lb = lb.div(step).long() indices_ub = ub.div(step).long() tangent_left_lower = BoundSin.xl_lower_tb[0][indices_lb] tangent_left_upper = BoundSin.xl_upper_tb[0][indices_lb] tangent_right_lower = BoundSin.xu_lower_tb[0][indices_ub] tangent_right_upper = BoundSin.xu_upper_tb[0][indices_ub] if self.opt_stage in ['opt', 'reuse']: if not hasattr(self, 'alpha'): # Raise an error if alpha is not created. self._no_bound_parameters() ns = self._start self.alpha[ns].data[8:10, :] = torch.min( torch.max(self.alpha[ns][8:10, :], tangent_left_lower), tangent_right_lower) self.alpha[ns].data[10:12, :] = torch.min( torch.max(self.alpha[ns][10:12, :], tangent_left_upper), tangent_right_upper) tangent_lower = self.alpha[ns][8:10, :] tangent_upper = self.alpha[ns][10:12, :] else: # add cycles to optimizable tangent region unfolded_left_lower = (tangent_left_lower + BoundSin.xl_lower_tb[1][indices_lb] + lb_cycles) left_lower_ends = 1.5*torch.pi + BoundSin.xl_lower_tb[1][indices_lb] + lb_cycles unfolded_right_lower = (tangent_right_lower + BoundSin.xu_lower_tb[1][indices_ub] + ub_cycles) right_lower_ends = 1.5*torch.pi + BoundSin.xu_lower_tb[1][indices_ub] + ub_cycles mid = (lower + upper) / 2 leftmost_mask = torch.logical_and(mid < unfolded_left_lower, unfolded_left_lower <= upper) left_range_mask = torch.logical_and(mid >= unfolded_left_lower, mid < left_lower_ends) inbetween_mask = torch.logical_and(mid >= left_lower_ends, mid < right_lower_ends) rightmost_mask = torch.logical_and(mid >= unfolded_right_lower, unfolded_right_lower >= lower) right_range_mask = torch.logical_and(~left_range_mask, torch.logical_and(mid >= right_lower_ends, mid < unfolded_right_lower)) tangent_lower = (leftmost_mask * tangent_left_lower + left_range_mask * (mid - BoundSin.xl_lower_tb[1][indices_lb] - lb_cycles) + inbetween_mask * 1.5*torch.pi + rightmost_mask * tangent_right_lower + right_range_mask * (mid - BoundSin.xu_lower_tb[1][indices_ub] - ub_cycles)) unfolded_left_upper = (tangent_left_upper + BoundSin.xl_upper_tb[1][indices_lb] + lb_cycles) left_upper_ends = 2.5*torch.pi + BoundSin.xl_upper_tb[1][indices_lb] + lb_cycles unfolded_right_upper = (tangent_right_upper + BoundSin.xu_upper_tb[1][indices_ub] + ub_cycles) right_upper_ends = 2.5*torch.pi + BoundSin.xu_upper_tb[1][indices_ub] + ub_cycles mid = (lower + upper) / 2 leftmost_mask = torch.logical_and(mid < unfolded_left_upper, unfolded_left_upper <= upper) left_range_mask = torch.logical_and(mid >= unfolded_left_upper, mid < left_upper_ends) inbetween_mask = torch.logical_and(mid >= left_upper_ends, mid < right_upper_ends) rightmost_mask = torch.logical_and(mid >= unfolded_right_upper, unfolded_right_upper >= lower) right_range_mask = torch.logical_and(~left_range_mask, torch.logical_and(mid >= right_upper_ends, mid < unfolded_right_upper)) tangent_upper = (leftmost_mask * tangent_left_upper + left_range_mask * (mid - BoundSin.xl_upper_tb[1][indices_lb] - lb_cycles) + inbetween_mask * 2.5*torch.pi + rightmost_mask * tangent_right_upper + right_range_mask * (mid - BoundSin.xu_upper_tb[1][indices_ub] - ub_cycles)) if self.opt_stage == 'init': ns = self._start self.tp_lower_init[ns] = tangent_lower.detach() self.tp_upper_init[ns] = tangent_upper.detach() d_lower = BoundSin.d_func(tangent_lower) b_lower = BoundSin.func(tangent_lower) - d_lower * (tangent_lower + torch.where(tangent_lower <= 1.5*torch.pi, BoundSin.xl_lower_tb[1][indices_lb] + lb_cycles, BoundSin.xu_lower_tb[1][indices_ub] + ub_cycles)) d_upper = BoundSin.d_func(tangent_upper) b_upper = 
BoundSin.func(tangent_upper) - d_upper * (tangent_upper + torch.where(tangent_upper <= 2.5*torch.pi, BoundSin.xl_upper_tb[1][indices_lb] + lb_cycles, BoundSin.xu_upper_tb[1][indices_ub] + ub_cycles)) return d_lower, b_lower, d_upper, b_upper def forward(self, x): return torch.sin(x) def interval_propagate(self, *v): # Check if a point is in [l, u], considering the 2pi period def check_crossing(ll, uu, point): return ((((uu - point) / (2 * torch.pi)).floor() - ((ll - point) / (2 * torch.pi)).floor()) > 0).to(h_Ls.dtype) h_L, h_U = v[0][0], v[0][1] h_Ls, h_Us = self.forward(h_L), self.forward(h_U) # If crossing pi/2, then max is fixed 1.0 max_mask = check_crossing(h_L, h_U, self.ibp_max_point) # If crossing pi*3/2, then min is fixed -1.0 min_mask = check_crossing(h_L, h_U, self.ibp_min_point) ub = torch.max(h_Ls, h_Us) ub = max_mask + (1 - max_mask) * ub lb = torch.min(h_Ls, h_Us) lb = - min_mask + (1 - min_mask) * lb return lb, ub def bound_relax_branch(self, lb, ub): dtype = lb.dtype ub = torch.max(ub, lb + 1e-8) # Case 1: Connect the two points as a line sub = self.func(ub) slb = self.func(lb) mid = (sub + slb) / 2. smid = self.func((ub + lb) / 2) gap = smid - mid case1_line_slope = (sub - slb) / (ub - lb).clamp(min=1e-10) case1_line_bias = slb - case1_line_slope * lb # Check if there are crossings between the line and the sin function. grad_crossings = self.get_intersection(lb, ub, case1_line_slope, theta=0.5 * torch.pi) # If there is no crossing, then we can connect the two points together as a lower/upper bound. use_line = grad_crossings == 1 # Connected line is the upper bound. upper_use_line = torch.logical_and(gap < 0, use_line) # Connected line is the lower bound. lower_use_line = torch.logical_and(gap >= 0, use_line) # Case 2: we will try the global lower/upper bounds at lb and ub. # For the points and lb and ub, we can construct both lower and upper bounds. (case_2_lower_slope, case_2_lower_bias, case_2_upper_slope, case_2_upper_bias) = self.get_bound_tb(lb, ub) # Finally, choose between case 1 and case 2. lower_use_line = lower_use_line.to(dtype) not_lower_use_line = 1. - lower_use_line upper_use_line = upper_use_line.to(dtype) not_upper_use_line = 1. - upper_use_line lower_slope = lower_use_line * case1_line_slope + not_lower_use_line * case_2_lower_slope lower_bias = lower_use_line * case1_line_bias + not_lower_use_line * case_2_lower_bias upper_slope = upper_use_line * case1_line_slope + not_upper_use_line * case_2_upper_slope upper_bias = upper_use_line * case1_line_bias + not_upper_use_line * case_2_upper_bias return lower_slope, lower_bias, upper_slope, upper_bias class BoundCos(BoundSin): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.ibp_max_point = 0.0 self.ibp_min_point = torch.pi def forward(self, x): return torch.cos(x) def bound_relax(self, x, init=False, dim_opt=None): # Shift the input by half_pi, and shifting the linear bounds back. half_pi = 0.5 * torch.pi x_shifted = SimpleNamespace() x_shifted.lower = x.lower + half_pi x_shifted.upper = x.upper + half_pi super().bound_relax(x_shifted, init=init, dim_opt=dim_opt) self.lb = self.lb + self.lw * half_pi self.ub = self.ub + self.uw * half_pi class BoundSec(BoundActivation): def __init__(self, attr=None, inputs=None, output_index=0, options=None): super().__init__(attr, inputs, output_index, options) self.ibp_intermediate = True def forward(self, x): return 1. 
/ torch.cos(x) def bound_relax(self, x, init=False): assert x.lower.min() > -torch.pi / 2 assert x.upper.max() < torch.pi / 2 x_L = x.lower x_U = x.upper y_L = self.forward(x_L) y_U = self.forward(x_U) mask_close = x_U - x_L < 1e-8 upper_k = torch.where( mask_close, y_L * torch.tan(x_L), (y_U - y_L) / (x_U - x_L).clamp(min=1e-8) ) self.uw = upper_k self.ub = -upper_k * x_L + y_L mid = (x_L + x_U) / 2 y_mid = self.forward(mid) lower_k = y_mid * torch.tan(mid) self.lw = lower_k self.lb = -lower_k * mid + y_mid def interval_propagate(self, *v): h_L, h_U = v[0][0], v[0][1] assert h_L.min() > -torch.pi / 2 assert h_U.max() < torch.pi / 2 y_L = self.forward(h_L) y_U = self.forward(h_U) lower = (h_U < 0) * (y_U - 1) + (h_L > 0) * (y_L - 1) + 1 upper = torch.max(y_L, y_U) return lower, upper class SinGradOp(Function): @staticmethod def symbolic(_, x): return _.op('grad::Sin', x) @staticmethod def forward(ctx, input): return torch.cos(input) class CosGradOp(Function): @staticmethod def symbolic(_, x): return _.op('grad::Cos', x) @staticmethod def forward(ctx, input): return -torch.sin(input) class TanhGradOp(Function): @staticmethod def symbolic(_, x): return _.op('grad::Tanh', x) @staticmethod def forward(ctx, input): return 1 - torch.tanh(input)**2 ================================================ FILE: auto_LiRPA/opt_pruner.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """Pruning during the optimization.""" import time import torch class OptPruner: def __init__(self, x, threshold, multi_spec_keep_func, loss_reduction_func, decision_thresh, fix_interm_bounds, epsilon_over_decision_thresh): self.x = x self.threshold = threshold self.multi_spec_keep_func = multi_spec_keep_func self.loss_reduction_func = loss_reduction_func self.decision_thresh = decision_thresh self.fix_interm_bounds = fix_interm_bounds self.epsilon_over_decision_thresh = epsilon_over_decision_thresh # For computing the positive domain ratio self.original_size = x[0].shape[0] self.pruning_in_iteration = False self.preserve_mask = None self.preserve_mask_next = None self.time = 0 # For holding full-sized alphas self.cached_alphas = {} def prune(self, x, C, ret_l, ret_u, ret, full_l, full_ret_l, full_ret_u, full_ret, interm_bounds, aux_reference_bounds, reference_bounds, stop_criterion_func, bound_lower): # positive domains may already be filtered out, so we use all domains - # negative domains to compute # FIXME Only using ret_l but not ret_u. 
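sec(x) = 1/cos(x) is convex on (-pi/2, pi/2), which is why the relaxation above uses the chord through the endpoints as the upper bound and the tangent at the midpoint (with slope sec(m)*tan(m)) as the lower bound. A small numeric sanity check of that reasoning (values are illustrative):

import torch

def sec(t):
    return 1.0 / torch.cos(t)

x_L, x_U = torch.tensor(-0.8), torch.tensor(1.2)
xs = torch.linspace(float(x_L), float(x_U), 1001)

# Upper bound: the chord through the two endpoints.
upper_k = (sec(x_U) - sec(x_L)) / (x_U - x_L)
upper = upper_k * (xs - x_L) + sec(x_L)

# Lower bound: the tangent at the midpoint; d/dx sec(x) = sec(x) * tan(x).
mid = (x_L + x_U) / 2
lower_k = sec(mid) * torch.tan(mid)
lower = lower_k * (xs - mid) + sec(mid)

assert (upper - sec(xs) >= -1e-5).all() and (sec(xs) - lower >= -1e-5).all()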
if self.decision_thresh is not None and ret_l is not None: if (isinstance(self.decision_thresh, torch.Tensor) and self.decision_thresh.numel() > 1 and self.preserve_mask is not None): if self.decision_thresh.shape[-1] == 1: # single spec with pruned domains negative_domain = ( ret_l.view(-1) <= self.decision_thresh[self.preserve_mask].view(-1) ).sum() else: # multiple spec with pruned domains negative_domain = self.multi_spec_keep_func( ret_l <= self.decision_thresh[self.preserve_mask]).sum() else: if ret_l.shape[-1] == 1: # single spec negative_domain = ( ret_l.view(-1) <= self.decision_thresh.view(-1)).sum() else: # multiple spec negative_domain = self.multi_spec_keep_func( ret_l <= self.decision_thresh).sum() positive_domain_num = self.original_size - negative_domain else: positive_domain_num = -1 positive_domain_ratio = float( positive_domain_num) / float(self.original_size) # threshold is 10% by default self.next_iter_pruning_in_iteration = ( self.decision_thresh is not None and positive_domain_ratio > self.threshold) if self.pruning_in_iteration: stime = time.time() self.get_preserve_mask(ret_l) # prune C if C is not None and C.shape[0] == x[0].shape[0]: C = C[self.now_preserve_mask] # means C is also batch specific # prune x x, pre_prune_size = self._prune_x(x) # prune bounds ret_prune = self._prune_bounds_by_mask( ret_l, ret_u, ret, interm_bounds, aux_reference_bounds, reference_bounds, pre_prune_size) full_l, full_ret_l, full_ret_u, full_ret = ret_prune self.time += time.time() - stime stop_criterion = stop_criterion_func( full_ret_l) if bound_lower else stop_criterion_func(-full_ret_u) if (type(stop_criterion) != bool and stop_criterion.numel() > 1 and self.pruning_in_iteration): stop_criterion = stop_criterion[self.preserve_mask] return (x, C, full_l, full_ret_l, full_ret_u, full_ret, stop_criterion) def prune_idx(self, idx_mask, idx, x): if self.pruning_in_iteration: # local sparse index of preserved samples where # idx == true local_idx = idx_mask[self.preserve_mask].nonzero().view(-1) # idx is global sparse index of preserved samples where # idx == true new_idx = torch.zeros_like( idx_mask, dtype=torch.bool, device=x[0].device) new_idx[self.preserve_mask] = idx_mask[self.preserve_mask] idx = new_idx.nonzero().view(-1) reference_idx = local_idx else: reference_idx = idx return reference_idx, idx def next_iter(self): if self.pruning_in_iteration: self.preserve_mask = self.preserve_mask_next if (not self.pruning_in_iteration and self.next_iter_pruning_in_iteration): # init preserve_mask etc self.preserve_mask = torch.arange( 0, self.x[0].shape[0], device=self.x[0].device, dtype=torch.long) self.pruning_in_iteration = True def update_best(self, full_ret_l, full_ret_u, best_ret): if self.pruning_in_iteration: # overwrite pruned cells in best_ret by threshold + eps fin_l, fin_u = best_ret if fin_l is not None: new_fin_l = full_ret_l new_fin_l[self.preserve_mask] = fin_l[self.preserve_mask] fin_l = new_fin_l if fin_u is not None: new_fin_u = full_ret_u new_fin_u[self.preserve_mask] = fin_u[self.preserve_mask] fin_u = new_fin_u best_ret = (fin_l, fin_u) return best_ret def update_ratio(self, full_l, full_ret_l): if self.decision_thresh is not None and full_l.numel() > 0: stime = time.time() with torch.no_grad(): if isinstance(self.decision_thresh, torch.Tensor): if self.decision_thresh.shape[-1] == 1: neg_domain_num = torch.sum( full_ret_l.view(-1) <= self.decision_thresh.view(-1) ).item() else: neg_domain_num = torch.sum(self.multi_spec_keep_func( full_ret_l <= 
self.decision_thresh)).item() else: if full_l.shape[-1] == 1: neg_domain_num = torch.sum( full_ret_l.view(-1) <= self.decision_thresh).item() else: neg_domain_num = torch.sum(self.multi_spec_keep_func( full_ret_l <= self.decision_thresh)).item() now_pruning_ratio = ( 1.0 - float(neg_domain_num) / float(full_l.shape[0])) print('pruning_in_iteration open status:', self.pruning_in_iteration) print('ratio of positive domain =', full_l.shape[0] - neg_domain_num, '/', full_l.numel(), '=', now_pruning_ratio) self.time += time.time() - stime print('pruning-in-iteration extra time:', self.time) @torch.no_grad() def _prune_x(self, x): """ Prune x by given now_preserve_mask. """ x = list(x) pre_prune_size = x[0].shape[0] x[0].data = x[0][self.now_preserve_mask].data if hasattr(x[0], 'ptb'): if x[0].ptb.x_L is not None: x[0].ptb.x_L = x[0].ptb.x_L[self.now_preserve_mask] if x[0].ptb.x_U is not None: x[0].ptb.x_U = x[0].ptb.x_U[self.now_preserve_mask] x = tuple(x) return x, pre_prune_size def _prune_dict_of_lists(self, dict_of_lists, pre_prune_size): if dict_of_lists is not None: for k, v in dict_of_lists.items(): v_l, v_r = v[0], v[1] if v_l.shape[0] == pre_prune_size: # the first dim is batch size and matches the preserve mask v_l = v_l[self.now_preserve_mask] if v_r.shape[0] == pre_prune_size: # the first dim is batch size and matches the preserve mask v_r = v_r[self.now_preserve_mask] dict_of_lists[k] = [v_l, v_r] @torch.no_grad() def _prune_bounds_by_mask(self, ret_l, ret_u, ret, interm_bounds, aux_reference_bounds, reference_bounds, pre_prune_size): """ Prune bounds by given now_preserve_mask. """ full_ret_l, full_l = self._recover_bounds_to_full_batch(ret_l) full_ret_u, full_u = self._recover_bounds_to_full_batch(ret_u) full_ret = (full_ret_l, full_ret_u) + ret[2:] if self.fix_interm_bounds: interval_to_prune = interm_bounds else: interval_to_prune = None self._prune_dict_of_lists(interval_to_prune, pre_prune_size) self._prune_dict_of_lists(aux_reference_bounds, pre_prune_size) self._prune_dict_of_lists(reference_bounds, pre_prune_size) # update the global mask here for possible next iteration self.preserve_mask_next = self.preserve_mask[self.now_preserve_mask] return full_l, full_ret_l, full_ret_u, full_ret @torch.no_grad() def get_preserve_mask(self, ret_l): """ Get preserve mask by decision_thresh to filter out the satisfied bounds. """ if (isinstance(self.decision_thresh, torch.Tensor) and self.decision_thresh.numel() > 1): if self.decision_thresh.shape[-1] == 1: self.now_preserve_mask = ( ret_l <= self.decision_thresh[self.preserve_mask] ).view(-1).nonzero().view(-1) else: self.now_preserve_mask = self.multi_spec_keep_func( ret_l <= self.decision_thresh[self.preserve_mask] ).nonzero().view(-1) else: if self.decision_thresh.shape[-1] == 1: self.now_preserve_mask = ( ret_l <= self.decision_thresh).view(-1).nonzero().view(-1) else: self.now_preserve_mask = self.multi_spec_keep_func( ret_l <= self.decision_thresh).nonzero().view(-1) def _recover_bounds_to_full_batch(self, ret): """ Recover lower and upper bounds to full batch size so that later we can directly update using the full batch size of l and u. 
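A toy illustration of the preserve-mask logic above, with made-up numbers: sub-problems whose lower bound already exceeds the decision threshold are considered verified and dropped, and only the remaining ("negative") ones are kept for further optimization.

import torch

ret_l = torch.tensor([[-0.3], [0.2], [-0.1], [0.5]])   # lower bounds, one spec per sub-problem
decision_thresh = 0.0
# Keep only sub-problems that are not yet verified (lower bound <= threshold).
now_preserve_mask = (ret_l <= decision_thresh).view(-1).nonzero().view(-1)
print(now_preserve_mask)                # tensor([0, 2])
ret_l_pruned = ret_l[now_preserve_mask]
print(ret_l_pruned.shape)               # torch.Size([2, 1])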
""" if ret is not None: if (isinstance(self.decision_thresh, torch.Tensor) and self.decision_thresh.numel() > 1): full_ret = ( self.decision_thresh.clone().to(ret.device).type(ret.dtype) + self.epsilon_over_decision_thresh) else: num_decision_thresh = self.decision_thresh if isinstance(num_decision_thresh, torch.Tensor): num_decision_thresh = num_decision_thresh.item() full_ret = torch.full( (self.original_size,) + tuple(ret.shape[1:]), fill_value=(num_decision_thresh + self.epsilon_over_decision_thresh), device=ret.device, dtype=ret.dtype) full_ret[self.preserve_mask] = ret if full_ret.shape[1] > 1: full_reduced_ret = self.loss_reduction_func(full_ret) else: full_reduced_ret = full_ret else: full_ret = full_reduced_ret = None return full_ret, full_reduced_ret def cache_full_sized_alpha(self, optimizable_activations: list): """ When preserve mask is in use, cache the full-sized alphas in self.cached_alphas, and rewrite the alphas in nodes according to the preserve mask. The full-sized alphas will be recovered back to nodes after compute_bounds, via the function named recover_full_sized_alphas() :param optimizable_activations: list of nodes that may have slope alphas as optimizable variables :return: None """ if self.pruning_in_iteration: for act in optimizable_activations: if act.name in self.cached_alphas: self.cached_alphas[act.name].clear() self.cached_alphas[act.name] = {} if act.alpha is not None: for start_node in act.alpha: # cached alphas and alphas stored in nodes should share the same memory space self.cached_alphas[act.name][start_node] = act.alpha[start_node] act.alpha[start_node] = act.alpha[start_node][:, :, self.preserve_mask] def recover_full_sized_alpha(self, optimizable_activations: list): """ After bound computation, recover the full-sized alphas back to nodes. :param optimizable_activations: ist of nodes that may have slope alphas as optimizable variables :return: None """ if self.pruning_in_iteration: for act in optimizable_activations: for start_node in self.cached_alphas[act.name]: act.alpha[start_node] = self.cached_alphas[act.name][start_node] def clean_full_sized_alpha_cache(self): for act_node in self.cached_alphas: self.cached_alphas[act_node].clear() self.cached_alphas.clear() ================================================ FILE: auto_LiRPA/optimize_graph.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### """Optimize the graph to merge nodes and remove unnecessary ones. Initial and experimental code only. 
""" from auto_LiRPA.bound_ops import * from auto_LiRPA.utils import logger import torch from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule def _optimize_graph(self: 'BoundedModule'): """Optimize the graph to remove some unnecessary nodes.""" merge_identical_act(self) convert_sqr(self) div_to_mul(self) merge_sec(self) minmax_to_relu(self) optimize_relu_relation(self) if self.bound_opts['optimize_graph']['optimizer'] is not None: # Use the custom graph optimizer self.bound_opts['optimize_graph']['optimizer'](self) for node in list(self.nodes()): if (not node.output_name and node.name != self.final_name and node.name not in self.root_names): self.delete_node(node) def _copy_node_properties(new, ref): new.output_shape = ref.output_shape new.device = ref.device new.attr['device'] = ref.attr['device'] new.batch_dim = ref.batch_dim new.from_complex_node = ref.from_complex_node def merge_sec(model: 'BoundedModule'): nodes = list(model.nodes()) for node in nodes: if type(node) == BoundReciprocal and type(node.inputs[0]) == BoundCos: node_new = BoundSec(inputs=[node.inputs[0].inputs[0]]) node_new.name = f'{node.inputs[0].name}/sec' _copy_node_properties(node_new, node) if node_new.name in model._modules: node_existing = model._modules[node_new.name] assert isinstance(node_existing, BoundSec) assert node_existing.inputs[0] == node.inputs[0].inputs[0] model.replace_node(node, node_existing) else: model.add_nodes([node_new]) model.replace_node(node, node_new) def div_to_mul(model: 'BoundedModule'): nodes = list(model.nodes()) for node in nodes: if type(node) == BoundDiv: logger.debug('Replacing BoundDiv node: %s', node) node_reciprocal = BoundReciprocal(inputs=[node.inputs[1]]) node_reciprocal.name = f'{node.name}/reciprocal' # Properties of the reciprocal node only depend on inputs[1], i.e. # the node of denominator. They can be different from those of # the original BoundDiv node, due to possible broadcasting and # perturbed/unperturbed switching in multiplication. _copy_node_properties(node_reciprocal, node.inputs[1]) model.add_nodes([node_reciprocal]) node_mul = BoundMul(inputs=[node.inputs[0], node_reciprocal], options=model.bound_opts) node_mul.name = f'{node.name}/mul' _copy_node_properties(node_mul, node) model.add_nodes([node_mul]) model.replace_node(node, node_mul) def convert_sqr(model: 'BoundedModule'): """Replace BoundMul or Bound Pow with BoundSqr if applicable. 1. If the two inputs nodes of a BoundMul node are the same, use BoundSqr. 2. Pow(x, 2) can be replaced with BoundSqr. 
""" nodes = list(model.nodes()) for node in nodes: replace = False if type(node) == BoundMul and node.inputs[0] == node.inputs[1]: replace = True elif type(node) == BoundPow: if ((isinstance(node.inputs[1], BoundBuffers) and node.inputs[1].buffer == 2) or (isinstance(node.inputs[1], BoundConstant) and node.inputs[1].value == 2)): replace = True if replace: node_new = BoundSqr(inputs=[node.inputs[0]]) node_new.name = f'{node.name}/sqr' _copy_node_properties(node_new, node) model.add_nodes([node_new]) logger.debug('Replaceing %s with %s', node, node_new) model.replace_node(node, node_new) def merge_identical_act(model: 'BoundedModule'): """Merge identical BoundActivation""" nodes = list(model.nodes()) merged = [False] * len(nodes) for i in range(len(nodes)): if (not merged[i] and isinstance(nodes[i], BoundActivation) and len(nodes[i].inputs) == 1): for j in range(i + 1, len(nodes)): if (not merged[j] and type(nodes[j]) == type(nodes[i]) and len(nodes[i].inputs) == 1): if nodes[i].inputs[0] == nodes[j].inputs[0]: logger.debug('Merging node %s to %s', nodes[j], nodes[i]) model.replace_node(nodes[j], nodes[i]) merged[j] = True def minmax_to_relu(model: 'BoundedModule'): """Replace BoundMinMax with BoundRelu if one of its inputs is constant""" nodes = list(model.nodes()) for node in nodes: if type(node) == BoundMax: for i, input_node in enumerate(node.inputs): if not input_node.perturbed: logger.debug('Replacing BoundMax node %s', node) # max(x, c) = ReLU(x - c) + c node_sub = BoundSub(inputs=[node.inputs[1-i], input_node], options=model.bound_opts) node_sub.name = f'{node.name}/sub' _copy_node_properties(node_sub, node) node_relu = BoundRelu(inputs=[node_sub], options=model.bound_opts) node_relu.name = f'{node.name}/relu' _copy_node_properties(node_relu, node) node_add = BoundAdd(inputs=[node_relu, input_node], options=model.bound_opts) node_add.name = f'{node.name}/add' _copy_node_properties(node_add, node) model.add_nodes([node_sub, node_relu, node_add]) model.replace_node(node, node_add) break elif type(node) == BoundMin: for i, input_node in enumerate(node.inputs): if not input_node.perturbed: logger.debug('Replacing BoundMin node %s', node) # min(x, c) = -ReLU(c - x) + c node_sub_1 = BoundSub(inputs=[input_node, node.inputs[1-i]], options=model.bound_opts) node_sub_1.name = f'{node.name}/sub/1' _copy_node_properties(node_sub_1, node) node_relu = BoundRelu(inputs=[node_sub_1], options=model.bound_opts) node_relu.name = f'{node.name}/relu' _copy_node_properties(node_relu, node) node_sub_2 = BoundSub(inputs=[input_node, node_relu], options=model.bound_opts) node_sub_2.name = f'{node.name}/sub/2' _copy_node_properties(node_sub_2, node) model.add_nodes([node_sub_1, node_relu, node_sub_2]) model.replace_node(node, node_sub_2) break def _pair_row(Ws, bs, Wm, j, atol=1e-8): """ Checks the relation ReLU(x) - ReLU(-x) = x. Return the index at the merge weight if the relation exists, otherwise return None. """ # Check whether this fits the pattern in docstring. if not (torch.allclose(Ws[j+1], -Ws[j], atol=atol) and abs(float(bs[j] + bs[j+1])) < atol): return None # Make merge weight 4D so Gemm and Conv share same indexing if Wm.dim() == 2: # Gemm path Wm4 = Wm.unsqueeze(-1).unsqueeze(-1) else: # Conv path Wm4 = Wm # Find corresponding columns of the merge weight # We check 1) The two nonzero element are in the same row # 2) The two entries are +1 and -1 # If the check pass, we return the row index, otherwise it # is not a valid pattern match and we return None. 
rows = torch.nonzero(Wm4[:, [j, j+1], 0, 0], as_tuple=False) if rows.size(0) != 2 or rows[0, 0] != rows[1, 0]: return None r = int(rows[0, 0]) ok = (abs(float(Wm4[r, j, 0, 0] - 1)) < atol and abs(float(Wm4[r, j+1, 0, 0] + 1)) < atol and torch.count_nonzero(Wm4[r]) == 2) return r if ok else None def optimize_relu_relation(model: 'BoundedModule'): """ This graph optimization detects the optimizable path with the identity ReLU(ReLU(x + b) - ReLU(-x - b)) = ReLU(x + b) for both linear and convolution layers. Replace the sequence of nodes with pattern Gemm -> ReLU -> Gemm -> ReLU or Conv -> ReLU -> Conv -> ReLU with a single Gemm -> ReLU or Conv -> ReLU. """ nodes = list(model.nodes()) i = 0 while i + 3 < len(nodes): A, B, C, D = nodes[i:i+4] # In Conv layers, we detect whether the optimization can be done # for pairs of channels. If so, the optimization eliminates one # Conv layer and recovers the original results with the identity # in the docstring. if (isinstance(A, BoundConv) and isinstance(B, BoundRelu) and isinstance(C, BoundConv) and isinstance(D, BoundRelu) and tuple(C.attr['kernel_shape'])==(1,1)): # Here we use forward() to extract weights, so that BoundParams/BoundConstant, or any other node # that could represent weights, is handled through a unified interface. Ws = C.inputs[1].forward() Wc = A.inputs[1].forward() # We only care about 2D conv if Ws.ndim != 4 or Wc.ndim != 4: i += 1 continue bs = C.inputs[2].forward() if C.has_bias else torch.zeros_like(Ws[:, 0, 0, 0]) bc = A.inputs[2].forward() if A.has_bias else torch.zeros_like(Wc[:, 0, 0, 0]) # Detect whether and where the identity is present in the weight matrix. pairs, skip = {}, set() for j in range(0, Wc.size(0) - 1): r = _pair_row(Wc, bc, Ws, j) if r is not None: pairs[j] = r skip.add(j + 1) if pairs: Cout, Cin, kH, kW = Ws.size(0), Wc.size(1), *Wc.shape[2:] W_new = torch.empty((Cout, Cin, kH, kW), dtype=Wc.dtype, device=Wc.device) b_new = torch.empty((Cout,), dtype=bc.dtype, device=bc.device) # Build fused weight and bias dst = 0 for src in range(Wc.size(0)): if src in skip: continue b_new[dst] = bs[pairs[src]] + bc[src] if src in pairs else bc[src] W_new[dst] = Wc[src] dst += 1 # Modify the graph using the newly built weights and bias weight_node = BoundParams('fused_weight', torch.nn.Parameter(W_new)) bias_node = BoundParams('fused_bias', torch.nn.Parameter(b_new)) weight_node.name = f'{A.name}/optimized/weight' bias_node.name = f'{A.name}/optimized/bias' fused = BoundConv( attr=A.attr.copy(), inputs=[A.inputs[0], weight_node, bias_node], output_index=A.output_index, options=model.bound_opts ) fused.name = f'{A.name}/optimized' _copy_node_properties(fused, A) relu = BoundRelu(inputs=[fused], options=model.bound_opts) relu.name = f'{A.name}/optimized/relu' _copy_node_properties(relu, D) model.add_nodes([weight_node, bias_node, fused, relu]) model.replace_node(D, relu) model.replace_node(A, fused) model.delete_node(B) model.delete_node(C) # Skip the full sequence once the pattern is detected i += 4 continue # In Linear layers, we detect whether the optimization can be # done for pairs of rows. The code structure is similar to the # one in the Conv branch.
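The fusion relies on the identity ReLU(ReLU(z) - ReLU(-z)) = ReLU(z), which holds because ReLU(z) - ReLU(-z) = z, so a 1x1 middle layer that merely recombines a (+1, -1) pair of channels can be folded away. A minimal numeric check (illustrative shapes):

import torch
import torch.nn.functional as F

z = torch.randn(4, 16)
assert torch.allclose(F.relu(z) - F.relu(-z), z)
assert torch.allclose(F.relu(F.relu(z) - F.relu(-z)), F.relu(z))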
elif (isinstance(A, BoundLinear) and isinstance(B, BoundRelu) and isinstance(C, BoundLinear) and isinstance(D, BoundRelu)): Ws = A.inputs[1].forward() Wm = C.inputs[1].forward() bs = A.inputs[2].forward() if len(A.inputs) == 3 else torch.zeros_like(Ws[:, 0]) bm = C.inputs[2].forward() if len(C.inputs) == 3 else torch.zeros_like(Wm[:, 0]) pairs, skip = {}, set() for j in range(0, Ws.size(0) - 1): r = _pair_row(Ws, bs, Wm, j) if r is not None: pairs[j] = r skip.add(j + 1) if pairs: n_out = Wm.shape[0] W_new = torch.empty((n_out, Ws.shape[1]), dtype=Ws.dtype, device=A.attr['device']) b_new = torch.empty((n_out,), dtype=bs.dtype, device=A.attr['device']) dst = 0 for src in range(Ws.size(0)): if src in skip: continue b_new[dst] = bm[pairs[src]] + bs[src] if src in pairs else bs[src] W_new[dst] = Ws[src] dst += 1 weight_node = BoundParams('fused_weight', torch.nn.Parameter(W_new), attr=dict(device=A.attr['device'])) bias_node = BoundParams('fused_bias', torch.nn.Parameter(b_new), attr=dict(device=A.attr['device'])) weight_node.name = f'{A.name}/optimized/weight' bias_node.name = f'{A.name}/optimized/bias' fused = BoundLinear( attr=A.attr.copy(), inputs=[A.inputs[0], weight_node, bias_node], output_index=A.output_index, options=model.bound_opts ) fused.name = f'{A.name}/optimized' _copy_node_properties(fused, A) relu = BoundRelu(inputs=[fused], options=model.bound_opts) relu.name = f'{A.name}/optimized/relu' _copy_node_properties(relu, D) model.add_nodes([weight_node, bias_node, fused, relu]) model.replace_node(D, relu) model.delete_node(A) model.delete_node(B) model.delete_node(C) i += 4 continue i += 1 ================================================ FILE: auto_LiRPA/optimized_bounds.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import time import os from collections import OrderedDict from contextlib import ExitStack import torch from torch import optim, Tensor from .beta_crown import print_optimized_beta from .cuda_utils import double2float from .utils import reduction_sum, multi_spec_keep_func_all, clone_sub_A_dict from .opt_pruner import OptPruner from .perturbations import PerturbationLpNorm from typing import TYPE_CHECKING, Union, Tuple, Optional, Dict if TYPE_CHECKING: from .bound_general import BoundedModule default_optimize_bound_args = { 'enable_alpha_crown': True, # Enable optimization of alpha. 'enable_beta_crown': False, # Enable beta split constraint. 'apply_output_constraints_to': [], # Enable optimization w.r.t. output constraints. 'tighten_input_bounds': False, # Don't tighten input bounds # If output constraints are activated, use only bounds computed with them. 'best_of_oc_and_no_oc': False, 'directly_optimize': [], # No layer should be directly optimized 'oc_lr': 0.1, # learning rate for dualized output constraints 'share_gammas': False, 'iteration': 20, # Number of alpha/beta optimization iterations. 
# Share some alpha variables to save memory at the cost of slightly # looser bounds. 'use_shared_alpha': False, # Optimizer used for alpha and beta optimization. 'optimizer': 'adam', # Save best results of alpha/beta/bounds during optimization. 'keep_best': True, # Only optimize bounds of last layer during alpha/beta CROWN. 'fix_interm_bounds': True, # Learning rate for the optimizable parameter alpha in alpha-CROWN. 'lr_alpha': 0.5, # Learning rate for the optimizable parameter beta in beta-CROWN. 'lr_beta': 0.05, 'lr_cut_beta': 5e-3, # Learning rate for optimizing cut betas. # Initial alpha variables by calling CROWN once. 'init_alpha': True, 'lr_coeffs': 0.01, # Learning rate for coeffs for refinement # Layers to be refined, separated by commas. # -1 means preactivation before last activation. 'intermediate_refinement_layers': [-1], # When batch size is not 1, this reduction function is applied to # reduce the bounds into a scalar. 'loss_reduction_func': reduction_sum, # Criteria function of early stop. 'stop_criterion_func': lambda x: False, # Learning rate decay factor during bounds optimization. 'lr_decay': 0.98, # Number of iterations that we will start considering early stop # if tracking no improvement. 'early_stop_patience': 10, # Start to save optimized best bounds # when current_iteration > int(iteration*start_save_best) 'start_save_best': 0.5, # Use double fp (float64) at the last iteration in alpha/beta CROWN. 'use_float64_in_last_iteration': False, # Prune verified domain within iteration. 'pruning_in_iteration': False, # Percentage of the minimum domains that can apply pruning. 'pruning_in_iteration_threshold': 0.2, # For specification that will output multiple bounds for one # property, we use this function to prune them. 'multi_spec_keep_func': multi_spec_keep_func_all, # Use the newly fixed loss function. By default, it is set to False # for compatibility with existing use cases. # Try to ensure that the parameters always match with the optimized bounds. 'deterministic': False, 'max_time': 1e9, } def opt_reuse(self: 'BoundedModule'): for node in self.get_enabled_opt_act(): node.opt_reuse() def opt_no_reuse(self: 'BoundedModule'): for node in self.get_enabled_opt_act(): node.opt_no_reuse() def _set_alpha(optimizable_activations, parameters, alphas, lr): """Set best_alphas, alphas and parameters list.""" for node in optimizable_activations: alphas.extend(list(node.alpha.values())) node.opt_start() # Alpha has shape (2, output_shape, batch_dim, node_shape) parameters.append({'params': alphas, 'lr': lr, 'batch_dim': 2}) # best_alpha is a dictionary of dictionary. Each key is the alpha variable # for one activation layer, and each value is a dictionary contains all # activation layers after that layer as keys. best_alphas = OrderedDict() for m in optimizable_activations: best_alphas[m.name] = {} for alpha_m in m.alpha: best_alphas[m.name][alpha_m] = m.alpha[alpha_m].detach().clone() # We will directly replace the dictionary for each activation layer after # optimization, so the saved alpha might not have require_grad=True. 
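A typical way to exercise these options from user code, sketched after the public API used in examples/vision/simple_verification.py (the toy network, epsilon, and option values below are placeholders):

import torch
from auto_LiRPA import BoundedModule, BoundedTensor, PerturbationLpNorm

model = torch.nn.Sequential(torch.nn.Linear(2, 16), torch.nn.ReLU(), torch.nn.Linear(16, 1))
x = torch.zeros(1, 2)
lirpa_model = BoundedModule(model, torch.empty_like(x), bound_opts={
    # Only the keys being overridden are given; the rest typically fall back to the defaults above.
    'optimize_bound_args': {'iteration': 50, 'lr_alpha': 0.1, 'lr_decay': 0.98},
})
ptb = PerturbationLpNorm(norm=float('inf'), eps=0.1)
lb, ub = lirpa_model.compute_bounds(x=(BoundedTensor(x, ptb),), method='CROWN-Optimized')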
m.alpha[alpha_m].requires_grad_() return best_alphas def _set_gammas(nodes, parameters): """ Adds gammas to parameters list """ gammas = [] gamma_lr = 0.1 for node in nodes: if hasattr(node, 'gammas'): gammas.append(node.gammas_underlying_tensor) # The learning rate is the same for all layers gamma_lr = node.options['optimize_bound_args']['oc_lr'] parameters.append({'params': gammas, 'lr': gamma_lr}) def _save_ret_first_time(bounds, best_ret): """Save results at the first iteration to best_ret.""" if bounds is not None: best_ret.append(bounds.detach().clone()) else: best_ret.append(None) def _to_float64(self: 'BoundedModule', C, x, aux_reference_bounds, interm_bounds): """ Transfer variables to float64 only in the last iteration to help alleviate floating point error. """ self.to(torch.float64) C = C.to(torch.float64) x = self._to(x, torch.float64) # best_intermediate_bounds is linked to aux_reference_bounds! # we only need call .to() for one of them self._to(aux_reference_bounds, torch.float64, inplace=True) interm_bounds = self._to( interm_bounds, torch.float64) return C, x, interm_bounds def _to_default_dtype(self: 'BoundedModule', x, total_loss, full_ret, ret, best_intermediate_bounds, return_A): """ Switch back to default precision from float64 typically to adapt to afterwards operations. """ total_loss = total_loss.to(torch.get_default_dtype()) self.to(torch.get_default_dtype()) x[0].to(torch.get_default_dtype()) full_ret = list(full_ret) if isinstance(ret[0], torch.Tensor): # round down lower bound full_ret[0] = double2float(full_ret[0], 'down') if isinstance(ret[1], torch.Tensor): # round up upper bound full_ret[1] = double2float(full_ret[1], 'up') for _k, _v in best_intermediate_bounds.items(): _v[0] = double2float(_v[0], 'down') _v[1] = double2float(_v[1], 'up') best_intermediate_bounds[_k] = _v if return_A: full_ret[2] = self._to(full_ret[2], torch.get_default_dtype()) return total_loss, x, full_ret def _get_idx_mask(idx: int, full_ret_bound: Tensor, best_ret_bound: Tensor, loss_reduction_func ) -> Tuple[Tensor, Optional[Tensor]]: """ Get index for improved elements. :param idx: 0 := updating the lower bound, 1 := updating the upper bound :param full_ret_bound: Lower/upper bound results for this iteration :param best_ret_bound: The best lower/upper bound results seen thus far :param loss_reduction_func: Loss reduction function that reduces the losses to just the batch dimension. :return: idx_mask: A mask on the batch dimension where the mask is true if a sub-problem has seen loss improvement. improved_idx: A Tensor of the indices in the batch dimension that have seen loss improvement. """ assert idx in (0, 1), 'idx must be 0 (lower bound) or 1 (upper bound)' reduced_full = loss_reduction_func(full_ret_bound) reduced_best = loss_reduction_func(best_ret_bound) idx_mask = (reduced_full > reduced_best) if idx == 0 else (reduced_full < reduced_best) idx_mask = idx_mask.view(-1) improved_idx = idx_mask.nonzero(as_tuple=True)[0] if idx_mask.any() else None return idx_mask, improved_idx def _update_best_ret( full_ret: Dict[str, Dict[str, Dict[str, Union[Tensor, 'Patches', Tuple]]]], best_ret: Dict[str, Dict[str, Dict[str, Union[Tensor, 'Patches', Tuple]]]], loss_reduction_func, idx: int, deterministic: bool = False, best_out_in_A_dict: Optional[Dict[str, Union[Tensor, 'Patches', Tuple]]] = None, out_in_keys: Optional[Tuple[str, str]] = None ): """ Update best_ret_bound and best_ret by comparing with new results. :param full_ret: The full return from the 'compute_bounds' method. 
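The 'down'/'up' arguments matter for soundness: when a float64 lower bound is cast back to float32 it must never increase, and an upper bound must never decrease. A self-contained sketch of that idea using torch.nextafter (double2float itself is provided by cuda_utils and may be implemented differently):

import torch

def round_down_to_float32(x64: torch.Tensor) -> torch.Tensor:
    """Sketch: cast float64 -> float32 without ever increasing the value."""
    x32 = x64.to(torch.float32)
    # Where round-to-nearest went up, step one float32 ULP toward -inf.
    too_big = x32.to(torch.float64) > x64
    stepped_down = torch.nextafter(x32, torch.full_like(x32, -float('inf')))
    return torch.where(too_big, stepped_down, x32)

lb64 = torch.tensor([1.0000000001, -2.3456789012345], dtype=torch.float64)
lb32 = round_down_to_float32(lb64)
assert (lb32.to(torch.float64) <= lb64).all()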
:param best_ret: The best return during optimization in the same format as 'full_ret' :param loss_reduction_func: Loss reduction function that reduces the losses to just the batch dimension. :param idx: 0 := updating the lower bound, 1 := updating the upper bound :param deterministic: If true, problems that have seen loss improvement will have their bounds directly saved as the new best bound. Otherwise, the current bounds will be compared to the current best bounds and the comparison result is saved as the new best bound. In other words, deterministic is true if an improvement in the loss function is a sufficient condition for bound improvement. :param best_out_in_A_dict: If given, this is the A_dict entry corresponding to the output :param out_in_keys: If given, this is a tuple whose first element is the first index into the A_dict and whose second element is the second index into the A_dict. In particular, the first element should be the name of the output layer of the network, and the second element should be the name of the input layer. If these indices are not given correctly, an indexing error will be thrown. If given, it is assumed that we should use these keys to update lA/uA/lbias/ubias depending on if the bounds have improved. Therefore, we must assert that 'full_ret' and 'best_ret' contain an A_dict. :return: best_ret: best_out_in_A_dict: An updated A_dict entry corresponding to the output/input layer need_update: Set to True in this method if at least one sub-problem has seen bound improvement. idx_mask: A mask on the batch dimension where the mask is true if a sub-problem has seen loss improvement. improved_idx: A Tensor of the indices in the batch dimension that have seen loss improvement. """ assert idx in (0, 1), 'idx must be 0 (lower bound) or 1 (upper bound)' idx_mask, improved_idx = _get_idx_mask(idx, full_ret[idx], best_ret[idx], loss_reduction_func) if improved_idx is None: return best_ret, best_out_in_A_dict, False, idx_mask, None compare_fn = torch.max if idx == 0 else torch.min # Update detailed return tensors (if present) if full_ret[idx] is not None: if deterministic: best_ret[idx][improved_idx] = full_ret[idx][improved_idx] if out_in_keys is not None: _update_A_dict( best_out_in_A_dict, full_ret[2][out_in_keys[0]][out_in_keys[1]], improved_idx ) else: if out_in_keys is not None: # Since we must also update the A_dict, we don't want to use the original # 'compare' method as we need to know which specific problems have # seen improvement. cmp_op = (lambda x, y: (x > y)) if idx==0 else (lambda x, y: (x < y)) c_mask = cmp_op(full_ret[idx][improved_idx], best_ret[idx][improved_idx]) best_ret[idx][improved_idx] = torch.where( c_mask, full_ret[idx][improved_idx], best_ret[idx][improved_idx]) # Also update the lA/uA/lbias/ubias matrices/vectors from the output layer to # the input layer if the bounds have improved and if the output and input layer # keys were specified _update_A_dict( best_out_in_A_dict, full_ret[2][out_in_keys[0]][out_in_keys[1]], improved_idx, c_mask ) else: # Simple tensor-wise comparison (no A_dict) best_ret[idx][improved_idx] = compare_fn( full_ret[idx][improved_idx], best_ret[idx][improved_idx]) return best_ret, best_out_in_A_dict, True, idx_mask, improved_idx def _update_A_dict(best_A, full_A, improved_idx, c_mask: Optional[Tensor] = None): """ Update best_A dict by full_A for entries at improved_idx. :param best_A: The A_dict entry to be updated. :param full_A: The A_dict entry containing the new values. 
:param improved_idx: The indices in the batch dimension that have seen bound improvement. :param c_mask: A mask on the batch dimension where the mask is true if a sub-problem has seen bound improvement. If None, then the entire slice at improved_idx will be replaced. """ for key, val in full_A.items(): if val is None: # An entry for lA/uA/lbias/ubias may be None depending on if we are # lower or upper bounding the network continue target = best_A[key][improved_idx] source = val[improved_idx] if c_mask is not None: c_mask_expanded = c_mask.view( *c_mask.shape, *([1] * (val.dim() - c_mask.dim())) ).expand_as(val[improved_idx]) # Selectively update entries based on c_mask best_A[key][improved_idx] = torch.where(c_mask_expanded, source, target) else: # Replace the entire slice if no mask is provided best_A[key][improved_idx] = source def _update_optimizable_activations( optimizable_activations, interm_bounds, fix_interm_bounds, best_intermediate_bounds, reference_idx, idx, alpha, best_alphas, deterministic): """ Update bounds and alpha of optimizable_activations. """ for node in optimizable_activations: # Update best intermediate layer bounds only when they are optimized. # If they are already fixed in interm_bounds, then do # nothing. if node.name not in best_intermediate_bounds: continue if (interm_bounds is None or node.inputs[0].name not in interm_bounds or not fix_interm_bounds): if deterministic: best_intermediate_bounds[node.name][0][idx] = node.inputs[0].lower[reference_idx] best_intermediate_bounds[node.name][1][idx] = node.inputs[0].upper[reference_idx] else: best_intermediate_bounds[node.name][0][idx] = torch.max( best_intermediate_bounds[node.name][0][idx], node.inputs[0].lower[reference_idx]) best_intermediate_bounds[node.name][1][idx] = torch.min( best_intermediate_bounds[node.name][1][idx], node.inputs[0].upper[reference_idx]) if alpha: # Each alpha has shape (2, output_shape, batch, *shape) for act. # For other activation function this can be different. for alpha_m in node.alpha: best_alphas[node.name][alpha_m][:, :, idx] = node.alpha[alpha_m][:, :, idx] def update_best_beta(self: 'BoundedModule', enable_opt_interm_bounds, betas, best_betas, idx): """ Update best beta by given idx. """ if enable_opt_interm_bounds and betas: for node in self.splittable_activations: for node_input in node.inputs: for key in node_input.sparse_betas.keys(): best_betas[node_input.name][key] = ( node_input.sparse_betas[key].val.detach().clone()) if self.cut_used: for gbidx, general_betas in enumerate(self.cut_beta_params): # FIXME need to check if 'cut' is a node name best_betas['cut'][gbidx] = general_betas.detach().clone() else: for node in self.nodes_with_beta: best_betas[node.name][idx] = node.sparse_betas[0].val[idx] if self.cut_used: regular_beta_length = len(betas) - len(self.cut_beta_params) for cut_beta_idx in range(len(self.cut_beta_params)): # general cut beta crown general_betas best_betas['cut'][cut_beta_idx][:, :, idx, :] = betas[regular_beta_length + cut_beta_idx][:, :, idx, :] def _get_optimized_bounds( self: 'BoundedModule', x=None, aux=None, C=None, IBP=False, forward=False, method='backward', bound_side='lower', reuse_ibp=False, return_A=False, average_A=False, final_node_name=None, interm_bounds=None, reference_bounds=None, aux_reference_bounds=None, needed_A_dict=None, cutter=None, decision_thresh=None, epsilon_over_decision_thresh=1e-4): """ Optimize CROWN lower/upper bounds by alpha and/or beta. 
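The torch.max / torch.min merge above (in the non-deterministic branch) makes the stored intermediate bounds monotonically tighter across iterations. In miniature, with made-up tensors:

import torch

best_lb = torch.tensor([-1.0, -0.5, -2.0])
best_ub = torch.tensor([ 1.0,  0.5,  2.0])
new_lb  = torch.tensor([-0.8, -0.7, -1.5])   # some entries improve, some do not
new_ub  = torch.tensor([ 1.2,  0.4,  1.9])
best_lb = torch.max(best_lb, new_lb)         # -> [-0.8, -0.5, -1.5]
best_ub = torch.min(best_ub, new_ub)         # -> [ 1.0,  0.4,  1.9]
assert (best_lb <= best_ub).all()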
""" opts = self.bound_opts['optimize_bound_args'] iteration = opts['iteration'] max_time = opts['max_time'] beta = opts['enable_beta_crown'] alpha = opts['enable_alpha_crown'] apply_output_constraints_to = opts['apply_output_constraints_to'] opt_choice = opts['optimizer'] keep_best = opts['keep_best'] fix_interm_bounds = opts['fix_interm_bounds'] loss_reduction_func = opts['loss_reduction_func'] stop_criterion_func = opts['stop_criterion_func'] use_float64_in_last_iteration = opts['use_float64_in_last_iteration'] early_stop_patience = opts['early_stop_patience'] start_save_best = opts['start_save_best'] multi_spec_keep_func = opts['multi_spec_keep_func'] deterministic = opts['deterministic'] enable_opt_interm_bounds = self.bound_opts.get( 'enable_opt_interm_bounds', False) sparse_intermediate_bounds = self.bound_opts.get( 'sparse_intermediate_bounds', False) verbosity = self.bound_opts['verbosity'] if bound_side not in ['lower', 'upper']: raise ValueError(bound_side) bound_lower = bound_side == 'lower' bound_upper = bound_side == 'upper' assert alpha or beta, ( 'nothing to optimize, use compute bound instead!') if C is not None: self.final_shape = C.size()[:2] self.bound_opts.update({'final_shape': self.final_shape}) if opts['init_alpha']: # TODO: this should set up aux_reference_bounds. self.init_alpha(x, share_alphas=opts['use_shared_alpha'], method=method, c=C, final_node_name=final_node_name) optimizable_activations = self.get_enabled_opt_act() alphas, parameters = [], [] dense_coeffs_mask = [] if alpha: best_alphas = _set_alpha( optimizable_activations, parameters, alphas, opts['lr_alpha']) else: best_alphas = None if beta: ret_set_beta = self.set_beta( enable_opt_interm_bounds, parameters, opts['lr_beta'], opts['lr_cut_beta'], cutter, dense_coeffs_mask) betas, best_betas, coeffs, dense_coeffs_mask = ret_set_beta[:4] if apply_output_constraints_to is not None and len(apply_output_constraints_to) > 0: _set_gammas(self.nodes(), parameters) start = time.time() if isinstance(decision_thresh, torch.Tensor): if decision_thresh.dim() == 1: # add the spec dim to be aligned with compute_bounds return decision_thresh = decision_thresh.unsqueeze(-1) if opts['pruning_in_iteration']: if return_A: raise NotImplementedError( 'Pruning in iteration optimization does not support ' 'return A yet. ' 'Please fix or discard this optimization by setting ' '--disable_pruning_in_iteration ' 'or bab: pruning_in_iteration: false') pruner = OptPruner( x, threshold=opts['pruning_in_iteration_threshold'], multi_spec_keep_func=multi_spec_keep_func, loss_reduction_func=loss_reduction_func, decision_thresh=decision_thresh, epsilon_over_decision_thresh=epsilon_over_decision_thresh, fix_interm_bounds=fix_interm_bounds) else: pruner = None if opt_choice == 'adam-autolr': opt = AdamElementLR(parameters) elif opt_choice == 'adam': opt = optim.Adam(parameters) elif opt_choice == 'sgd': opt = optim.SGD(parameters, momentum=0.9) else: raise NotImplementedError(opt_choice) # Create a weight vector to scale learning rate. loss_weight = torch.ones(size=(x[0].size(0),), device=x[0].device) scheduler = optim.lr_scheduler.ExponentialLR(opt, opts['lr_decay']) # best_intermediate_bounds is linked to aux_reference_bounds! 
best_intermediate_bounds = {} if (sparse_intermediate_bounds and aux_reference_bounds is None and reference_bounds is not None): aux_reference_bounds = {} for name, (lb, ub) in reference_bounds.items(): aux_reference_bounds[name] = [ lb.detach().clone(), ub.detach().clone()] if aux_reference_bounds is None: aux_reference_bounds = {} if len(apply_output_constraints_to) > 0: # INVPROP requires that all layers have cached bounds. This may not be the case # unless we explicitly compute them. self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] = [] with torch.no_grad(): self.compute_bounds( x=x, C=C, method='backward', bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, interm_bounds=interm_bounds) self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] = ( apply_output_constraints_to ) if (return_A and self.output_name[0] in needed_A_dict.keys() and self.input_name[0] in needed_A_dict[self.output_name[0]]): # If the A dict will be returned, and we expect to retrieve the hyperplanes relating the # output layer to the input layer, then we store these keys and pass them to the # '_update_best_ret' method so that these entries may be updated during the optimization # process. Only these output/input layer entries will be updated, and if other entries need # to be updated, '_update_best_ret' is not the correct method to update them. out_in_keys = (self.output_name[0], self.input_name[0]) else: out_in_keys = None need_grad = True patience = 0 ret_0 = None for i in range(iteration): if i == 0: # If we are at the first iteration, we need to # set the constraints_optimized to None self.constraints_optimized = None if cutter: # cuts may be optimized by cutter self.cut_module = cutter.cut_module if self.constraints_optimized is not None: for root in self.roots(): if ( hasattr(root, 'perturbation') and root.perturbation is not None # Currently constraints solving is designed for LpNorm. and isinstance(root.perturbation, PerturbationLpNorm) ): # Reset the constraints for this root. # TODO: Currently, the `reset` function simply overwrites, # should support more sophisticated reset logic. root.perturbation.reset_constraints( self.constraints_optimized, decision_thresh) intermediate_constr = None if not fix_interm_bounds: # If we still optimize all intermediate neurons, we can use # interm_bounds as reference bounds. if reference_bounds is None: reference_bounds = {} if interm_bounds is not None: reference_bounds.update(interm_bounds) interm_bounds = {} if i == iteration - 1: # No grad update needed for the last iteration need_grad = False if (self.device == 'cuda' and torch.get_default_dtype() == torch.float32 and use_float64_in_last_iteration): C, x, interm_bounds = self._to_float64( C, x, aux_reference_bounds, interm_bounds) if pruner: # we will use last update preserve mask in caller functions to recover # lA, l, u, etc to full batch size self.last_update_preserve_mask = pruner.preserve_mask pruner.cache_full_sized_alpha(optimizable_activations) # If input bounds are tightened with output constraints, they depend on the # relaxations of all other layers. The current iteration will recompute them. # This involves concretizing them, so they will depend on themselves. # To avoid a loop of gradients, remove gradients here. 
tighten_input_bounds = ( self.bound_opts['optimize_bound_args']['tighten_input_bounds'] ) if tighten_input_bounds: for root in self.roots(): if hasattr(root, 'perturbation') and root.perturbation is not None: root.perturbation.x_L = root.perturbation.x_L.detach() root.perturbation.x_U = root.perturbation.x_U.detach() with torch.no_grad() if not need_grad else ExitStack(): # ret is lb, ub or lb, ub, A_dict (if return_A is set to true) ret = self.compute_bounds( x, aux, C, method=method, IBP=IBP, forward=forward, bound_lower=bound_lower, bound_upper=bound_upper, reuse_ibp=reuse_ibp, return_A=return_A, final_node_name=final_node_name, average_A=average_A, # When intermediate bounds are recomputed, we must set it # to None interm_bounds=interm_bounds if fix_interm_bounds else None, # This is the currently tightest interval, which will be used to # pass split constraints when intermediate betas are used. reference_bounds=reference_bounds, # This is the interval used for checking for unstable neurons. aux_reference_bounds=aux_reference_bounds if sparse_intermediate_bounds else None, # These are intermediate layer beta variables and their # corresponding A matrices and biases. intermediate_constr=intermediate_constr, needed_A_dict=needed_A_dict, update_mask=pruner.preserve_mask if pruner else None, cache_bounds=len(apply_output_constraints_to) > 0, ) # If output constraints are used, it's possible that no inputs satisfy them. # If one of the layer that uses output constraints realizes this, it sets # self.infeasible_bounds = True for this element in the batch. if self.infeasible_bounds is not None and torch.any(self.infeasible_bounds): if ret[0] is not None: ret = ( torch.where( self.infeasible_bounds.unsqueeze(1), torch.full_like(ret[0], float('inf')), ret[0], ), ret[1], ) if ret[1] is not None: ret = ( ret[0], torch.where( self.infeasible_bounds.unsqueeze(1), torch.full_like(ret[1], float('-inf')), ret[1], ), ) ret_l, ret_u = ret[0], ret[1] if pruner: pruner.recover_full_sized_alpha(optimizable_activations) if (self.cut_used and i % cutter.log_interval == 0 and len(self.cut_beta_params) > 0): # betas[-1]: (2(0 lower, 1 upper), spec, batch, num_constrs) if ret_l is not None: print(i, 'lb beta sum:', f'{self.cut_beta_params[-1][0].sum() / ret_l.size(0)},', f'worst {ret_l.min()}') if ret_u is not None: print(i, 'lb beta sum:', f'{self.cut_beta_params[-1][1].sum() / ret_u.size(0)},', f'worst {ret_u.min()}') if i == 0: # save results at the first iteration best_ret = [ret.detach().clone() if ret is not None else None for ret in ret[:2]] ret_0 = ret[0].detach().clone() if bound_lower else ret[1].detach().clone() for node in optimizable_activations: if node.inputs[0].lower is None and node.inputs[0].upper is None: continue new_intermediate = [node.inputs[0].lower.detach().clone(), node.inputs[0].upper.detach().clone()] best_intermediate_bounds[node.name] = new_intermediate if sparse_intermediate_bounds: # Always using the best bounds so far as the reference # bounds. aux_reference_bounds[node.inputs[0].name] = new_intermediate if out_in_keys is not None: best_out_in_A_dict = clone_sub_A_dict(ret[2], out_in_keys) else: best_out_in_A_dict = None l = ret_l # Reduction over the spec dimension. 
if ret_l is not None and ret_l.shape[1] != 1: l = loss_reduction_func(ret_l) u = ret_u if ret_u is not None and ret_u.shape[1] != 1: u = loss_reduction_func(ret_u) # full_l, full_ret_l and full_u, full_ret_u is used for update the best full_ret_l, full_ret_u = ret_l, ret_u full_l = l full_ret = ret if pruner: (x, C, full_l, full_ret_l, full_ret_u, full_ret, stop_criterion) = pruner.prune( x, C, ret_l, ret_u, ret, full_l, full_ret_l, full_ret_u, full_ret, interm_bounds, aux_reference_bounds, reference_bounds, stop_criterion_func, bound_lower) else: stop_criterion = (stop_criterion_func(full_ret_l) if bound_lower else stop_criterion_func(-full_ret_u)) loss_ = l if bound_lower else -u total_loss = -1 * loss_ directly_optimize_layers = self.bound_opts['optimize_bound_args']['directly_optimize'] for directly_optimize_layer_name in directly_optimize_layers: total_loss += ( self[directly_optimize_layer_name].upper.sum() - self[directly_optimize_layer_name].lower.sum() ) if type(stop_criterion) == bool: loss = total_loss.sum() * (not stop_criterion) else: assert total_loss.shape == stop_criterion.shape loss = (total_loss * stop_criterion.logical_not()).sum() stop_criterion_final = isinstance( stop_criterion, torch.Tensor) and stop_criterion.all() if i == iteration - 1: best_ret = list(best_ret) if best_ret[0] is not None: best_ret[0] = best_ret[0].to(torch.get_default_dtype()) if best_ret[1] is not None: best_ret[1] = best_ret[1].to(torch.get_default_dtype()) if (i == iteration - 1 and self.device == 'cuda' and torch.get_default_dtype() == torch.float32 and use_float64_in_last_iteration): total_loss, x, full_ret = self._to_default_dtype( x, total_loss, full_ret, ret, best_intermediate_bounds, return_A) with torch.no_grad(): # for lb and ub, we update them in every iteration since updating them is cheap need_update = False improved_idx = None if keep_best: if best_ret[0] is not None: ( best_ret, best_out_in_A_dict, need_update, idx_mask, improved_idx, ) = _update_best_ret( full_ret, best_ret, loss_reduction_func, idx=0, deterministic=deterministic, best_out_in_A_dict=best_out_in_A_dict, out_in_keys=out_in_keys, ) if best_ret[1] is not None: ( best_ret, best_out_in_A_dict, need_update, idx_mask, improved_idx, ) = _update_best_ret( full_ret, best_ret, loss_reduction_func, idx=1, deterministic=deterministic, best_out_in_A_dict=best_out_in_A_dict, out_in_keys=out_in_keys, ) else: # Not saving the best, just keep the last iteration. if full_ret[0] is not None: best_ret[0] = full_ret[0] if full_ret[1] is not None: best_ret[1] = full_ret[1] if return_A: best_ret = [best_ret[0], best_ret[1], full_ret[2]] if out_in_keys is not None: # Update A_dict entry for output/input layer # This entry corresponds to the best bounds. # Other A_dict entries may not, as they are copied from the last iteration. best_ret[2][out_in_keys[0]][out_in_keys[1]] = best_out_in_A_dict patience = 0 if need_update else patience + 1 time_spent = time.time() - start # Save variables if this is the best iteration. 
# To save computational cost, we only check keep_best at the first # (in case divergence) and second half iterations # or before early stop by either stop_criterion or # early_stop_patience reached if ( i < 1 or i > int(iteration * start_save_best) or deterministic or stop_criterion_final or patience == early_stop_patience or time_spent > max_time ): # compare with the first iteration results and get improved indexes if bound_lower: if deterministic: idx_mask, idx = improved_idx, None else: idx_mask, idx = _get_idx_mask(0, full_ret_l, ret_0, loss_reduction_func) ret_0[idx] = full_ret_l[idx] else: if deterministic: idx_mask, idx = improved_idx, None else: idx_mask, idx = _get_idx_mask(1, full_ret_u, ret_0, loss_reduction_func) ret_0[idx] = full_ret_u[idx] if idx is not None: # for update propose, we condition the idx to update only # on domains preserved if pruner: reference_idx, idx = pruner.prune_idx(idx_mask, idx, x) else: reference_idx = idx _update_optimizable_activations( optimizable_activations, interm_bounds, fix_interm_bounds, best_intermediate_bounds, reference_idx, idx, alpha, best_alphas, deterministic) if beta: self.update_best_beta(enable_opt_interm_bounds, betas, best_betas, idx) if os.environ.get('AUTOLIRPA_DEBUG_OPT', False): print(f'****** iter [{i}]', f'loss: {loss.item()}, lr: {opt.param_groups[0]["lr"]}', (' pruning_in_iteration open status: ' f'{pruner.pruning_in_iteration}') if pruner else '') if stop_criterion_final: print(f'\nall verified at {i}th iter') break if patience > early_stop_patience: print(f'Early stop at {i}th iter due to {early_stop_patience}' ' iterations no improvement!') break if time_spent > max_time: print(f'Early stop at {i}th iter due to exceeding the time limit ' f'for the optimization (time spent: {time_spent})') break if i != iteration - 1 and not loss.requires_grad: assert i == 0, (i, iteration) print('[WARNING] No optimizable parameters found. Will skip optimiziation. ' 'This happens e.g. if all optimizable layers are freezed or the ' 'network has no optimizable layers.') break opt.zero_grad(set_to_none=True) if verbosity > 2: current_lr = [param_group['lr'] for param_group in opt.param_groups] print(f'*** iter [{i}]\n', f'loss: {loss.item()}', total_loss.squeeze().detach().cpu().numpy(), 'lr: ', current_lr) if beta: print_optimized_beta(optimizable_activations) if beta and i == 0 and verbosity > 2: breakpoint() if i != iteration - 1: # we do not need to update parameters in the last step since the # best result already obtained loss.backward() # All intermediate variables are not needed at this point. self._clear_and_set_new( None, cache_bounds=len(apply_output_constraints_to) > 0, ) if opt_choice == 'adam-autolr': opt.step(lr_scale=[loss_weight, loss_weight]) else: opt.step() if beta: for b in betas: b.data = (b >= 0) * b.data for dmi in range(len(dense_coeffs_mask)): # apply dense mask to the dense split coeffs matrix coeffs[dmi].data = ( dense_coeffs_mask[dmi].float() * coeffs[dmi].data) if alpha: for m in optimizable_activations: m.clip_alpha() if apply_output_constraints_to is not None and len(apply_output_constraints_to) > 0: for m in self.nodes(): m.clip_gammas() scheduler.step() if pruner: pruner.next_iter() if pruner: best_ret = pruner.update_best(full_ret_l, full_ret_u, best_ret) if verbosity > 3: breakpoint() if keep_best: # Set all variables to their saved best values. 
with torch.no_grad(): for idx, node in enumerate(optimizable_activations): if node.name not in best_intermediate_bounds: continue if alpha: # Assigns a new dictionary. node.alpha = best_alphas[node.name] # Update best intermediate layer bounds only when they are # optimized. If they are already fixed in # interm_bounds, then do nothing. best_intermediate = best_intermediate_bounds[node.name] node.inputs[0].lower.data = best_intermediate[0].data node.inputs[0].upper.data = best_intermediate[1].data if beta: for node in self.nodes_with_beta: assert getattr(node, 'sparse_betas', None) is not None if enable_opt_interm_bounds: for key in node.sparse_betas.keys(): node.sparse_betas[key].val.copy_( best_betas[node.name][key]) else: node.sparse_betas[0].val.copy_(best_betas[node.name]) if self.cut_used: for ii in range(len(self.cut_beta_params)): self.cut_beta_params[ii].data = best_betas['cut'][ii].data if interm_bounds is not None and not fix_interm_bounds: for l in self._modules.values(): if (l.name in interm_bounds.keys() and l.is_lower_bound_current()): l.lower = torch.max(l.lower, interm_bounds[l.name][0]) l.upper = torch.min(l.upper, interm_bounds[l.name][1]) infeasible_neurons = l.lower > l.upper if infeasible_neurons.any(): print(f'Infeasibility detected in layer {l.name}.', infeasible_neurons.sum().item(), infeasible_neurons.nonzero()[:, 0]) if verbosity > 0: if best_ret[0] is not None: # FIXME: unify the handling of l and u. print('best_l after optimization:', best_ret[0].sum().item()) if beta: print('beta sum per layer:', [p.sum().item() for p in betas]) print('alpha/beta optimization time:', time.time() - start) for node in optimizable_activations: node.opt_end() if pruner: pruner.update_ratio(full_l, full_ret_l) pruner.clean_full_sized_alpha_cache() if os.environ.get('AUTOLIRPA_DEBUG_OPT', False): print() return best_ret def init_alpha(self: 'BoundedModule', x, share_alphas=False, method='backward', c=None, bound_lower=True, bound_upper=True, final_node_name=None, interm_bounds=None, reference_alphas=None, skip_bound_compute=False): self(*x) # Do a forward pass to set perturbed nodes final = (self.final_node() if final_node_name is None else self[final_node_name]) self._set_used_nodes(final) optimizable_activations = self.get_enabled_opt_act() for node in optimizable_activations: # TODO(7/6/2023) In the future, we may need to enable alpha sharing # automatically by consider the size of all the optimizable nodes in the # graph. For now, only an adhoc check in MatMul is added. 
node._all_optimizable_activations = optimizable_activations # initialize the parameters node.opt_init() apply_output_constraints_to = ( self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] ) if (not skip_bound_compute or interm_bounds is None or reference_alphas is None or not all( [act.name in reference_alphas for act in optimizable_activations])): skipped = False # if new interval is None, then CROWN interval is not present # in this case, we still need to redo a CROWN pass to initialize # lower/upper with torch.no_grad(): # We temporarilly deactivate output constraints self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] = [] l, u = self.compute_bounds( x=x, C=c, method=method, bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, interm_bounds=interm_bounds) self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] = ( apply_output_constraints_to ) if len(apply_output_constraints_to) > 0: # Some layers, such as the BoundTanh layer, do some of their initialization # in the forward pass. We need to call the forward pass again to ensure # that they are initialized for the output constraints, too. l, u = self.compute_bounds( x=x, C=c, method=method, bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, interm_bounds=interm_bounds, cache_bounds=True) else: # we skip, but we still would like to figure out the "used", # "perturbed", "backward_from" of each note in the graph skipped = True # this set the "perturbed" property self.set_input(*x, interm_bounds=interm_bounds) self.backward_from = {node: [final] for node in self._modules} l = u = None final_node_name = final_node_name or self.final_name init_intermediate_bounds = {} for node in optimizable_activations: start_nodes = [] if method in ['forward', 'forward+backward']: start_nodes.append(('_forward', 1, None, False)) if method in ['backward', 'forward+backward']: start_nodes += self.get_alpha_crown_start_nodes( node, c=c, share_alphas=share_alphas, final_node_name=final_node_name, ) if not start_nodes: continue if skipped: node.restore_alpha(reference_alphas[node.name], device=x[0].device, dtype=x[0].dtype) else: node.init_opt_parameters(start_nodes) if node in self.splittable_activations: for i in node.requires_input_bounds: input_node = node.inputs[i] if (not input_node.perturbed or node.inputs[i].lower is None and node.inputs[i].upper is None): continue init_intermediate_bounds[node.inputs[i].name] = ( [node.inputs[i].lower.detach(), node.inputs[i].upper.detach()]) if ( apply_output_constraints_to is not None and len(apply_output_constraints_to) > 0 and hasattr(self, 'constraints') ): # self.constraints.shape = (batch_size, num_constraints, num_output_neurons) # For abCROWN we know that: # If the output constraints are a conjunction, the shape is (1, num_constraints, *) # If the output constraints are a disjunction, the shape is (num_constraints, 1, *) # Checking which entry is 1 allows to discern both cases. # If auto_LiRPA is used directly, we could have batches of inputs with more than one # constraint. This is currently not supported. 
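# ---------------------------------------------------------------------------
# A small sketch (toy numbers) of the constraint layout convention described
# above. Each row along dim 1 of self.constraints is one linear constraint on
# the num_output_neurons outputs (the sign convention is spelled out where H
# and d are built in output_constraints.py): a conjunction keeps all rows in a
# single batch entry, while a disjunction is transposed into one batch entry
# per constraint, so the entry that equals 1 tells the two cases apart.
import torch

# Conjunction of two constraints over 3 outputs -> shape (1, 2, 3).
conjunction = torch.tensor([[[1., -1., 0.],
                             [1., 0., -1.]]])
# The same two constraints as a disjunction -> shape (2, 1, 3).
disjunction = conjunction.transpose(0, 1)
for constraints in (conjunction, disjunction):
    if constraints.size(0) == 1:
        num_gammas = constraints.size(1)  # conjunction: one gamma per constraint
    else:
        num_gammas = constraints.size(0)  # disjunction: one gamma per batch entry
# ---------------------------------------------------------------------------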
if self.constraints.size(0) == 1: num_gammas = self.constraints.size(1) elif self.constraints.size(1) == 1: num_gammas = self.constraints.size(0) else: raise NotImplementedError( 'To use output constraints, either have a batch size of 1 or use only one ' 'output constraint' ) for node in self.nodes(): node.init_gammas(num_gammas) if self.bound_opts['verbosity'] >= 1: print('Optimizable variables initialized.') if skip_bound_compute: return init_intermediate_bounds else: return l, u, init_intermediate_bounds ================================================ FILE: auto_LiRPA/output_constraints.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .utils import * from .bound_ops import * from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule def invprop_enabled(self: 'BoundedModule'): return self.bound_opts['optimize_bound_args']['apply_output_constraints_to'] def invprop_init_infeasible_bounds(self: 'BoundedModule', bound_node, C): # Infeasible bounds can result from unsatisfiable output constraints. # We track them to set the corresponding lower bounds to inf and upper bounds to # -inf. if self.infeasible_bounds is None: device = bound_node.attr['device'] if isinstance(C, Patches): self.infeasible_bounds = torch.full((C.shape[1],), False, device=device) else: assert isinstance(C, (torch.Tensor, eyeC, OneHotC)), type(C) self.infeasible_bounds = torch.full((C.shape[0],), False, device=device) def invprop_check_infeasible_bounds(self: 'BoundedModule', lb, ub): if torch.any(self.infeasible_bounds): if lb is not None: assert lb.size(0) == self.infeasible_bounds.size(0) lb = torch.where(self.infeasible_bounds.unsqueeze(1), torch.tensor(float('inf'), device=lb.device), lb) if ub is not None: assert ub.size(0) == self.infeasible_bounds.size(0) ub = torch.where(self.infeasible_bounds.unsqueeze(1), torch.tensor(float('-inf'), device=ub.device), ub) return lb, ub def backward_general_invprop( self: 'BoundedModule', initial_As, initial_lb, initial_ub, bound_node, C, start_backpropagation_at_node = None, bound_lower=True, bound_upper=True, average_A=False, need_A_only=False, unstable_idx=None, update_mask=None, ): use_beta_crown = self.bound_opts['optimize_bound_args']['enable_beta_crown'] # Sometimes, not using output constraints can give better results. # When this flag is set, the bounds are computed both with and without # output constraints, and the best of the two is returned. best_of_oc_and_no_oc = ( self.bound_opts['optimize_bound_args']['best_of_oc_and_no_oc'] ) assert not use_beta_crown assert not self.cut_used assert initial_As is None assert initial_lb is None assert initial_ub is None if best_of_oc_and_no_oc: # Important: If input bounds are tightened, then this call must be done # *before* the use of output constraints. # At the end of backward_general, the bounds are concretized. 
For the input # bounds, those concrete bounds are used to overwrite the bounds in the # input perturbations, so they'll then be used by all other layers during # their concretization. These input bounds *must* have their gradients # w.r.t. the relaxations set up. The call to backward_general without # output constraints will overwrite these bounds with values that do not # have gradients. So it must come first. with torch.no_grad(): o_res = self.backward_general( bound_node=bound_node, C=C, start_backpropagation_at_node=start_backpropagation_at_node, bound_lower=bound_lower, bound_upper=bound_upper, average_A=average_A, need_A_only=need_A_only, unstable_idx=unstable_idx, update_mask=update_mask, apply_output_constraints_to=[], ) res = self.backward_general_with_output_constraint( bound_node=bound_node, C=C, start_backporpagation_at_node=start_backpropagation_at_node, bound_lower=bound_lower, bound_upper=bound_upper, average_A=average_A, need_A_only=need_A_only, unstable_idx=unstable_idx, update_mask=update_mask, ) if best_of_oc_and_no_oc: # We use the best of both results. This would convert Infs to NaNs # (because inf - inf = nan), so those entries get masked. res0_inf_mask = torch.isinf(res[0]) r0 = res[0] - res[0].detach() + torch.max(res[0].detach(), o_res[0].detach()) r0 = torch.where(res0_inf_mask, res[0], r0) res1_inf_mask = torch.isinf(res[1]) r1 = res[1] - res[1].detach() + torch.min(res[1].detach(), o_res[1].detach()) r1 = torch.where(res1_inf_mask, res[1], r1) if self.return_A: if res[2] != {}: raise NotImplementedError( "Merging of A not implemented yet. If set, try disabling --best_of_oc_and_no_oc" ) res = (r0, r1, {}) else: res = (r0, r1) batch_size = res[0].size(0) infeasible_bounds = torch.any(res[0].reshape((batch_size, -1)) > res[1].reshape((batch_size, -1)), dim=1) if torch.any(infeasible_bounds): self.infeasible_bounds = torch.logical_or(self.infeasible_bounds, infeasible_bounds) return res def backward_general_with_output_constraint( self: 'BoundedModule', bound_node, C, start_backporpagation_at_node = None, bound_lower=True, bound_upper=True, average_A=False, need_A_only=False, unstable_idx=None, update_mask=None, ): assert start_backporpagation_at_node is None assert not isinstance(C, str) neurons_in_layer = 1 for d in bound_node.output_shape[1:]: neurons_in_layer *= d # backward_general uses C to compute batch_size, output_dim and output_shape, just like below. # When output constraints are applied, it will perform a different backpropagation, # but those variables need to be computed regardless. So we need to retain the original C # and pass it on to backward_general. If initial_As is set (which it is, if this code here # is executed), it will not use C for anything else. 
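# ---------------------------------------------------------------------------
# A minimal 1-D sketch (toy numbers, not the library's API) of the Lagrangian
# argument behind the gamma construction below: for any gamma >= 0 and any x
# whose output satisfies H f(x) + d <= 0,
#     c^T f(x) >= c^T f(x) + gamma^T (H f(x) + d),
# so minimizing the augmented objective over the *unconstrained* input region
# still lower-bounds the constrained minimum, and gamma can be optimized to
# tighten it. Here f(x) = x on [-2, 2] with the output constraint x >= 1,
# i.e. H = -1 and d = 1.
import torch

xs = torch.linspace(-2.0, 2.0, steps=401)
constrained_min = xs[(-xs + 1.0) <= 0].min()     # true constrained minimum: 1.0
for gamma in (0.0, 0.5, 1.0):
    bound = (xs + gamma * (-xs + 1.0)).min()     # valid lower bound for any gamma >= 0
    assert bound <= constrained_min + 1e-6
    # gamma = 0 gives -2.0 (plain bound); gamma = 1 recovers the tight value 1.0.
# ---------------------------------------------------------------------------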
orig_C = C C, batch_size, output_dim, output_shape = self._preprocess_C(C, bound_node) device = bound_node.device if device is None and hasattr(C, 'device'): device = C.device # self.constraints.shape == (batch_size, num_constraints, output_neurons) batch_size = self.constraints.size(0) num_constraints = self.constraints.size(1) # 1) Linear: Hx + d # Result is a tensor, <= 0 for all entries if output constraint is satisfied H = self.constraints.transpose(1,2) # (batch_size, output_neurons, num_constraints) d = -self.thresholds # (batch) assert H.ndim == 3 assert H.size(0) == batch_size assert H.size(2) == num_constraints assert d.ndim == 1 if batch_size > 1: assert num_constraints == 1 assert d.size(0) == batch_size else: assert d.size(0) == num_constraints if hasattr(bound_node, 'gammas'): gammas = bound_node.gammas else: if hasattr(bound_node, 'opt_stage'): assert bound_node.opt_stage not in ['opt', 'reuse'] if batch_size == 1: gammas = torch.zeros((2, num_constraints, neurons_in_layer), device=device) else: gammas = torch.zeros((2, batch_size, neurons_in_layer), device=device) # H.shape = (batch_size, output_neurons, num_constraints==1) # We need used_weight.shape = (batch_size, this_layer_neurons, prev_layer_neurons) # This is satisfied by H, because it will be transposed before being accessed and # output_neurons == prev_layer_neurons linear_Hxd_layer_weight_value = nn.Parameter(H.to(gammas)) linear_Hxd_layer_weight = BoundParams( ori_name="/linear_Hxd_layer_weight", value=None, perturbation=None, ) linear_Hxd_layer_weight.name = "linear_Hxd_layer_weight" linear_Hxd_layer_weight.lower = linear_Hxd_layer_weight_value linear_Hxd_layer_weight.upper = linear_Hxd_layer_weight_value if batch_size == 1: linear_Hxd_layer_bias_value = nn.Parameter(d.float().to(device)) else: linear_Hxd_layer_bias_value = nn.Parameter(d.float().to(device).unsqueeze(1)) linear_Hxd_layer_bias = BoundParams( ori_name="/linear_Hxd_layer_bias", value=None, perturbation=None, ) linear_Hxd_layer_bias.name = "linear_Hxd_layer_bias" linear_Hxd_layer_bias.lower = linear_Hxd_layer_bias_value linear_Hxd_layer_bias.upper = linear_Hxd_layer_bias_value linear_Hxd_layer = BoundLinear( attr=None, inputs=[ self.final_node(), linear_Hxd_layer_weight, linear_Hxd_layer_bias, ], output_index=0, options=self.bound_opts, ) linear_Hxd_layer.name = "/linear_Hxd_layer" linear_Hxd_layer.device = device linear_Hxd_layer.perturbed = True linear_Hxd_layer.output_shape = torch.Size([1, num_constraints]) linear_Hxd_layer.batch_dim = bound_node.batch_dim linear_Hxd_layer.batched_weight_and_bias = (batch_size > 1) # 2) Gamma # A seperate gamma per output constraint. All gammas are always positive. # Depending on the configuration, gammas are shared across neurons in the # optimized layer. 
gamma_layer_weight = BoundParams( ori_name="/gamma_layer_weight", value=None, perturbation=None, ) gamma_layer_weight.name = "gamma_layer_weight" assert gammas.ndim == 3 assert gammas.size(0) == 2 if batch_size == 1: # gammas.shape = (2, num_constraints, this_layer_neurons) assert gammas.ndim == 3 assert gammas.size(0) == 2 assert gammas.size(1) == num_constraints this_layer_neurons = gammas.size(2) # In linear.py, these weights will be used to compute next_A based on last_A: # last_A.shape = (unstable_neurons, batch_size==1, this_layer_neurons) # next_A.shape = (unstable_neurons, batch_size==1, prev_layer_neurons) # prev_layer_neurons == num_constraints # So we set the weights as # (num_constraints, this_layer_neurons) # This will be transposed and accessed by linear.py as # (this_layer_neurons, num_constraints) # Note that the shape will be further modified in linear.py gamma_layer_weight.lower = gammas[0].unsqueeze(0) gamma_layer_weight.upper = -gammas[1].unsqueeze(0) else: # ABCrown optimized the computation by transposing the query. # Instead of one batch entry with N constraints, we have N batch entries # with one contraint each. We do not support multiple batch entries # each with multiple constraints. # gammas.shape = (2, batch_size, this_layer_neurons) # Here, we can only check that the batch size is correct. assert gammas.size(1) == batch_size assert num_constraints == 1 this_layer_neurons = gammas.size(2) # In linear.py, these weights will be used to compute next_A based on last_A: # last_A.shape = (unstable_neurons, batch_size, this_layer_neurons) # next_A.shape = (unstable_neurons, batch_size, prev_layer_neurons==1) # prev_layer_neurons == 1 because it's num_constraints # So we set the weights as # (batch_size, 1, this_layer_neurons) # This will be transposed and accessed by linear.py as # (batch_size, this_layer_neurons, 1) # Note that the shape will be further modified in linear.py gamma_layer_weight.lower = gammas[0].unsqueeze(1) gamma_layer_weight.upper = -gammas[1].unsqueeze(1) gamma_layer = BoundLinear( attr=None, inputs=[linear_Hxd_layer, gamma_layer_weight], output_index=0, options=self.bound_opts, ) gamma_layer.name = "/gamma_layer" gamma_layer.device = device gamma_layer.perturbed = True gamma_layer.input_shape = linear_Hxd_layer.output_shape gamma_layer.output_shape = torch.Size([1, this_layer_neurons]) gamma_layer.batch_dim = bound_node.batch_dim gamma_layer.use_seperate_weights_for_lower_and_upper_bounds = True gamma_layer.batched_weight_and_bias = (batch_size > 1) # 3) Reshape # To the same shape as the layer that's optimized. reshape_layer_output_shape = BoundBuffers( ori_name="/reshape_layer_output_shape", value = torch.tensor(bound_node.output_shape[1:]), perturbation=None, options=self.bound_opts, ) reshape_layer_output_shape.name = "reshape_layer_output_shape" reshape_layer = BoundReshape( attr=None, inputs = [gamma_layer, reshape_layer_output_shape], output_index=0, options=self.bound_opts, ) reshape_layer.name = "/reshape_layer" reshape_layer.device = device reshape_layer.perturbed = True reshape_layer.input_shape = gamma_layer.output_shape reshape_layer.output_shape = bound_node.output_shape reshape_layer.batch_dim = bound_node.batch_dim # The residual connection that connects the optimized layer and the reshape # layer from above is not explicitly coded, it's handled implicitly: # Here, we propagate backwards through 5->4->3->2->1->regular output layer and let # CROWN handle the propagation from there on backwards to the input layer. 
# The other half of the residual connection is implemented by explicitly setting # the .lA and .uA values of the optimized layer to C. # This is done via initial_As, initial_lb, initial_ub. if isinstance(C, (OneHotC, eyeC)): batch_size = C.shape[1] assert C.shape[0] <= C.shape[2] assert len(C.shape) == 3 # This is expensive, but Reshape doesn't support OneHotC objects if isinstance(C, OneHotC): C = torch.eye(C.shape[2], device=C.device)[C.index].unsqueeze(1).expand(-1, batch_size, -1) else: C = torch.eye(C.shape[2], device=C.device).unsqueeze(1).expand(-1, batch_size, -1) start_shape = None lA = C if bound_lower else None uA = C if bound_upper else None # 3) Reshape A, lower_b, upper_b = reshape_layer.bound_backward( lA, uA, *reshape_layer.inputs, start_node=bound_node, unstable_idx=unstable_idx, start_shape=start_shape) assert lower_b == 0 assert upper_b == 0 lA = A[0][0] uA = A[0][1] # 2) Gamma A, lower_b, upper_b = gamma_layer.bound_backward( lA, uA, *gamma_layer.inputs, start_node=bound_node, unstable_idx=unstable_idx, start_shape=start_shape) assert lower_b == 0 assert upper_b == 0 lA = A[0][0] uA = A[0][1] # 1) Hx + d A, lower_b, upper_b = linear_Hxd_layer.bound_backward( lA, uA, *linear_Hxd_layer.inputs, start_node=bound_node, unstable_idx=unstable_idx, start_shape=start_shape) # lower_b and upper_b are no longer 0, because d wasn't 0. lA = A[0][0] uA = A[0][1] # This encodes the residual connection. initial_As = { self.final_node().name: (lA, uA), bound_node.name: (C, C), } assert lower_b.ndim == 2 assert upper_b.ndim == 2 return self.backward_general( bound_node = bound_node, start_backpropagation_at_node = self.final_node(), C = orig_C, # only used for batch_size, output_dim, output_shape computation bound_lower = bound_lower, bound_upper = bound_upper, average_A = average_A, need_A_only = need_A_only, unstable_idx = unstable_idx, update_mask = update_mask, apply_output_constraints_to = [], # no nested application initial_As = initial_As, initial_lb = lower_b, initial_ub = upper_b, ) ================================================ FILE: auto_LiRPA/parse_graph.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import torch from torch.onnx.utils import _optimize_graph from collections import OrderedDict from collections import namedtuple from packaging import version import re import os import traceback from .bounded_tensor import BoundedTensor, BoundedParameter from .utils import logger, unpack_inputs Node = namedtuple('Node', ( 'name', 'ori_name', 'inputs', 'attr', 'op', 'param', 'input_index', 'bound_node', 'output_index', 'perturbation'), defaults=(None,) * 10) def get_node_name(node): return node.debugName() def get_node_attribute(node, attribute_name): if hasattr(torch.onnx.symbolic_helper, '_node_get'): # Pytorch >= 1.13. return torch.onnx.symbolic_helper._node_get(node, attribute_name) else: # Pytorch <= 1.12. 
This will call _node_getitem in torch.onnx.utils. return node[attribute_name] def parse_graph(graph, inputs, params): input_all = [] input_used = [] scope = {} for n in graph.inputs(): input_all.append(n.debugName()) for n in graph.nodes(): n_inputs = [get_node_name(i) for i in n.inputs()] for inp in n.inputs(): input_used.append(inp.debugName()) for out in n.outputs(): scope[get_node_name(out)] = n.scopeName() for node in graph.inputs(): name = get_node_name(node) scope[name] = '' for n in graph.outputs(): name = get_node_name(n) if name in input_all: # This output node directly comes from an input node with an Op input_used.append(n.debugName()) def name_with_scope(node): name = get_node_name(node) name = '/'.join([scope[name], name]) if '.' in name: # "." should not be used as it could issues in state_dict loading # where PyTorch would treat it as having submodules name = name.replace('.', '-') return name nodesOP = [] for n in graph.nodes(): attrs = {k: get_node_attribute(n, k) for k in n.attributeNames()} n_inputs = [name_with_scope(i) for i in n.inputs()] for i, out in enumerate(list(n.outputs())): nodesOP.append(Node(**{ 'name': name_with_scope(out), 'op': n.kind(), 'inputs': n_inputs, 'attr': attrs, 'output_index': i, })) # filter out input nodes in `graph.inputs()` that are actually used nodesIn = [] used_by_index = [] for i, n in enumerate(graph.inputs()): name = get_node_name(n) used = name in input_used used_by_index.append(used) if used: nodesIn.append(n) # filter out input nodes in `inputs` that are actually used inputs_unpacked = unpack_inputs(inputs) assert len(list(graph.inputs())) == len(inputs_unpacked) + len(params) inputs = [inputs_unpacked[i] for i in range(len(inputs_unpacked)) if used_by_index[i]] # index of the used inputs among all the inputs input_index = [i for i in range(len(inputs_unpacked)) if used_by_index[i]] # Add a name to all inputs inputs = list(zip(["input_{}".format(input_index[i]) for i in range(len(inputs))], inputs)) # filter out params that are actually used params = [params[i] for i in range(len(params)) if used_by_index[i + len(inputs_unpacked)]] inputs_and_params = inputs + params assert len(nodesIn) == len(inputs_and_params) # output nodes of the module nodesOut = [] for n in graph.outputs(): # we only record names nodesOut.append(name_with_scope(n)) for i, n in enumerate(nodesIn): if (isinstance(inputs_and_params[i][1], BoundedTensor) or isinstance(inputs_and_params[i][1], BoundedParameter)): perturbation = inputs_and_params[i][1].ptb else: perturbation = None if i > 0 and n.type().sizes() != list(inputs_and_params[i][1].size()): raise RuntimeError("Input tensor shapes do not much: {} != {}".format( n.type().sizes(), list(inputs_and_params[i][1].size()))) name = name_with_scope(n) nodesIn[i] = Node(**{ 'name': name, 'ori_name': inputs_and_params[i][0], 'op': 'Parameter', 'inputs': [], 'attr': str(n.type()), 'param': inputs_and_params[i][1] if i >= len(inputs) else None, # index among all the inputs including unused ones 'input_index': input_index[i] if i < len(inputs) else None, # Input nodes may have perturbation, if they are wrapped in BoundedTensor or BoundedParameters 'perturbation': perturbation, }) return nodesOP, nodesIn, nodesOut def _get_jit_params(module, param_exclude, param_include): state_dict = torch.jit._unique_state_dict(module, keep_vars=True) if param_exclude is not None: param_exclude = re.compile(param_exclude) if param_include is not None: param_include = re.compile(param_include) new_state_dict = OrderedDict() for k, v 
in state_dict.items(): if param_exclude is not None and param_exclude.match(k) is not None: print(f'\nremove input element {k} from nodesIn\n') continue if param_include is not None and param_include.match(k) is None: continue new_state_dict[k] = v params = zip(new_state_dict.keys(), new_state_dict.values()) return params def get_output_template(out): """Construct a template for the module output with `None` representing places to be filled with tensor results""" if isinstance(out, torch.Tensor): return None elif isinstance(out, list): return list([get_output_template(o) for o in out]) elif isinstance(out, tuple): return tuple([get_output_template(o) for o in out]) elif isinstance(out, dict): template = {} for key in out: template[key] = get_output_template(out[key]) return template else: raise NotImplementedError def parse_source(node): kind = node.kind() if hasattr(node, 'sourceRange'): source_range_str = node.sourceRange() # divide source_range_str by '\n' and drop any lines containing 'torch.nn' source_range_str = '\n'.join([line for line in source_range_str.split('\n') if 'torch/nn' not in line]) match = re.match(r'([^ ]+\.py)\((\d+)\)', source_range_str) if match: # match.group(1) is the file name # match.group(2) is the line number return f"{kind}_{os.path.basename(match.group(1)).split('.')[0]}_{match.group(2)}" return kind def update_debug_names(trace_graph): visited = [] for n in trace_graph.nodes(): for input in n.inputs(): if input.debugName() not in visited: input.setDebugName(f"{input.debugName()}_{parse_source(n)}") visited.append(input.debugName()) for output in n.outputs(): if output.debugName() not in visited: output.setDebugName(f"{output.debugName()}_{parse_source(n)}") visited.append(output.debugName()) def parse_module(module, inputs, param_exclude=".*AuxLogits.*", param_include=None): params = _get_jit_params(module, param_exclude=param_exclude, param_include=param_include) try: trace, out = torch.jit._get_trace_graph(module, inputs) except: print(traceback.format_exc()) raise RuntimeError( 'Failed to get the trace. ' 'Please check that the model and inputs are compatible with torch.jit.') if version.parse(torch.__version__) < version.parse("2.0.0"): from torch.onnx.symbolic_helper import _set_opset_version _set_opset_version(12) if version.parse(torch.__version__) >= version.parse("2.1.0"): # This is needed for BoundConcatGrad to work with torch 2.1.0 and later if version.parse(torch.__version__) < version.parse("2.9.0"): from torch.onnx._globals import GLOBALS else: from torch.onnx._internal.torchscript_exporter._globals import GLOBALS GLOBALS.autograd_inlining = False logger.debug("Graph before ONNX convertion:") logger.debug(trace) # Assuming that the first node in the graph is the primary input node. # It must have a batch dimension. 
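# ---------------------------------------------------------------------------
# A hedged usage sketch of parse_module() (the model and input below are made
# up): trace a small network and inspect the parsed node lists. Whether the
# underlying torch.jit tracing succeeds depends on the installed PyTorch
# version, as handled above.
import torch
import torch.nn as nn
from auto_LiRPA.parse_graph import parse_module

toy_net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
nodesOP, nodesIn, nodesOut, template = parse_module(toy_net, (torch.randn(1, 4),))
print([n.op for n in nodesOP])    # ONNX ops, e.g. onnx::Gemm, onnx::Relu
print([n.name for n in nodesIn])  # used graph inputs and parameters
print(nodesOut)                   # names of the graph output nodes
# ---------------------------------------------------------------------------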
primary_input = get_node_name(next(iter(trace.inputs()))) trace_graph = _optimize_graph( trace, torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK, params_dict={}, input_names=[primary_input], dynamic_axes={primary_input: {0: 'batch'}}) logger.debug('trace_graph: %s', trace_graph) if os.environ.get('AUTOLIRPA_DEBUG_NAMES', 0): update_debug_names(trace_graph) logger.debug("ONNX graph:") logger.debug(trace_graph) if not isinstance(inputs, tuple): inputs = (inputs, ) nodesOP, nodesIn, nodesOut = parse_graph(trace_graph, tuple(inputs), tuple(params)) for i in range(len(nodesOP)): param_in = OrderedDict() for inp in nodesOP[i].inputs: for n in nodesIn: if inp == n.name: param_in.update({inp:n.param}) nodesOP[i] = nodesOP[i]._replace(param=param_in) template = get_output_template(out) return nodesOP, nodesIn, nodesOut, template ================================================ FILE: auto_LiRPA/patches.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import torch import torch.nn.functional as F from torch import Tensor def insert_zeros(image, s): """ Insert s columns and rows 0 between every pixel in the image. For example: image = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] s = 2 output = [[1, 0, 0, 2, 0, 0, 3], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [4, 0, 0, 5, 0, 0, 6], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [7, 0, 0, 8, 0, 0, 9]] """ if s <= 0: return image matrix = torch.zeros(size=(image.size(0), image.size(1), image.size(2) * (s+1) - s, image.size(3) * (s+1) - s), dtype=image.dtype, device=image.device) matrix_stride = matrix.stride() selected_matrix = torch.as_strided(matrix, [ # Shape of the output matrix. matrix.size(0), # Batch size. matrix.size(1), # Channel. image.size(2), # H (without zeros) image.size(3), # W (without zeros) ], [ # Stride of the output matrix. matrix_stride[0], # Batch size dimension, keep using the old stride. matrix_stride[1], # Channel dimension. matrix_stride[2] * (s + 1), # Move s+1 rows. s+1, # Move s+1 pixels. ]) # Move a pixel (on the width direction). selected_matrix[:] = image return matrix def remove_zeros(image, s, remove_zero_start_idx=(0,0)): if s <= 0: return image matrix_stride = image.stride() storage_offset = image.storage_offset() return torch.as_strided(image, [ # Shape of the output matrix. *image.shape[:-2], (image.size(-2) - remove_zero_start_idx[-2] + (s + 1) - 1) // (s + 1), # H (without zeros) (image.size(-1) - remove_zero_start_idx[-1] + (s + 1) - 1) // (s + 1), # W (without zeros) ], [ # Stride of the output matrix. *matrix_stride[:-2], matrix_stride[-2] * (s + 1), # Move s+1 rows. matrix_stride[-1] * (s + 1), # Move s+1 pixels. ], storage_offset + matrix_stride[-2] * remove_zero_start_idx[-2] + matrix_stride[-1] * remove_zero_start_idx[-1] ) def unify_shape(shape): """ Convert shapes to 4-tuple: (left, right, top, bottom). 
""" if shape is not None: if isinstance(shape, int): # Same on all four directions. shape = (shape, shape, shape, shape) if len(shape) == 2: # (height direction, width direction). shape = (shape[1], shape[1], shape[0], shape[0]) assert len(shape) == 4 # Returned: (left, right, top, bottom). return shape def simplify_shape(shape): """ Convert shapes to 2-tuple or a single number. Used to avoid extra padding operation because the padding operation in F.conv2d is not general enough. """ if len(shape) == 4: # 4-tuple: (left, right, top, bottom). if shape[0] == shape[1] and shape[2] == shape[3]: shape = (shape[2], shape[0]) if len(shape) == 2: # 2-tuple: (height direction, width direction). if shape[0] == shape[1]: shape = shape[0] return shape def is_shape_used(shape, expected=0): if isinstance(shape, int): return shape != expected else: return sum(shape) != expected class Patches: """ A special class which denotes a convoluntional operator as a group of patches the shape of Patches.patches is [batch_size, num_of_patches, out_channel, in_channel, M, M] M is the size of a single patch Assume that we have a conv2D layer with w.weight(out_channel, in_channel, M, M), stride and padding applied on an image (N * N) num_of_patches = ((N + padding * 2 - M)//stride + 1) ** 2 Here we only consider kernels with the same H and W """ def __init__( self, patches=None, stride=1, padding=0, shape=None, identity=0, unstable_idx=None, output_shape=None, inserted_zeros=0, output_padding=0, input_shape=None): # Shape: [batch_size, num_of_patches, out_channel, in_channel, M, M] # M is the size of a single patch # Assume that we have a conv2D layer with w.weight(out_channel, in_channel, M, M), stride and padding applied on an image (N * N) # num_of_patches = ((N + padding * 2 - M)//stride + 1) ** 2 # Here we only consider kernels with the same H and W self.patches = patches self.stride = stride self.padding = padding self.shape = shape self.identity = identity self.unstable_idx = unstable_idx self.output_shape = output_shape self.input_shape = input_shape self.inserted_zeros = inserted_zeros self.output_padding = output_padding self.simplify() def __add__(self, other): if isinstance(other, Patches): # Insert images with zero to make stride the same, if necessary. assert self.stride == other.stride if self.unstable_idx is not None or other.unstable_idx is not None: if self.unstable_idx is not other.unstable_idx: # Same tuple object. raise ValueError('Please set bound option "sparse_conv_intermediate_bounds" to False to run this model.') assert self.output_shape == other.output_shape A1 = self.patches A2 = other.patches # change paddings to merge the two patches sp = torch.tensor(unify_shape(self.padding)) op = torch.tensor(unify_shape(other.padding)) if (sp - op).abs().sum().item() > 0: if (sp - op >= 0).all(): A2 = F.pad(A2, (sp - op).tolist()) pass elif (sp - op <= 0).all(): A1 = F.pad(A1, (op - sp).tolist()) else: raise ValueError("Unsupported padding size") ret = A1 + A2 return Patches(ret, other.stride, torch.max(sp, op).tolist(), ret.shape, unstable_idx=self.unstable_idx, output_shape=self.output_shape, inserted_zeros=self.inserted_zeros, output_padding=self.output_padding) else: assert self.inserted_zeros == 0 assert not is_shape_used(self.output_padding) # Patches has shape (out_c, batch, out_h, out_w, in_c, h, w). 
input_shape = other.shape[3:] matrix = other pieces = self.patches if pieces.ndim == 9: pieces = pieces.transpose(0, 1) pieces = pieces.view(pieces.shape[0], -1, pieces.shape[3], pieces.shape[4], pieces.shape[5]*pieces.shape[6], pieces.shape[7], pieces.shape[8]).transpose(0,1) if pieces.ndim == 8: pieces = pieces.transpose(0, 1) pieces = pieces.view(pieces.shape[0], -1, pieces.shape[3], pieces.shape[4], pieces.shape[5], pieces.shape[6], pieces.shape[7]).transpose(0,1) A1_matrix = patches_to_matrix( pieces, input_shape, self.stride, self.padding, output_shape=self.output_shape, unstable_idx=self.unstable_idx) return A1_matrix.transpose(0, 1) + matrix def __str__(self): return ( f"Patches(stride={self.stride}, padding={self.padding}, " f"output_padding={self.output_padding}, inserted_zeros={self.inserted_zeros}, " f"kernel_shape={list(self.patches.shape)}, input_shape={self.input_shape}, " f"output_shape={self.output_shape}, unstable_idx={type(self.unstable_idx)})" ) @property def device(self): if self.patches is not None: return self.patches.device if self.unstable_idx is not None: if isinstance(self.unstable_idx, tuple): return self.unstable_idx[0].device else: return self.unstable_idx.device raise RuntimeError("Patches object is unintialized and cannot determine its device.") def create_similar(self, patches=None, stride=None, padding=None, identity=None, unstable_idx=None, output_shape=None, inserted_zeros=None, output_padding=None, input_shape=None): """ Create a new Patches object with new patches weights, and keep other properties the same. """ new_patches = self.patches.clone() if patches is None else patches new_identity = self.identity if identity is None else identity if new_identity and (new_patches is not None): raise ValueError("Identity Patches should have .patches property set to 0.") return Patches( new_patches, stride=self.stride if stride is None else stride, padding=self.padding if padding is None else padding, shape=new_patches.shape, identity=new_identity, unstable_idx=self.unstable_idx if unstable_idx is None else unstable_idx, output_shape=self.output_shape if output_shape is None else output_shape, inserted_zeros=self.inserted_zeros if inserted_zeros is None else inserted_zeros, output_padding=self.output_padding if output_padding is None else output_padding, input_shape=self.input_shape if input_shape is None else input_shape, ) def clone(self): return self.create_similar() def detach(self): new_obj = Patches( patches=self.patches.detach() if self.patches is not None else None, stride=self.stride, padding=self.padding, shape=self.shape, identity=self.identity, unstable_idx=( tuple(idx.detach() for idx in self.unstable_idx) if isinstance(self.unstable_idx, tuple) else self.unstable_idx.detach() ) if self.unstable_idx is not None else None, output_shape=self.output_shape, inserted_zeros=self.inserted_zeros, output_padding=self.output_padding, input_shape=self.input_shape, ) return new_obj def to_matrix(self, input_shape): assert not is_shape_used(self.output_padding) return patches_to_matrix( self.patches, input_shape, self.stride, self.padding, self.output_shape, self.unstable_idx, self.inserted_zeros ) def simplify(self): """Merge stride and inserted_zeros; if they are the same they can cancel out.""" stride = [self.stride, self.stride] if isinstance(self.stride, int) else self.stride if (self.inserted_zeros > 0 and self.inserted_zeros + 1 == stride[0] and stride[0] == stride[1] and (self.patches.size(-1) % stride[1]) == 0 and (self.patches.size(-2) % stride[0]) == 
0): # print(f'before simplify: patches={self.patches.size()} padding={self.padding}, stride={self.stride}, output_padding={self.output_padding}, inserted_zeros={self.inserted_zeros}') full_stride = [stride[1], stride[1], stride[0], stride[0]] # output_padding = tuple(p // s for p, s in zip(output_padding, full_stride)) padding = unify_shape(self.padding) # since inserted_zero will not put zeros to both end, like [x 0 0 x 0 0 x] instead of [x 0 0 x 0 0 x 0 0] # when computing the simplified padding, we should view (inserted_zeros-1) padding entries from one end side # as part of the inserted_zero matrices (i.e., "consumed") consumed_padding = (padding[0], padding[1] - (stride[1] - 1), padding[2], padding[3] - (stride[0] - 1)) tentative_padding = tuple(p // s - o for p, s, o in zip(consumed_padding, full_stride, unify_shape(self.output_padding))) # negative padding is inconvenient if all([p >= 0 for p in tentative_padding]): remove_zero_start_idx = (padding[2] % stride[0], padding[0] % stride[1]) self.padding = tentative_padding self.patches = remove_zeros(self.patches, self.inserted_zeros, remove_zero_start_idx=remove_zero_start_idx) self.stride = 1 self.inserted_zeros = 0 self.output_padding = 0 # print(f'after simplify: patches={self.patches.size()} padding={self.padding}, stride={self.stride}, output_padding={self.output_padding}, inserted_zeros={self.inserted_zeros}') def matmul(self, input, patch_abs=False, input_shape=None): """ Broadcast multiplication for patches and a matrix. Input shape: (batch_size, in_c, in_h, in_w). If the dim of in_c, in_h, in_w = 1, the the input will be expand by given input_shape to support broadcast Output shape: [batch_size, unstable_size] when unstable_idx is not None, [batch_size, out_c, out_h, out_w] when unstable_idx is None, """ patches = self.patches if patch_abs: patches = patches.abs() if input_shape is not None: # For cases that input only has fewer dimensions like (1, in_c, 1, 1) input = input.expand(input_shape) # Expand to (batch_size, in_c, in_h, in_w) # unfold the input as [batch_size, out_h, out_w, in_c, H, W] unfold_input = inplace_unfold( input, kernel_size=patches.shape[-2:], padding=self.padding, stride=self.stride, inserted_zeros=self.inserted_zeros, output_padding=self.output_padding) if self.unstable_idx is not None: # We need to add a out_c dimension and select from it. unfold_input = unfold_input.unsqueeze(0).expand(self.output_shape[1], -1, -1, -1, -1, -1, -1) # Shape: [unstable_size, batch_size, in_c, H, W]. # Here unfold_input will match this shape. unfold_input = unfold_input[self.unstable_idx[0], :, self.unstable_idx[1], self.unstable_idx[2]] # shape: [batch_size, unstable_size]. return torch.einsum('sbchw,sbchw->bs', unfold_input, patches) else: # shape: [batch_size, out_c, out_h, out_w]. return torch.einsum('bijchw,sbijchw->bsij', unfold_input, patches) def create_padding(self, output_shape): # patches was not padded, so we need to pad them here. # If this layer is followed by a ReLU layer, then the padding was already handled there and there is no need to pad again. 
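# ---------------------------------------------------------------------------
# A minimal sketch of the unfold-then-einsum pattern used by Patches.matmul()
# above, written with the standard F.unfold as a simplified stand-in for
# inplace_unfold() (shapes and numbers are made up): contracting the unfolded
# sliding windows with the kernel reproduces an ordinary convolution.
import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 8, 8)
w = torch.randn(5, 3, 3, 3)
unf = F.unfold(x, kernel_size=3, padding=1)               # (batch, C*3*3, 8*8)
out = torch.einsum('oc,bcl->bol', w.reshape(5, -1), unf)  # contract kernel with windows
assert torch.allclose(out.reshape(2, 5, 8, 8), F.conv2d(x, w, padding=1), atol=1e-4)
# ---------------------------------------------------------------------------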
one_d_unfolded_r = create_valid_mask( output_shape, self.patches.device, self.patches.dtype, self.patches.shape[-2:], self.stride, self.inserted_zeros, self.padding, self.output_padding, self.unstable_idx if self.unstable_idx else None) patches = self.patches * one_d_unfolded_r return patches def compute_patches_stride_padding(input_shape, patches_padding, patches_stride, op_padding, op_stride, inserted_zeros=0, output_padding=0, simplify=True): """ Compute stride and padding after a conv layer with patches mode. """ for p in (patches_padding, patches_stride, op_padding, op_stride): assert isinstance(p, int) or (isinstance(p, (list, tuple)) and (len(p) == 2 or len(p) == 4)) # If p is int, then same padding on all 4 sides. # If p is 2-tuple, then it is padding p[0] on both sides of H, p[1] on both sides of W # If p is 4-tuple, then it is padding p[2], p[3] on top and bottom sides of H, p[0] and p[1] on left and right sides of W # If any of the inputs are not tuple/list, we convert them to tuple. full_patch_padding, full_op_padding, full_patch_stride, full_op_stride = [ (p, p) if isinstance(p, int) else p for p in [patches_padding, op_padding, patches_stride, op_stride]] full_patch_padding, full_op_padding, full_patch_stride, full_op_stride = [ (p[1], p[1], p[0], p[0]) if len(p) == 2 else p for p in [full_patch_padding, full_op_padding, full_patch_stride, full_op_stride]] # Compute the new padding and stride after this layer. new_padding = tuple(pp * os + op * (inserted_zeros + 1) for pp, op, os in zip(full_patch_padding, full_op_padding, full_op_stride)) new_stride = tuple(ps * os for ps, os in zip(full_patch_stride, full_op_stride)) output_padding = unify_shape(output_padding) new_output_padding = (output_padding[0], # Left output_padding[1] + inserted_zeros * input_shape[3] % full_op_stride[2], # Right output_padding[2], # Top output_padding[3] + inserted_zeros * input_shape[2] % full_op_stride[0]) # Bottom # Merge into a single number if all numbers are identical. if simplify: if new_padding.count(new_padding[0]) == len(new_padding): new_padding = new_padding[0] if new_stride.count(new_stride[0]) == len(new_stride): new_stride = new_stride[0] return new_padding, new_stride, new_output_padding def patches_to_matrix(pieces, input_shape, stride, padding, output_shape=None, unstable_idx=None, inserted_zeros=0): """Converting a Patches piece into a full dense matrix.""" # torch.as_strided may cause unpredictable error under deterministic mode, # so we temporarily disable it. deterministic = torch.are_deterministic_algorithms_enabled() torch.use_deterministic_algorithms(False) if type(padding) == int: padding = (padding, padding, padding, padding) if pieces.ndim == 9: # Squeeze two additional dimensions for output and input respectively assert pieces.shape[1] == 1 and pieces.shape[5] == 1 pieces = pieces.reshape( pieces.shape[0], *pieces.shape[2:5], *pieces.shape[6:] ) if unstable_idx is None: assert pieces.ndim == 7 # Non-sparse pieces, with shape (out_c, batch, out_h, out_w, c, h, w). output_channel, batch_size, output_x, output_y = pieces.shape[:4] else: batch_size = pieces.shape[1] output_channel, output_x, output_y = output_shape[1:] input_channel, kernel_x, kernel_y = pieces.shape[-3:] input_x, input_y = input_shape[-2:] if inserted_zeros > 0: input_x, input_y = (input_x - 1) * (inserted_zeros + 1) + 1, (input_y - 1) * (inserted_zeros + 1) + 1 if unstable_idx is None: # Fix all patches in a full A matrix. 
A_matrix = torch.zeros(batch_size, output_channel, output_x, output_y, input_channel, (input_x + padding[2] + padding[3]) * (input_y + padding[0] + padding[1]), device=pieces.device, dtype=pieces.dtype) # Save its orignal stride. orig_stride = A_matrix.stride() # This is the main trick - we create a *view* of the original matrix, and it contains all sliding windows for the convolution. # Since we only created a view (in fact, only metadata of the matrix changed), it should be very efficient. matrix_strided = torch.as_strided(A_matrix, [batch_size, output_channel, output_x, output_y, output_x, output_y, input_channel, kernel_x, kernel_y], [orig_stride[0], orig_stride[1], orig_stride[2], orig_stride[3], (input_x + padding[2] + padding[3]) * stride, stride, orig_stride[4], input_y + padding[0] + padding[1], 1]) # Now we need to fill the conv kernel parameters into the last three dimensions of matrix_strided. first_indices = torch.arange(output_x * output_y, device=pieces.device) second_indices = torch.div(first_indices, output_y, rounding_mode="trunc") third_indices = torch.fmod(first_indices, output_y) # pieces have shape (out_c, batch, out_h, out_w, c, h, w). pieces = pieces.transpose(0, 1) # pieces has the out_c dimension at the front, need to move it to the second. matrix_strided[:,:,second_indices,third_indices,second_indices,third_indices,:,:,:] = pieces.reshape(*pieces.shape[:2], -1, *pieces.shape[4:]) A_matrix = A_matrix.view(batch_size, output_channel * output_x * output_y, input_channel, input_x + padding[2] + padding[3], input_y + padding[0] + padding[1]) else: # Fill only a selection of patches. # Create only a partial A matrix. unstable_size = unstable_idx[0].numel() A_matrix = torch.zeros(batch_size, unstable_size, input_channel, (input_x + padding[2] + padding[3]) * (input_y + padding[0] + padding[1]), device=pieces.device, dtype=pieces.dtype) # Save its orignal stride. orig_stride = A_matrix.stride() # This is the main trick - we create a *view* of the original matrix, and it contains all sliding windows for the convolution. # Since we only created a view (in fact, only metadata of the matrix changed), it should be very efficient. matrix_strided = torch.as_strided(A_matrix, [batch_size, unstable_size, output_x, output_y, input_channel, kernel_x, kernel_y], [orig_stride[0], orig_stride[1], (input_x + padding[2] + padding[3]) * stride, stride, orig_stride[2], input_y + padding[0] + padding[1], 1]) # pieces have shape (unstable_size, batch, c, h, w). first_indices = torch.arange(unstable_size, device=pieces.device) matrix_strided[:,first_indices,unstable_idx[1],unstable_idx[2],:,:,:] = pieces.transpose(0, 1).to(matrix_strided) A_matrix = A_matrix.view(batch_size, unstable_size, input_channel, input_x + padding[2] + padding[3], input_y + padding[0] + padding[1]) A_matrix = A_matrix[:,:,:,padding[2]:input_x + padding[2],padding[0]:input_y + padding[0]] if inserted_zeros > 0: A_matrix = A_matrix[:,:,:, ::(inserted_zeros+1), ::(inserted_zeros+1)] # Re-enable deterministic if needed. 
torch.use_deterministic_algorithms(deterministic) return A_matrix def check_patch_biases(lb, ub, lower_b, upper_b): # When we use patches mode, it's possible that we need to add two bias # one is from the Tensor mode and one is from the patches mode # And we need to detect this case and reshape the bias if lower_b.ndim < lb.ndim: lb = lb.transpose(0,1).reshape(lb.size(1), lb.size(0), -1) lb = lb.expand(lb.size(0), lb.size(1), lower_b.size(0)//lb.size(1)) lb = lb.reshape(lb.size(0), -1).t() ub = ub.transpose(0,1).reshape(ub.size(1), ub.size(0), -1) ub = ub.expand(ub.size(0), ub.size(1), upper_b.size(0)//ub.size(1)) ub = ub.reshape(ub.size(0), -1).t() elif lower_b.ndim > lb.ndim: lower_b = lower_b.transpose(0, 1).reshape(lower_b.size(1), -1).t() upper_b = upper_b.transpose(0, 1).reshape(upper_b.size(1), -1).t() return lb, ub, lower_b, upper_b def inplace_unfold(image, kernel_size, stride=1, padding=0, inserted_zeros=0, output_padding=0): # Image has size (batch_size, channel, height, width). assert image.ndim == 4 if isinstance(kernel_size, int): kernel_size = (kernel_size, kernel_size) if isinstance(padding, int): padding = (padding, padding, padding, padding) # (left, right, top, bottom). if len(padding) == 2: # (height direction, width direction). padding = (padding[1], padding[1], padding[0], padding[0]) if isinstance(output_padding, int): output_padding = (output_padding, output_padding, output_padding, output_padding) # (left, right, top, bottom). if len(output_padding) == 2: # (height direction, width direction). output_padding = (output_padding[1], output_padding[1], output_padding[0], output_padding[0]) if isinstance(stride, int): stride = (stride, stride) # (height direction, width direction). assert len(kernel_size) == 2 and len(padding) == 4 and len(stride) == 2 # Make sure the image is large enough for the kernel. assert image.size(2) + padding[2] + padding[3] >= kernel_size[0] and image.size(3) + padding[0] + padding[1] >= kernel_size[1] if inserted_zeros > 0: # We first need to insert zeros in the image before unfolding. image = insert_zeros(image, inserted_zeros) # padding = (padding[0], padding[1] + 1, padding[2], padding[3] + 1) # Compute the number of patches. # Formulation: https://pytorch.org/docs/stable/generated/torch.nn.Unfold.html#torch.nn.Unfold patches_h = int((image.size(2) + padding[2] + padding[3] - (kernel_size[0] - 1) - 1) / stride[0] + 1) patches_w = int((image.size(3) + padding[0] + padding[1] - (kernel_size[1] - 1) - 1) / stride[1] + 1) # Pad image. if sum(padding) != 0: image = torch.nn.functional.pad(image, padding) # Save its orignal stride. image_stride = image.stride() matrix_strided = torch.as_strided(image, [ # Shape of the output matrix. image.size(0), # Batch size. patches_h, # indices for each patch. patches_w, image.size(1), # Channel. kernel_size[0], # indices for each pixel on a patch. kernel_size[1]], [ # Stride of the output matrix. image_stride[0], # Batch size dimension, keep using the old stride. image_stride[2] * stride[0], # Move patch in the height dimension. image_stride[3] * stride[1], # Move patch in the width dimension. image_stride[1], # Move to the next channel. image_stride[2], # Move to the next row. image_stride[3]]) # Move a pixel (on the width direction). 
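# --------------------------------------------------------------------------
# Aside (hedged sketch, not part of the library API): in the plain case
# (inserted_zeros=0, output_padding=0, symmetric padding), inplace_unfold()
# should match torch.nn.functional.unfold() up to a reshape; the names and
# default arguments below are illustrative only.
def _check_inplace_unfold(image, kernel_size=3, stride=2, padding=1):
    import torch.nn.functional as F
    # a: (batch, out_h, out_w, in_c, k, k).
    a = inplace_unfold(image, kernel_size=kernel_size, stride=stride,
                       padding=padding)
    # F.unfold gives (batch, in_c * k * k, out_h * out_w); reorder to match.
    b = F.unfold(image, kernel_size=kernel_size, stride=stride, padding=padding)
    b = b.transpose(1, 2).reshape(a.shape)
    return torch.allclose(a, b)
# --------------------------------------------------------------------------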
# Output shape is (batch_size, patches_h, patches_w, channel, kernel_height, kernel_width) if sum(output_padding) > 0: output_padding = tuple(p if p > 0 else None for p in output_padding) matrix_strided = matrix_strided[:, output_padding[2]:-output_padding[3] if output_padding[3] is not None else None, output_padding[0]:-output_padding[1] if output_padding[1] is not None else None, :, :, :] return matrix_strided def maybe_unfold_patches(d_tensor, last_A, alpha_lookup_idx=None): """ Utility function to handle patch mode bound propagation in activation functions. In patches mode, we need to unfold lower and upper slopes (as input "d_tensor"). In matrix mode we simply return. """ if d_tensor is None or last_A is None or isinstance(last_A, Tensor): return d_tensor # Shape for d_tensor: # sparse: [spec, batch, in_c, in_h, in_w] # non-sparse (partially shared): [out_c, batch, in_c, in_h, in_w] # non-sparse (not shared): [out_c*out_h*out_w, batch, in_c, in_h, in_w] # shared (independent of output spec): [1, batch, in_c, in_h, in_w] # The in_h, in_w dimensions must be unfolded as patches. origin_d_shape = d_tensor.shape if d_tensor.ndim == 6: # Merge the (out_h, out_w) dimensions. d_tensor = d_tensor.view(*origin_d_shape[:2], -1, *origin_d_shape[-2:]) d_shape = d_tensor.size() # Reshape to 4-D tensor to unfold. d_tensor = d_tensor.view(-1, *d_tensor.shape[-3:]) # unfold the slope matrix as patches. Patch shape is [spec * batch, out_h, out_w, in_c, H, W). d_unfolded = inplace_unfold( d_tensor, kernel_size=last_A.patches.shape[-2:], stride=last_A.stride, padding=last_A.padding, inserted_zeros=last_A.inserted_zeros, output_padding=last_A.output_padding) # Reshape to the original shape of d, e.g., for non-sparse it is (out_c, batch, out_h, out_w, in_c, H, W). d_unfolded_r = d_unfolded.view(*d_shape[:-3], *d_unfolded.shape[1:]) if last_A.unstable_idx is not None: # Here we have d for all output neurons, but we only need to select unstable ones. if d_unfolded_r.size(0) == 1 and alpha_lookup_idx is None: # Shared alpha, spasre alpha should not be used. # Note: only d_unfolded_r.size(0) == 1 cannot judge that it is a shared alpha, # since the activation may have no unstable neuron at all so # the first dim = 1 + # unstable neuron still equals to 1 if len(last_A.unstable_idx) == 3: # Broadcast the spec shape, so only need to select the rest dimensions. # Change shape to (out_h, out_w, batch, in_c, H, W) or (out_h, out_w, in_c, H, W). d_unfolded_r = d_unfolded_r.squeeze(0).permute(1, 2, 0, 3, 4, 5) d_unfolded_r = d_unfolded_r[last_A.unstable_idx[1], last_A.unstable_idx[2]] elif len(last_A.unstable_idx) == 4: # [spec, batch, output_h, output_w, input_c, H, W] # to [output_h, output_w, batch, in_c, H, W] d_unfolded_r = d_unfolded_r.squeeze(0).permute(1, 2, 0, 3, 4, 5) d_unfolded_r = d_unfolded_r[last_A.unstable_idx[2], last_A.unstable_idx[3]] else: raise NotImplementedError() # output shape: (unstable_size, batch, in_c, H, W). else: # The spec dimension may be sparse and contains unstable neurons for the spec layer only. if alpha_lookup_idx is None: # alpha is spec-dense. Possible because the number of unstable neurons may decrease. if last_A.output_shape[1] == d_unfolded_r.size(0): # Non spec-sparse, partially shared alpha among output channel dimension. # Shape after unfolding is (out_c, batch, out_h, out_w, in_c, patch_h, patch_w). d_unfolded_r = d_unfolded_r[last_A.unstable_idx[0], :, last_A.unstable_idx[1], last_A.unstable_idx[2]] else: # Non spec-sparse, non-shared alpha. 
# Shape after unfolding is (out_c*out_h*out_w, batch, out_h, out_w, in_c, patch_h, patch_w). # Reshaped to (out_c, out_h, out_w, batch, out_h, out_w, in_c, patch_h, patch_w). d_unfolded_r = d_unfolded_r.view(last_A.shape[0], last_A.shape[2], last_A.shape[3], -1, *d_unfolded_r.shape[2:]) # Select on all out_c, out_h, out_w dimensions. d_unfolded_r = d_unfolded_r[last_A.unstable_idx[0], last_A.unstable_idx[1], last_A.unstable_idx[2], :, last_A.unstable_idx[1], last_A.unstable_idx[2]] elif alpha_lookup_idx.ndim == 1: # sparse alpha: [spec, batch, in_c, in_h, in_w] # Partially shared alpha on the spec dimension - all output neurons on the same channel use the same alpha. # If alpha_lookup_idx is not None, we need to convert the sparse indices using alpha_lookup_idx. _unstable_idx = alpha_lookup_idx[last_A.unstable_idx[0]] # The selection is only used on the channel dimension. d_unfolded_r = d_unfolded_r[_unstable_idx, :, last_A.unstable_idx[1], last_A.unstable_idx[2]] elif alpha_lookup_idx is not None and alpha_lookup_idx.ndim == 3: # sparse alpha: [spec, batch, in_c, in_h, in_w] # We created alpha as full output shape; alpha not shared among channel dimension. # Shape of alpha is (out_c*out_h*out_w, batch, in_c, in_h, in_w), note that the first 3 dimensions # is merged into one to allow simpler selection. _unstable_idx = alpha_lookup_idx[ last_A.unstable_idx[0], last_A.unstable_idx[1], last_A.unstable_idx[2]] # d_unfolded_r shape from (out_c, batch, out_h, out_w, in_c, in_h, in_w) # to (out_c * out_h * out_w(sparse), batch, in_c, in_h, in_w) # Note that the dimensions out_h, out_w come from unfolding, not specs in alpha, so they will be selected # directly without translating using the lookup table. d_unfolded_r = d_unfolded_r[_unstable_idx, :, last_A.unstable_idx[1], last_A.unstable_idx[2]] # after selection we return (unstable_size, batch_size, in_c, H, W) return d_unfolded_r else: raise ValueError else: # A is not sparse. Alpha shouldn't be sparse as well. assert alpha_lookup_idx is None if last_A.patches.size(0) != d_unfolded_r.size(0) and d_unfolded_r.size(0) != 1: # Non-shared alpha, shape after unfolding is (out_c*out_h*out_w, batch, out_h, out_w, in_c, patch_h, patch_w). # Reshaped to (out_c, out_h*out_w, batch, out_h*out_w, in_c, patch_h, patch_w). d_unfolded_r = d_unfolded_r.reshape(last_A.shape[0], last_A.shape[2] * last_A.shape[3], -1, d_unfolded_r.shape[2] * d_unfolded_r.shape[3], *d_unfolded_r.shape[4:]) # Select the "diagonal" elements in the out_h*out_w dimension. # New shape is (out_c, batch, in_c, patch_h, patch_w, out_h*out_w) d_unfolded_r = d_unfolded_r.diagonal(offset=0, dim1=1, dim2=3) # New shape is (out_c, batch, in_c, patch_h, patch_w, out_h, out_w) d_unfolded_r = d_unfolded_r.view(*d_unfolded_r.shape[:-1], last_A.shape[2], last_A.shape[3]) # New shape is (out_c, batch, out_h, out_w, in_c, patch_h, patch_w) d_unfolded_r = d_unfolded_r.permute(0, 1, 5, 6, 2, 3, 4) # For sparse patches, the shape after unfold is (unstable_size, batch_size, in_c, H, W). # For regular patches, the shape after unfold is (out_c, batch, out_h, out_w, in_c, H, W). if d_unfolded_r.ndim != last_A.patches.ndim: # For the situation of d independent of output neuron (e.g., vanilla crown bound), which does not have # the out_h, out_w dimension and out_c = 1 (sepc). We added 1s for the out_h, out_w dimensions. 
d_unfolded_r = d_unfolded_r.unsqueeze(2).unsqueeze(-4) return d_unfolded_r def create_valid_mask(output_shape, device, dtype, kernel_size, stride, inserted_zeros, padding, output_padding, unstable_idx=None): """ Create a 0-1 mask of patch pieces shape (except batch dim), where 1 indicates the cells corresponding to valid image pixels Can be used to mask out unused A cells :return: tensor of batch pieces shape, containing the binary mask """ one_d = torch.ones( tuple(1 for i in output_shape[1:]), device=device, dtype=dtype ).expand(output_shape[1:]) # Add batch dimension. one_d = one_d.unsqueeze(0) # After unfolding, the shape is (1, out_h, out_w, in_c, h, w) one_d_unfolded = inplace_unfold( one_d, kernel_size=kernel_size, stride=stride, padding=padding, inserted_zeros=inserted_zeros, output_padding=output_padding) if unstable_idx is not None: # Move out_h, out_w dimension to the front for easier selection. ans = one_d_unfolded.permute(1, 2, 0, 3, 4, 5) # for sparse patches the shape is (unstable_size, batch, in_c, h, w). # Batch size is 1 so no need to select here. ans = ans[unstable_idx[1], unstable_idx[2]] else: # Append the spec dimension. ans = one_d_unfolded.unsqueeze(0) return ans ================================================ FILE: auto_LiRPA/perturbations.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import json import math import os import numpy as np import torch from .utils import logger, eyeC from .patches import Patches, patches_to_matrix from .linear_bound import LinearBound from .concretize_func import constraints_solving, sort_out_constr_batches, construct_constraints class Perturbation: r""" Base class for a perturbation specification. Please see examples at `auto_LiRPA/perturbations.py`. Examples: * `PerturbationLpNorm`: Lp-norm (p>=1) perturbation. * `PerturbationL0Norm`: L0-norm perturbation. * `PerturbationSynonym`: Synonym substitution perturbation for NLP. """ def __init__(self): pass def set_eps(self, eps): self.eps = eps def concretize(self, x, A, sign=-1, aux=None): r""" Concretize bounds according to the perturbation specification. Args: x (Tensor): Input before perturbation. A (Tensor) : A matrix from LiRPA computation. sign (-1 or +1): If -1, concretize for lower bound; if +1, concretize for upper bound. aux (object, optional): Auxilary information for concretization. Returns: bound (Tensor): concretized bound with the shape equal to the clean output. """ raise NotImplementedError def init(self, x, aux=None, forward=False): r""" Initialize bounds before LiRPA computation. Args: x (Tensor): Input before perturbation. aux (object, optional): Auxilary information. forward (bool): It indicates whether forward mode LiRPA is involved. Returns: bound (LinearBound): Initialized bounds. center (Tensor): Center of perturbation. It can simply be `x`, or some other value. 
            aux (object, optional): Auxiliary information.
                Bound initialization may modify or add auxiliary information.
        """
        raise NotImplementedError


class PerturbationL0Norm(Perturbation):
    """Perturbation constrained by the L_0 norm.

    Assuming input data is in the range of 0-1.
    """

    def __init__(self, eps, x_L=None, x_U=None, ratio=1.0):
        self.eps = eps
        self.x_U = x_U
        self.x_L = x_L
        self.ratio = ratio

    def concretize(self, x, A, sign=-1, aux=None):
        if A is None:
            return None
        eps = math.ceil(self.eps)
        x = x.reshape(x.shape[0], -1, 1)
        center = A.matmul(x)
        x = x.reshape(x.shape[0], 1, -1)
        original = A * x.expand(x.shape[0], A.shape[-2], x.shape[2])
        neg_mask = A < 0
        pos_mask = A >= 0
        if sign == 1:
            A_diff = torch.zeros_like(A)
            # Changes that one weight can contribute to the value.
            A_diff[pos_mask] = A[pos_mask] - original[pos_mask]
            A_diff[neg_mask] = -original[neg_mask]
        else:
            A_diff = torch.zeros_like(A)
            A_diff[pos_mask] = original[pos_mask]
            A_diff[neg_mask] = original[neg_mask] - A[neg_mask]
        # FIXME: this assumes the input pixel range is between 0 and 1!
        A_diff, _ = torch.sort(A_diff, dim=2, descending=True)
        bound = center + sign * A_diff[:, :, :eps].sum(dim=2).unsqueeze(2) * self.ratio
        return bound.squeeze(2)

    def init(self, x, aux=None, forward=False):
        # For other norms, we pass in the BoundedTensor objects directly.
        x_L = x
        x_U = x
        if not forward:
            return LinearBound(None, None, None, None, x_L, x_U), x, None
        batch_size = x.shape[0]
        dim = x.reshape(batch_size, -1).shape[-1]
        eye = torch.eye(dim).to(x.device).unsqueeze(0).repeat(batch_size, 1, 1)
        lw = eye.reshape(batch_size, dim, *x.shape[1:])
        lb = torch.zeros_like(x).to(x.device)
        uw, ub = lw.clone(), lb.clone()
        return LinearBound(lw, lb, uw, ub, x_L, x_U), x, None

    def __repr__(self):
        return 'PerturbationLpNorm(norm=0, eps={})'.format(self.eps)


class PerturbationLpNorm(Perturbation):
    """Perturbation constrained by the L_p norm."""

    def __init__(self, eps=0, norm=np.inf, x_L=None, x_U=None, eps_min=0,
                 constraints=None, rearrange_constraints=False,
                 no_return_inf=False, timer=None):
        r"""
        Initialize a p-norm perturbation instance. There are two ways to initialize it:
        -- x_L, x_U: (Higher priority)
        -- eps : (Lower priority)
        If eps is used to initialize it, the centroid x (or x0 as in the member
        attribute) will be passed into the `init` and `concretize` functions.
        For the shape notations such as 'B' or 'X', please check the shape
        declaration at the beginning of concretize_func.py

        Args:
            eps (Tensor): The epsilon tensor; it represents the perturbation added to a BoundedTensor.
            norm (int or torch.inf): The p in p-norm perturbation.
            x_L (Tensor): Lower bound of input box, shape (B, *input_shape[1:]).
            x_U (Tensor): Upper bound of input box, shape (B, *input_shape[1:]).
            eps_min ()
            constraints (Tuple[Tensor, Tensor] or None): A tuple `(A, b)` representing per-batch linear constraints.
                - `A`: shape (B, N_constr, X)
                - `b`: shape (B, N_constr)
            rearrange_constraints (bool): Whether to rearrange constraints for better solver performance. Default: False.
            no_return_inf (bool): If True, infeasible batches will be excluded from `active_indices`.
                Otherwise, infeasible batches are still marked active. Default: False.
                Please check `constraints_solving` and `sort_out_constr_batches` for more details.
            timer (Timer): A timer recording the concretization time.
        """
        self.eps = eps
        self.x0 = None
        # For p = inf, pre-computing x0 and eps accelerates the concretize function.
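# --------------------------------------------------------------------------
# Aside (hedged usage sketch following the standard auto_LiRPA workflow;
# `model_ori` and the epsilon value are placeholders): a PerturbationLpNorm is
# normally attached to an input via BoundedTensor before compute_bounds().
def _example_linf_perturbation(model_ori, x, eps=0.03):
    from auto_LiRPA import BoundedModule, BoundedTensor
    lirpa_model = BoundedModule(model_ori, torch.empty_like(x))
    ptb = PerturbationLpNorm(norm=np.inf, eps=eps)  # or pass x_L=..., x_U=...
    bounded_x = BoundedTensor(x, ptb)
    lb, ub = lirpa_model.compute_bounds(x=(bounded_x,), method='CROWN')
    return lb, ub
# --------------------------------------------------------------------------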
if norm == np.inf and x_L is not None and x_U is not None: self.eps = (x_U - x_L) / 2 self.x0 = (x_U + x_L) / 2 # x0_act and eps_act stands for x0 and eps matrix for batches with active constraints self.x0_act = None # shape (batchsize, *X_shape) self.eps_act = None # shape (batchsize, *X_shape) # x0_sparse_act and eps_sparse_act are the active sparse x0 and eps matrix when sparse perturbation is enabled. # Check init_sparse_linf to see how sparse x0, eps, x0_act, eps_act are created. self.x0_sparse_act = None # shape (batchsize, *X_sparse_shape) self.eps_sparse_act = None # shape (batchsize, *X_sparse_shape) self.eps_min = eps_min self.norm = norm self.dual_norm = 1 if (norm == np.inf) else (np.float64(1.0) / (1 - 1.0 / self.norm)) self.x_L = x_L self.x_U = x_U self.sparse = False self.timer = timer self.aux_lb = None self.aux_ub = None self.rearrange_constraints = rearrange_constraints # constraints is a tuple containing both the coefficient matrix and bias term # of the constraints. The constraints would appear in the form of: # A_c * x + b_c <= 0 # Coefficient matrix will be reshaped into (batchsize, # of constraints, # input_dim). Bias term will be reshaped into (batchsize, # of constraints) # also see in `constraints_solving` in constraints_solver.py # Pre-process the constraints. self.constraints, self.sorted_out_batches = sort_out_constr_batches(x_L, x_U, constraints, rearrange_constraints=rearrange_constraints, no_return_inf=no_return_inf) # The indices of hidden neurons to apply constraints. self.objective_indices = None # shape: (batchsize, num_of_neurons) if self.constraints is None or self.constraints[0].numel() == 0: self._constraints_enable = False else: self._constraints_enable = True self.no_return_inf = no_return_inf self._use_grad = False def get_input_bounds(self, x, A): if self.sparse: if self.x_L_sparse.shape[-1] == A.shape[-1]: x_L, x_U = self.x_L_sparse, self.x_U_sparse act_x0, act_eps = self.x0_sparse_act, self.eps_sparse_act else: # In backward mode, A is not sparse. x_L, x_U = self.x_L, self.x_U act_x0, act_eps = self.x0_act, self.eps_act else: x_L = x - self.eps if self.x_L is None else self.x_L x_U = x + self.eps if self.x_U is None else self.x_U act_x0, act_eps = self.x0_act, self.eps_act return x_L, x_U, act_x0, act_eps def get_constraints(self, A): if self.constraints is None: return None if self.sparse and self.x_L_sparse.shape[-1] == A.shape[-1]: return self.constraints_sparse else: return self.constraints def concretize_matrix(self, x, A, sign, constraints=None): # If A is an identity matrix, we will handle specially. if not isinstance(A, eyeC): # A has (Batch, spec, *input_size). For intermediate neurons, spec is *neuron_size. A = A.reshape(A.shape[0], A.shape[1], -1) if self.norm == np.inf: x_L, x_U, act_x0, act_eps = self.get_input_bounds(x, A) if constraints is None: constraints = self.get_constraints(A) # The original code for matrix concretize has been merged into `constraints_solving`. # Pick out auxiliary bound based on the sign. 
aux_bounds = self.aux_lb if sign == -1.0 else self.aux_ub results = constraints_solving(x_L, x_U, A, constraints, sign, sorted_out_batches=self.sorted_out_batches, objective_indices=self.objective_indices, constraints_enable=self._constraints_enable, no_return_inf=self.no_return_inf, timer=self.timer, aux_bounds=aux_bounds, act_x0=act_x0, act_eps=act_eps, use_grad=self._use_grad) if self.no_return_inf: # return: (bound, infeasible_bounds) bound = results[0] infeasible_bounds = results[1] self.add_infeasible_batches(infeasible_bounds) else: # return: bound bound = results else: x = x.reshape(x.shape[0], -1, 1) if not isinstance(A, eyeC): # Find the upper and lower bounds via dual norm. deviation = A.norm(self.dual_norm, -1) * self.eps bound = A.matmul(x) + sign * deviation.unsqueeze(-1) else: # A is an identity matrix. Its norm is all 1. bound = x + sign * self.eps bound = bound.squeeze(-1) return bound def concretize_patches(self, x, A, sign): if self.norm == np.inf: x_L, x_U, _, _, = self.get_input_bounds(x, A) # Here we should not reshape # Find the uppwer and lower bound similarly to IBP. center = (x_U + x_L) / 2.0 diff = (x_U - x_L) / 2.0 if not A.identity == 1: bound = A.matmul(center) bound_diff = A.matmul(diff, patch_abs=True) if sign == 1: bound += bound_diff elif sign == -1: bound -= bound_diff else: raise ValueError("Unsupported Sign") else: # A is an identity matrix. No need to do this matmul. bound = center + sign * diff return bound else: # Lp norm input_shape = x.shape if not A.identity: # Find the upper and lower bounds via dual norm. # matrix has shape # (batch_size, out_c * out_h * out_w, input_c, input_h, input_w) # or (batch_size, unstable_size, input_c, input_h, input_w) matrix = patches_to_matrix( A.patches, input_shape, A.stride, A.padding, A.output_shape, A.unstable_idx) # Note that we should avoid reshape the matrix. # Due to padding, matrix cannot be reshaped without copying. deviation = matrix.norm(p=self.dual_norm, dim=(-3,-2,-1)) * self.eps # Bound has shape (batch, out_c * out_h * out_w) or (batch, unstable_size). bound = torch.einsum('bschw,bchw->bs', matrix, x) + sign * deviation if A.unstable_idx is None: # Reshape to (batch, out_c, out_h, out_w). bound = bound.view(matrix.size(0), A.patches.size(0), A.patches.size(2), A.patches.size(3)) else: # A is an identity matrix. Its norm is all 1. 
bound = x + sign * self.eps return bound def concretize(self, x, A, sign=-1, constraints=None, aux=None): """Given an variable x and its bound matrix A, compute worst case bound according to Lp norm.""" if A is None: return None if isinstance(A, eyeC) or isinstance(A, torch.Tensor): ret = self.concretize_matrix(x, A, sign, constraints) elif isinstance(A, Patches): ret = self.concretize_patches(x, A, sign) else: raise NotImplementedError() if ret.ndim > 2: ret = ret.reshape(A.shape[1], -1) return ret def init_sparse_linf(self, x, x_L, x_U): """ Sparse Linf perturbation where only a few dimensions are actually perturbed""" self.sparse = True batch_size = x_L.shape[0] perturbed = (x_U > x_L).int() logger.debug(f'Perturbed: {perturbed.sum()}') lb = ub = x_L * (1 - perturbed) # x_L=x_U holds when perturbed=0 perturbed = perturbed.view(batch_size, -1) index = torch.cumsum(perturbed, dim=-1) dim = max(perturbed.view(batch_size, -1).sum(dim=-1).max(), 1) self.x_L_sparse = torch.zeros(batch_size, dim + 1).to(x_L) self.x_L_sparse.scatter_(dim=-1, index=index, src=(x_L - lb).view(batch_size, -1), reduce='add') self.x_U_sparse = torch.zeros(batch_size, dim + 1).to(x_U) self.x_U_sparse.scatter_(dim=-1, index=index, src=(x_U - ub).view(batch_size, -1), reduce='add') self.x_L_sparse, self.x_U_sparse = self.x_L_sparse[:, 1:], self.x_U_sparse[:, 1:] # --- create x0 and eps for Lp Norm self.x0_sparse = (self.x_L_sparse + self.x_U_sparse) / 2 self.eps_sparse = (self.x_U_sparse - self.x_L_sparse) / 2 if self.sorted_out_batches is not None: active_indices = self.sorted_out_batches["active_indices"] self.x0_sparse_act = self.x0_sparse[active_indices].unsqueeze(-1) self.eps_sparse_act = self.eps_sparse[active_indices].unsqueeze(-1) lw = torch.zeros(batch_size, dim + 1, perturbed.shape[-1], device=x.device) perturbed = perturbed.to(torch.get_default_dtype()) lw.scatter_(dim=1, index=index.unsqueeze(1), src=perturbed.unsqueeze(1)) lw = uw = lw[:, 1:, :].view(batch_size, dim, *x.shape[1:]) print(f'Using Linf sparse perturbation. Perturbed dimensions: {dim}.') print(f'Avg perturbation: {(self.x_U_sparse - self.x_L_sparse).mean()}') # When sparse linf is enabled, the input x perturbation would change its shape # Hence, the shape of constraints_A should change accordingly. # But for the final layer, we still need the dense linf, and use the original (dense) constraints if self.constraints is not None: # constraints_A: (batchsize, n_constraints, x_dim) constraints_A, constraints_b = self.constraints # reversed_lw: (batchsize, x_dim, sparse_dim) reversed_lw = lw.reshape((batch_size, dim, -1)).transpose(1, 2) lb_act = lb # When pre-processing the constraints, we only kept the active ones. # Hence, reversed_lw and lb_act should also be re-collected. 
active_indices = self.sorted_out_batches["active_indices"] reversed_lw = reversed_lw[active_indices] lb_act = lb_act[active_indices] # reversed lw will sort out the sparse dimensions out of all x dimension new_constraints_A = constraints_A.bmm(reversed_lw) # Besides original constraint_b, should also include the a*x terms where x is not perturbed # new_constraints_b = constraints_b + torch.einsum("bcx, bx -> bc", constraints_A, lb_act.flatten(1)) new_constraints_b = constraints_b self.constraints_sparse = (new_constraints_A, new_constraints_b) return LinearBound( lw, lb, uw, ub, x_L, x_U), x, None def init(self, x, aux=None, forward=False): self.sparse = False if self.norm == np.inf: x_L = x - self.eps if self.x_L is None else self.x_L x_U = x + self.eps if self.x_U is None else self.x_U else: if int(os.environ.get('AUTOLIRPA_L2_DEBUG', 0)) == 1: # FIXME Experimental code. Need to change the IBP code also. x_L = x - self.eps if self.x_L is None else self.x_L x_U = x + self.eps if self.x_U is None else self.x_U else: # FIXME This causes confusing lower bound and upper bound # For other norms, we pass in the BoundedTensor objects directly. x_L = x_U = x if self.x_L is not None and self.x_U is not None: self.x0 = (self.x_L + self.x_U) / 2 else: self.x0 = x.data if self.sorted_out_batches is not None and self.sorted_out_batches.get("active_indices") is not None: active_indices = self.sorted_out_batches["active_indices"] self.x0_act = self.x0[active_indices].flatten(1).unsqueeze(-1) self.eps_act = self.eps[active_indices].flatten(1).unsqueeze(-1) if not forward: return LinearBound( None, None, None, None, x_L, x_U), x, None if (self.norm == np.inf and x_L.numel() > 1 and (x_L == x_U).sum() > 0.5 * x_L.numel()): return self.init_sparse_linf(x, x_L, x_U) batch_size = x.shape[0] dim = x.reshape(batch_size, -1).shape[-1] lb = ub = torch.zeros_like(x) eye = torch.eye(dim).to(x).expand(batch_size, dim, dim) lw = uw = eye.reshape(batch_size, dim, *x.shape[1:]) return LinearBound( lw, lb, uw, ub, x_L, x_U), x, None def add_infeasible_batches(self, infeasible_batches): r""" Synchronize the `infeasible_batches` tensor between the global graph and the local perturbation node. If the computation graph includes multiple perturbed inputs, the BoundedModule (entire network) maintains a global `infeasible_batches` tensor, while each perturbed input (root) keeps its own local copy. - Before concretization: copy the global tensor to the local one. - After concretization: propagate updates from the local tensor back to the global tensor. Args: infeasible_batches: A boolean vector with shape (batchsize, ). A True value indicates that a batch is infeasible given its constraints. 
""" if self.constraints is not None and infeasible_batches is not None and infeasible_batches.any(): if self.sorted_out_batches["infeasible_batches"] is None: self.sorted_out_batches["infeasible_batches"] = infeasible_batches else: infeasible_batches = infeasible_batches | self.sorted_out_batches["infeasible_batches"] self.sorted_out_batches["infeasible_batches"] = infeasible_batches active_indices = self.sorted_out_batches["active_indices"] B_act = active_indices.numel() active_feasible_mask = (~infeasible_batches)[active_indices] if active_feasible_mask.sum() < B_act: self.sorted_out_batches["active_indices"] = active_indices[active_feasible_mask] self.x0_act = self.x0_act[active_feasible_mask] self.eps_act = self.eps_act[active_feasible_mask] constraints_A, constraints_b = self.constraints constraints_A = constraints_A[active_feasible_mask] constraints_b = constraints_b[active_feasible_mask] self.constraints = (constraints_A, constraints_b) def add_objective_indices(self, objective_indices): if self.constraints is not None: self.objective_indices = objective_indices @property def constraints_enable(self): ''' Enable / Disable the constrained concretize mode, regardless whether constraints is None or not. ''' return self._constraints_enable @constraints_enable.setter def constraints_enable(self, enable: bool): self._constraints_enable = enable @constraints_enable.deleter def constraints_enable(self): del self._constraints_enable @property def use_grad(self): ''' Enable / Disable the constrained concretize with gradient. ''' return self._use_grad @use_grad.setter def use_grad(self, use_grad: bool): self._use_grad = use_grad @use_grad.deleter def use_grad(self): del self._use_grad def add_aux_bounds(self, aux_lb, aux_ub): self.aux_lb = aux_lb self.aux_ub = aux_ub def clear_aux_bounds(self): self.aux_lb = None self.aux_ub = None def reset_constraints(self, constraints, decision_thresh): r""" Reset the constraints of this perturbation. Also will call `sort_out_constr_batches` to preprocess the constraints. Be sure not to reset with the same constraints input repeatedly. """ # We have to enable the gradient computation for the constraints # when using constraints_solving within alpha crown. self.use_grad = True constraints = construct_constraints(constraints[0], constraints[1], decision_thresh, self.x_L.shape[0], self.x_L.flatten(1).shape[1]) self.constraints, self.sorted_out_batches = sort_out_constr_batches(self.x_L, self.x_U, constraints, rearrange_constraints=self.rearrange_constraints, no_return_inf=self.no_return_inf) def __repr__(self): if self.norm == np.inf: if self.x_L is None and self.x_U is None: return f'PerturbationLpNorm(norm=inf, eps={self.eps})' else: return f'PerturbationLpNorm(norm=inf, eps={self.eps}, x_L={self.x_L}, x_U={self.x_U})' else: return f'PerturbationLpNorm(norm={self.norm}, eps={self.eps})' class PerturbationLinear(Perturbation): """ Perturbation defined by a Linear transformation. args: lower_A: Lower bound matrix of shape (B, output_dim, input_dim) upper_A: Upper bound matrix of shape (B, output_dim, input_dim) lower_b: Lower bound bias of shape (B, output_dim) upper_b: Upper bound bias of shape (B, output_dim) input_lb: Input lower bound of shape (B, input_dim) input_ub: Input upper bound of shape (B, input_dim) x_L: Output lower bound of shape (B, output_dim) x_U: Output upper bound of shape (B, output_dim) x_L and x_U can be None, in which case they will be computed from the other parameters. 
""" def __init__(self, lower_A, upper_A, lower_b, upper_b, input_lb, input_ub, x_L=None, x_U=None): super(PerturbationLinear, self).__init__() self.lower_A = lower_A self.upper_A = upper_A self.lower_b = lower_b.unsqueeze(-1) if lower_b is not None else None self.upper_b = upper_b.unsqueeze(-1) if upper_b is not None else None self.input_lb = input_lb.unsqueeze(-1) if input_lb is not None else None self.input_ub = input_ub.unsqueeze(-1) if input_ub is not None else None if x_L is None or x_U is None: mid = (self.input_lb + self.input_ub) / 2 diff = (self.input_ub - self.input_lb) / 2 self.x_U = (self.upper_A @ mid + torch.abs(self.upper_A) @ diff + self.upper_b).squeeze(-1) self.x_L = (self.lower_A @ mid - torch.abs(self.lower_A) @ diff + self.lower_b).squeeze(-1) else: self.x_L = x_L self.x_U = x_U def concretize(self, x, A, sign=-1, aux=None): if A is None: return None else: A_pos = torch.clamp(A, min=0) A_neg = torch.clamp(A, max=0) center = (self.input_lb + self.input_ub) / 2 diff = (self.input_ub - self.input_lb) / 2 if sign == 1: composite_A = A_pos @ self.upper_A + A_neg @ self.lower_A composite_b = A_pos @ self.upper_b + A_neg @ self.lower_b bound = composite_A @ center + torch.abs(composite_A) @ diff + composite_b else: composite_A = A_pos @ self.lower_A + A_neg @ self.upper_A composite_b = A_pos @ self.lower_b + A_neg @ self.upper_b bound = composite_A @ center - torch.abs(composite_A) @ diff + composite_b return bound.squeeze(-1) def init(self, x, aux=None, forward=False): if not forward: return LinearBound(None, None, None, None, self.x_L, self.x_U), x, None else: raise NotImplementedError("Linear perturbation does not support forward mode.") class PerturbationSynonym(Perturbation): def __init__(self, budget, eps=1.0, use_simple=False): super(PerturbationSynonym, self).__init__() self._load_synonyms() self.budget = budget self.eps = eps self.use_simple = use_simple self.model = None self.train = False def __repr__(self): return (f'perturbation(Synonym-based word substitution ' f'budget={self.budget}, eps={self.eps})') def _load_synonyms(self, path='data/synonyms.json'): with open(path) as file: self.synonym = json.loads(file.read()) logger.info('Synonym list loaded for {} words'.format(len(self.synonym))) def set_train(self, train): self.train = train def concretize(self, x, A, sign, aux): assert(self.model is not None) x_rep, mask, can_be_replaced = aux batch_size, length, dim_word = x.shape[0], x.shape[1], x.shape[2] dim_out = A.shape[1] max_num_cand = x_rep.shape[2] mask_rep = torch.tensor(can_be_replaced, dtype=torch.get_default_dtype(), device=A.device) num_pos = int(np.max(np.sum(can_be_replaced, axis=-1))) update_A = A.shape[-1] > num_pos * dim_word if update_A: bias = torch.bmm(A, (x * (1 - mask_rep).unsqueeze(-1)).reshape(batch_size, -1, 1)).squeeze(-1) else: bias = 0. 
A = A.reshape(batch_size, dim_out, -1, dim_word) A_new, x_new, x_rep_new, mask_new = [], [], [], [] zeros_A = torch.zeros(dim_out, dim_word, device=A.device) zeros_w = torch.zeros(dim_word, device=A.device) zeros_rep = torch.zeros(max_num_cand, dim_word, device=A.device) zeros_mask = torch.zeros(max_num_cand, device=A.device) for t in range(batch_size): cnt = 0 for i in range(0, length): if can_be_replaced[t][i]: if update_A: A_new.append(A[t, :, i, :]) x_new.append(x[t][i]) x_rep_new.append(x_rep[t][i]) mask_new.append(mask[t][i]) cnt += 1 if update_A: A_new += [zeros_A] * (num_pos - cnt) x_new += [zeros_w] * (num_pos - cnt) x_rep_new += [zeros_rep] * (num_pos - cnt) mask_new += [zeros_mask] * (num_pos - cnt) if update_A: A = torch.cat(A_new).reshape(batch_size, num_pos, dim_out, dim_word).transpose(1, 2) x = torch.cat(x_new).reshape(batch_size, num_pos, dim_word) x_rep = torch.cat(x_rep_new).reshape(batch_size, num_pos, max_num_cand, dim_word) mask = torch.cat(mask_new).reshape(batch_size, num_pos, max_num_cand) length = num_pos A = A.reshape(batch_size, A.shape[1], length, -1).transpose(1, 2) x = x.reshape(batch_size, length, -1, 1) if sign == 1: cmp, init = torch.max, -1e30 else: cmp, init = torch.min, 1e30 init_tensor = torch.ones(batch_size, dim_out).to(x.device) * init dp = [[init_tensor] * (self.budget + 1) for i in range(0, length + 1)] dp[0][0] = torch.zeros(batch_size, dim_out).to(x.device) A = A.reshape(batch_size * length, A.shape[2], A.shape[3]) Ax = torch.bmm( A, x.reshape(batch_size * length, x.shape[2], x.shape[3]) ).reshape(batch_size, length, A.shape[1]) Ax_rep = torch.bmm( A, x_rep.reshape(batch_size * length, max_num_cand, x.shape[2]).transpose(-1, -2) ).reshape(batch_size, length, A.shape[1], max_num_cand) Ax_rep = Ax_rep * mask.unsqueeze(2) + init * (1 - mask).unsqueeze(2) Ax_rep_bound = cmp(Ax_rep, dim=-1).values if self.use_simple and self.train: return torch.sum(cmp(Ax, Ax_rep_bound), dim=1) + bias for i in range(1, length + 1): dp[i][0] = dp[i - 1][0] + Ax[:, i - 1] for j in range(1, self.budget + 1): dp[i][j] = cmp( dp[i - 1][j] + Ax[:, i - 1], dp[i - 1][j - 1] + Ax_rep_bound[:, i - 1] ) dp = torch.cat(dp[length], dim=0).reshape(self.budget + 1, batch_size, dim_out) return cmp(dp, dim=0).values + bias def init(self, x, aux=None, forward=False): tokens, batch = aux self.tokens = tokens # DEBUG assert(len(x.shape) == 3) batch_size, length, dim_word = x.shape[0], x.shape[1], x.shape[2] max_pos = 1 can_be_replaced = np.zeros((batch_size, length), dtype=bool) self._build_substitution(batch) for t in range(batch_size): cnt = 0 candidates = batch[t]['candidates'] # for transformers if tokens[t][0] == '[CLS]': candidates = [[]] + candidates + [[]] for i in range(len(tokens[t])): if tokens[t][i] == '[UNK]' or \ len(candidates[i]) == 0 or tokens[t][i] != candidates[i][0]: continue for w in candidates[i][1:]: if w in self.model.vocab: can_be_replaced[t][i] = True cnt += 1 break max_pos = max(max_pos, cnt) dim = max_pos * dim_word if forward: eye = torch.eye(dim_word).to(x.device) lw = torch.zeros(batch_size, dim, length, dim_word).to(x.device) lb = torch.zeros_like(x).to(x.device) word_embeddings = self.model.word_embeddings.weight vocab = self.model.vocab x_rep = [[[] for i in range(length)] for t in range(batch_size)] max_num_cand = 1 for t in range(batch_size): candidates = batch[t]['candidates'] # for transformers if tokens[t][0] == '[CLS]': candidates = [[]] + candidates + [[]] cnt = 0 for i in range(length): if can_be_replaced[t][i]: word_embed = 
word_embeddings[vocab[tokens[t][i]]] # positional embedding and token type embedding other_embed = x[t, i] - word_embed if forward: lw[t, (cnt * dim_word):((cnt + 1) * dim_word), i, :] = eye lb[t, i, :] = torch.zeros_like(word_embed) for w in candidates[i][1:]: if w in self.model.vocab: x_rep[t][i].append( word_embeddings[self.model.vocab[w]] + other_embed) max_num_cand = max(max_num_cand, len(x_rep[t][i])) cnt += 1 else: if forward: lb[t, i, :] = x[t, i, :] if forward: uw, ub = lw, lb else: lw = lb = uw = ub = None zeros = torch.zeros(dim_word, device=x.device) x_rep_, mask = [], [] for t in range(batch_size): for i in range(length): x_rep_ += x_rep[t][i] + [zeros] * (max_num_cand - len(x_rep[t][i])) mask += [1] * len(x_rep[t][i]) + [0] * (max_num_cand - len(x_rep[t][i])) x_rep_ = torch.cat(x_rep_).reshape(batch_size, length, max_num_cand, dim_word) mask = torch.tensor(mask, dtype=torch.get_default_dtype(), device=x.device)\ .reshape(batch_size, length, max_num_cand) x_rep_ = x_rep_ * self.eps + x.unsqueeze(2) * (1 - self.eps) inf = 1e20 lower = torch.min(mask.unsqueeze(-1) * x_rep_ + (1 - mask).unsqueeze(-1) * inf, dim=2).values upper = torch.max(mask.unsqueeze(-1) * x_rep_ + (1 - mask).unsqueeze(-1) * (-inf), dim=2).values lower = torch.min(lower, x) upper = torch.max(upper, x) return LinearBound(lw, lb, uw, ub, lower, upper), x, (x_rep_, mask, can_be_replaced) def _build_substitution(self, batch): for example in batch: if not 'candidates' in example or example['candidates'] is None: candidates = [] tokens = example['sentence'].strip().lower().split(' ') for i in range(len(tokens)): _cand = [] if tokens[i] in self.synonym: for w in self.synonym[tokens[i]]: if w in self.model.vocab: _cand.append(w) if len(_cand) > 0: _cand = [tokens[i]] + _cand candidates.append(_cand) example['candidates'] = candidates ================================================ FILE: auto_LiRPA/solver_module.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### from .bound_ops import * from typing import TYPE_CHECKING if TYPE_CHECKING: from .bound_general import BoundedModule def build_solver_module(self: 'BoundedModule', x=None, C=None, interm_bounds=None, final_node_name=None, model_type="mip", solver_pkg="gurobi", set_input=True): r"""build lp/mip solvers in general graph. Args: x: inputs, a list of BoundedTensor. If set to None, we reuse exisint bounds that were previously computed in compute_bounds(). C (Tensor): The specification matrix that can map the output of the model with an additional linear layer. This is usually used for maping the logits output of the model to classification margins. interm_bounds: if specified, will replace existing intermediate layer bounds. Otherwise we reuse exising intermediate bounds. 
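        Example (hedged usage sketch; assumes gurobipy is installed and imported
        as `grb`, and that compute_bounds() has already been called so that
        intermediate layer bounds exist and can be reused):

            out_vars = lirpa_model.build_solver_module(model_type='mip')
            lirpa_model.solver_model.setObjective(out_vars[0], grb.GRB.MINIMIZE)
            lirpa_model.solver_model.optimize()
            print(lirpa_model.solver_model.objVal)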
final_node_name (String): the name for the target layer to optimize solver_pkg (String): the backbone of the solver, default gurobi, also support scipy Returns: output vars (list): a list of final nodes to optimize """ # self.root_names: list of root node name # self.final_name: list of output node name # self.final_node: output module # .input: a list of input modules of this layer module # .solver_vars: a list of gurobi vars of every layer module # list with conv shape if conv layers, otherwise flattened # if last layer we need to be careful with: # C: specification matrix # .is_input_perturbed(1) if x is not None: assert interm_bounds is not None # Set the model to use new intermediate layer bounds, ignore the original ones. self.set_input(x, interm_bounds=interm_bounds) roots = [self[name] for name in self.root_names] # create interval ranges for input and other weight parameters for i in range(len(roots)): # if isinstance(root[i], BoundInput) and not isinstance(root[i], BoundParams): if type(roots[i]) is BoundInput: # create input vars for gurobi self.model if set_input: inp_gurobi_vars = self._build_solver_input(roots[i]) else: value = roots[i].forward() # regular weights roots[i].solver_vars = value final = self.final_node() if final_node_name is None else self[final_node_name] # backward propagate every layer including last layer self._build_solver_general(node=final, C=C, model_type=model_type, solver_pkg=solver_pkg) # a list of output solver vars return final.solver_vars def _build_solver_general(self: 'BoundedModule', node: Bound, C=None, model_type="mip", solver_pkg="gurobi"): if not hasattr(node, 'solver_vars'): if not node.perturbed: # if not perturbed, just forward node.solver_vars = self.get_forward_value(node) return node.solver_vars for n in node.inputs: self._build_solver_general(n, C=C, model_type=model_type) inp = [n_pre.solver_vars for n_pre in node.inputs] if C is not None and isinstance(node, BoundLinear) and\ not node.is_input_perturbed(1) and self.final_name == node.name: # when node is the last layer # merge the last BoundLinear node with the specification, # available when weights of this layer are not perturbed solver_vars = node.build_solver(*inp, model=self.solver_model, C=C, model_type=model_type, solver_pkg=solver_pkg) else: solver_vars = node.build_solver(*inp, model=self.solver_model, C=None, model_type=model_type, solver_pkg=solver_pkg) # just return output node gurobi vars return solver_vars def _reset_solver_vars(self: 'BoundedModule', node: Bound, iteration=True): if hasattr(node, 'solver_vars'): del node.solver_vars if iteration: if hasattr(node, 'inputs'): for n in node.inputs: self._reset_solver_vars(n) def _reset_solver_model(self: 'BoundedModule'): self.solver_model.remove(self.solver_model.getVars()) self.solver_model.remove(self.solver_model.getConstrs()) self.solver_model.update() def _build_solver_input(self: 'BoundedModule', node): ## Do the input layer, which is a special case assert isinstance(node, BoundInput) assert node.perturbation is not None if self.solver_model is None: self.solver_model = grb.Model() # zero var will be shared within the solver model zero_var = self.solver_model.addVar(lb=0, ub=0, obj=0, vtype=grb.GRB.CONTINUOUS, name='zero') one_var = self.solver_model.addVar(lb=1, ub=1, obj=0, vtype=grb.GRB.CONTINUOUS, name='one') neg_one_var = self.solver_model.addVar(lb=-1, ub=-1, obj=0, vtype=grb.GRB.CONTINUOUS, name='neg_one') x_L = node.value - node.perturbation.eps if node.perturbation.x_L is None else node.perturbation.x_L 
x_U = node.value + node.perturbation.eps if node.perturbation.x_U is None else node.perturbation.x_U x_L = x_L.min(dim=0).values x_U = x_U.max(dim=0).values input_shape = x_L.shape name_array = [f'inp_{idx}' for idx in range(prod(input_shape))] inp_gurobi_vars_dict = self.solver_model.addVars(*input_shape, lb=x_L, ub=x_U, obj=0, vtype=grb.GRB.CONTINUOUS, name=name_array) inp_gurobi_vars = np.empty(input_shape, dtype=object) for idx in inp_gurobi_vars_dict: inp_gurobi_vars[idx] = inp_gurobi_vars_dict[idx] inp_gurobi_vars = inp_gurobi_vars.tolist() # Flatten the input solver_vars. def flatten(x): if isinstance(x, list): result = [] for item in x: result.extend(flatten(item)) return result else: return [x] # Add extra constraints for the inputs if the perturbation norm is not L_inf. if node.perturbation.norm != float("inf"): if isinstance(inp_gurobi_vars, (list, tuple)): flat_inp_gurobi_vars = flatten(inp_gurobi_vars) else: flat_inp_gurobi_vars = inp_gurobi_vars if hasattr(node.value[0], "flatten"): flat_node_value = node.value.flatten().tolist() else: flat_node_value = node.value assert len(flat_inp_gurobi_vars) == len(flat_node_value), "The input doesn't match the variables" if node.perturbation.norm == 2: # For L2 norm, we directly add a quadratic constraint for cplex compatibility. # TODO: Compare efficiency with the second method below. If the second method is faster, # we should use it for L2 norm by default (when cplex is not used). print(f'setup L2 constraint for input with radius {node.perturbation.eps}.') quad_expr = grb.QuadExpr() for var, val in zip(flat_inp_gurobi_vars, flat_node_value): quad_expr.add((var - val) * (var - val)) self.solver_model.addQConstr( quad_expr <= node.perturbation.eps ** 2, name="l2_perturbation" ) else: print(f'setup Lp constraint for input with radius {node.perturbation.eps}.') n = len(flat_inp_gurobi_vars) # Create variables to set up the lp constraint. # We set input = x0 + delta where delta is under the Lp norm constraint. senses = ['='] * n delta_vars = self.solver_model.addVars( n, lb=-grb.GRB.INFINITY, ub=grb.GRB.INFINITY, name="delta" ) diff = -np.array(flat_node_value) vars_list = list(delta_vars.values()) + flat_inp_gurobi_vars self.solver_model.update() A = np.hstack([np.eye(n), -np.eye(n)]) # Add constraints input = x0 + delta as delta - input = -x0. # Here x0 is "flat_node_value" and input is "flat_inp_gurobi_vars". self.solver_model.addMConstr(A, vars_list, senses, diff) # Set up the lp constraint here: \| delta \|_p <= eps. lp_norm_var = self.solver_model.addVar( lb=0, vtype=grb.GRB.CONTINUOUS, name="lp_norm" ) self.solver_model.addGenConstrNorm( lp_norm_var, delta_vars, node.perturbation.norm, name="lp_norm_constr" ) self.solver_model.addConstr( lp_norm_var <= node.perturbation.eps, name="lp_perturbation_radius" ) node.solver_vars = inp_gurobi_vars # Save the gurobi input variables so that we can later extract primal values in input space easily. 
self.input_vars = inp_gurobi_vars self.solver_model.update() return inp_gurobi_vars ================================================ FILE: auto_LiRPA/tools.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import torch from graphviz import Digraph import shutil import re from typing import TYPE_CHECKING, List if TYPE_CHECKING: from .bound_general import BoundedModule def visualize(self: 'BoundedModule', output_path, print_bounds=False): r"""A visualization tool for BoundedModule. If dot engine is available in the system enviornment, it renders the graph and output {output_path}.png. Otherwise, it output a {output_path}.dot. Args: output_path (str): The path to save the graph (without file extension). print_bounds (bool): Whether to display the mean width of the bounds for each node. """ nodes = list(self.nodes()) # Create a directed graph dot = Digraph(format='png', engine='dot') # Add nodes with optional attributes for node in nodes: # we name the Graphviz nodes with the sanitized node name, # while keeping the original name in the label which is displayed in the graph. export_node_name = sanitize_graphviz_name(node.name) label = f"""<
{node.name}
{node.__class__.__name__}
{ tuple(node.output_shape) if node.output_shape is not None else None}
>""" if print_bounds: # Display the mean width of the bounds) # (Both the empirical bound from forward value and the computed bound if available) label = f"""<
{node.name}
{node.__class__.__name__}
{ tuple(node.output_shape) if node.output_shape is not None else None}
{ (node.forward_value.max(dim=0)[0] - node.forward_value.min(dim=0)[0]).to(dtype=torch.float).mean().item() if ( node.perturbed and hasattr(node, "forward_value") and isinstance(node.forward_value, torch.Tensor)) else None}
{ (node.upper - node.lower).to(dtype=torch.float).mean().item() if ( node.perturbed and hasattr(node, "lower") and hasattr(node, "upper") and node.lower is not None and node.upper is not None) else None}
>""" # perturbed nodes are highlighted in grey if getattr(node, "perturbed", False): style_attrs = {'style': 'filled', 'fillcolor': 'lightgrey'} else: style_attrs = {} if node.__class__.__name__ in ["BoundParams", "boundConstant", "BoundBuffers"]: dot.node(export_node_name, label=label, fontsize="8", width="0.5", height="0.2", shape="ellipse", **style_attrs) elif node.__class__.__name__ == "BoundInput": dot.node(export_node_name, label=label, shape="diamond", **style_attrs) else: dot.node(export_node_name, label=label, shape="square", **style_attrs) for inp in node.inputs: dot.edge(sanitize_graphviz_name(inp.name), export_node_name) # Render graph if shutil.which("dot") is None: print("Cannot render the graphviz file (dot not found).") print(f"Graph saved to {output_path}.dot") dot.save(output_path + ".dot") else: dot.render(output_path, cleanup=True) print(f"Graph saved to {output_path}.png") def sanitize_graphviz_name(name): """ Convert problematic characters (like `:`, `::`) in a Graphviz node name to a safe alternative character `_`. """ unsafe_chars = r'[:;,\[\]{}()<>|#*@&=+`~^?"\\\s]' safe_name = re.sub(unsafe_chars, "_", name) return safe_name ================================================ FILE: auto_LiRPA/utils.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. ## ## ## ######################################################################### import logging import time import torch import torch.nn as nn import os import sys import appdirs from collections import defaultdict, namedtuple from functools import reduce import operator import warnings from typing import Tuple from .patches import Patches logging.basicConfig( format='%(levelname)-8s %(asctime)-12s [%(filename)s:%(lineno)d] %(message)s', datefmt='%H:%M:%S', stream=sys.stdout, level=logging.INFO ) logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG if os.environ.get('AUTOLIRPA_DEBUG', 0) else logging.INFO) warnings.simplefilter("once") # Special identity matrix. Avoid extra computation of identity matrix multiplication in various places. eyeC = namedtuple('eyeC', 'shape device') OneHotC = namedtuple('OneHotC', 'shape device index coeffs') BatchedCrownC = namedtuple('BatchedCrownC', 'type') def onehotc_to_dense(one_hot_c: OneHotC, dtype: torch.dtype) -> torch.Tensor: shape = one_hot_c.shape # [spec, batch, C, H, W] dim = int(prod(shape[2:])) dense = torch.zeros( size=(shape[0], shape[1], dim), device=one_hot_c.device, dtype=dtype) # one_hot_c.index has size (spec, batch), its values are the index of the one-hot non-zero elements in A. # one_hot_c.coeffs is the value of the non-zero element. dense = torch.scatter( dense, dim=2, index=one_hot_c.index.unsqueeze(-1), src=one_hot_c.coeffs.unsqueeze(-1)) dense = dense.view(shape[0], shape[1], *shape[2:]) return dense # Benchmarking mode disable some expensive assertions. 
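# ----------------------------------------------------------------------------
# Aside (illustrative note for onehotc_to_dense above, with made-up numbers):
# an OneHotC with shape (spec=2, batch=1, C=1, H=1, W=3), index=[[2], [0]] and
# coeffs=[[1.], [-1.]] densifies to
#     dense[0, 0] = [[[ 0.,  0.,  1.]]]
#     dense[1, 0] = [[[-1.,  0.,  0.]]]
# i.e., each (spec, batch) row of the dense C matrix has exactly one non-zero
# entry, placed at `index` with value `coeffs`.
# ----------------------------------------------------------------------------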
Benchmarking = True reduction_sum = lambda x: x.sum(dim=tuple(range(1, x.dim())), keepdim=True) reduction_mean = lambda x: x.mean(dim=tuple(range(1, x.dim())), keepdim=True) reduction_max = lambda x: x.amax(dim=tuple(range(1, x.dim())), keepdim=True) reduction_min = lambda x: x.amin(dim=tuple(range(1, x.dim())), keepdim=True) MIN_HALF_FP = 5e-8 # 2**-24, which is the smallest value that float16 can be represented def reduction_str2func(reduction_func): if type(reduction_func) == str: if reduction_func == 'min': return reduction_min elif reduction_func == 'max': return reduction_max elif reduction_func == 'sum': return reduction_sum elif reduction_func == 'mean': return reduction_mean else: raise NotImplementedError(f'Unknown reduction_func {reduction_func}') else: return reduction_func def stop_criterion_placeholder(threshold=0): return lambda x: RuntimeError("BUG: bound optimization stop criterion not specified.") def stop_criterion_min(threshold=0): return lambda x: (x.min(1, keepdim=True).values > threshold) def stop_criterion_all(threshold=0): # The dimension of x should be (batch, spec). The spec dimension # This was used in the incomplete verifier, where the spec dimension can # present statements in an OR clause. return lambda x: (x > threshold).all(dim=1, keepdim=True) def stop_criterion_max(threshold=0): return lambda x: (x.max(1, keepdim=True).values > threshold) def stop_criterion_batch(threshold=0): # may unexpected broadcast, pay attention to the shape of threshold # x shape: batch, number_bounds; threshold shape: batch, number_bounds return lambda x: (x > threshold) def stop_criterion_batch_any(threshold=0): """If any spec >= rhs, then this sample can be stopped; if all samples can be stopped, stop = True, o.w., False. """ # may unexpected broadcast, pay attention to the shape of threshold # x shape: batch, number_bounds; threshold shape: batch, number_bounds return lambda x: (x > threshold).any(dim=1, keepdim=True) def stop_criterion_general(or_spec_size, threshold=0): """ If any spec in a group >= rhs, then this group can be stopped; if all groups can be stopped, stop = True, o.w., False. Args: or_clause_indices: [num_clause]. the indices of the belonging OR clauses for AND clauses. num_or: the number of OR clauses. threshold: [batch, num_clause]. The threshold for each spec. sum(or_clause_indices) == num_clauses. """ def stop_criterion_per_or(x): # get the indices of OR clauses assigned to their corresponding atom clauses, [num_clause] num_or = or_spec_size.shape[0] or_clause_indices = torch.repeat_interleave( torch.arange(num_or, device=or_spec_size.device), or_spec_size ).view(1, -1).expand(x.shape) # get the result for each spec. [batch, num_clause] result_per_spec = (x > threshold) # get the number of verified ANDs for each OR clause. [batch, num_or] num_verified_and_per_or = torch.scatter_reduce(result_per_spec[:, :num_or], 1, or_clause_indices, result_per_spec, 'sum', include_self=False) # result of any spec in a OR (group of ANDs) is True (sum >= 1) -> result of the OR is True. return num_verified_and_per_or >= 1 # if all OR clauses are True, then return True. 
[batch, 1] return lambda x: stop_criterion_per_or(x).all(dim=1, keepdim=True) def stop_criterion_batch_topk(threshold=0, k=1314): # x shape: batch, number_bounds; threshold shape: batch, number_bounds return lambda x: (torch.kthvalue(x, k, dim=-1, keepdim=True).values > threshold).any(dim=1) def multi_spec_keep_func_all(x): return torch.all(x, dim=-1) user_data_dir = appdirs.user_data_dir('auto_LiRPA') if not os.path.exists(user_data_dir): try: os.makedirs(user_data_dir) except: logger.error('Failed to create directory {}'.format(user_data_dir)) class MultiAverageMeter(object): """Computes and stores the average and current value for multiple metrics""" def __init__(self): self.reset() def reset(self): self.sum_meter = defaultdict(float) self.lasts = defaultdict(float) self.counts_meter = defaultdict(int) self.batch_size = 1 def set_batch_size(self, batch_size): self.batch_size = batch_size def update(self, key, val, n=None): if val is None: return if n is None: n = self.batch_size if isinstance(val, torch.Tensor): val = val.item() self.lasts[key] = val self.sum_meter[key] += val * n self.counts_meter[key] += n def last(self, key): return self.lasts[key] def avg(self, key): if self.counts_meter[key] == 0: return 0.0 else: return self.sum_meter[key] / self.counts_meter[key] def __repr__(self): s = "" for k in self.sum_meter: s += "{}={:.4f} ".format(k, self.avg(k)) return s.strip() class MultiTimer(object): """Count the time for each part of training.""" def __init__(self): self.reset() def reset(self): self.timer_starts = defaultdict(float) self.timer_total = defaultdict(float) def start(self, key): if self.timer_starts[key] != 0: raise RuntimeError("start() is called more than once") self.timer_starts[key] = time.time() def stop(self, key): if key not in self.timer_starts: raise RuntimeError("Key does not exist; please call start() before stop()") self.timer_total[key] += time.time() - self.timer_starts[key] self.timer_starts[key] = 0 def total(self, key): return self.timer_total[key] def __repr__(self): s = "" for k in self.timer_total: s += "{}_time={:.3f} ".format(k, self.timer_total[k]) return s.strip() class Flatten(nn.Flatten): """Legacy Flatten class. It was previously created when nn.Flatten was not supported. 
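The `stop_criterion_*` helpers above all share one contract: given a tensor of bound margins with shape `(batch, num_specs)` and a threshold, they return a boolean tensor marking which samples can already stop being bounded. A small sketch with made-up margins (values are illustrative only):

```python
import torch
from auto_LiRPA.utils import stop_criterion_all, stop_criterion_batch_any

# Made-up lower-bound margins for 2 samples with 3 specifications each.
lb = torch.tensor([[ 0.2, 0.5, 0.1],
                   [-0.3, 0.4, 0.6]])

# "all" semantics: a sample is done only if every spec margin exceeds the threshold.
print(stop_criterion_all(threshold=0)(lb))        # tensor([[ True], [False]])

# "any" semantics (OR clauses): a single positive spec margin is enough.
print(stop_criterion_batch_any(threshold=0)(lb))  # tensor([[True], [True]])
```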
Simply use nn.Flatten in the future.""" pass class Unflatten(nn.Module): def __init__(self, wh): super().__init__() self.wh = wh # width and height of the feature maps def forward(self, x): return x.view(x.size(0), -1, self.wh, self.wh) class Max(nn.Module): def __init__(self): super(Max, self).__init__() def forward(self, x, y): return torch.max(x, y) class Min(nn.Module): def __init__(self): super(Min, self).__init__() def forward(self, x, y): return torch.min(x, y) def scale_gradients(optimizer, gradient_accumulation_steps, grad_clip=None): parameters = [] for param_group in optimizer.param_groups: for param in param_group['params']: parameters.append(param) if param.grad is not None: param.grad.data /= gradient_accumulation_steps if grad_clip is not None: return torch.nn.utils.clip_grad_norm_(parameters, grad_clip) # unpack tuple, dict, list into one single list # TODO: not sure if the order matches graph.inputs() def unpack_inputs(inputs, device=None): if isinstance(inputs, dict): inputs = list(inputs.values()) if isinstance(inputs, tuple) or isinstance(inputs, list): res = [] for item in inputs: res += unpack_inputs(item, device=device) return res else: if device is not None: inputs = inputs.to(device) return [inputs] def isnan(x): if isinstance(x, Patches): return False return torch.isnan(x).any() def prod(x): return reduce(operator.mul, x, 1) def batched_index_select(input, dim, index): # Assuming the input has a batch dimension. # index has dimensin [spec, batch]. if input.ndim == 4: # Alphas for fully connected layers, shape [2, spec, batch, neurons] index = index.unsqueeze(-1).unsqueeze(0).expand(input.size(0), -1, -1, input.size(3)) elif input.ndim == 6: # Alphas for fully connected layers, shape [2, spec, batch, c, h, w]. index = index.view(1, index.size(0), index.size(1), *([1] * (input.ndim - 3))).expand(input.size(0), -1, -1, *input.shape[3:]) elif input.ndim == 3: # Weights. input = input.expand(index.size(0), -1, -1) index = index.unsqueeze(-1).expand(-1, -1, input.size(2)) elif input.ndim == 2: # Bias. input = input.expand(index.size(0), -1) else: raise ValueError return torch.gather(input, dim, index) def get_spec_matrix(X, y, num_classes): with torch.no_grad(): c = (torch.eye(num_classes).type_as(X)[y].unsqueeze(1) - torch.eye(num_classes).type_as(X).unsqueeze(0)) I = (~(y.unsqueeze(1) == torch.arange(num_classes).type_as(y).unsqueeze(0))) c = (c[I].view(X.size(0), num_classes - 1, num_classes)) return c def unravel_index( indices: torch.LongTensor, shape: Tuple[int, ...], ) -> torch.LongTensor: r"""Converts flat indices into unraveled coordinates in a target shape. Args: indices: A tensor of (flat) indices, (*, N). shape: The targeted shape, (D,). Returns: The unraveled coordinates, a list with tensors in shape (N, D). Code borrowed from: https://github.com/pytorch/pytorch/issues/35674 """ coord = [] for dim in reversed(shape): coord.append(indices % dim) indices = torch.div(indices, dim, rounding_mode='trunc') return list(reversed(coord)) class AutoBatchSize: def __init__(self, init_batch_size, device, vram_ratio=0.9, enable=True): self.batch_size = init_batch_size self.max_actual_batch_size = 0 self.device = device self.vram_ratio = vram_ratio self.enable = enable def record_actual_batch_size(self, actual_batch_size): """Record the actual batch size used. It may be smaller than self.batch_size, especially for the early batches. 
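`get_spec_matrix` above builds the specification matrix `C` used for margin-based robustness verification: for each sample it stacks the rows `e_y - e_j` over all classes `j != y`, so multiplying `C` with the logits yields the margins of the true class over every other class. A short sketch with assumed labels:

```python
import torch
from auto_LiRPA.utils import get_spec_matrix

# Illustrative batch of 2 inputs with ground-truth labels 0 and 2 (3 classes).
X = torch.zeros(2, 4)   # only the dtype and batch size of X matter here
y = torch.tensor([0, 2])
C = get_spec_matrix(X, y, num_classes=3)
print(C.shape)  # torch.Size([2, 2, 3])
print(C[0])     # rows e_0 - e_1 and e_0 - e_2:
                # tensor([[ 1., -1.,  0.],
                #         [ 1.,  0., -1.]])
```

If every entry of the lower bound of `C @ logits` is positive over the perturbation set, the predicted class cannot be flipped.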
""" self.max_actual_batch_size = max(self.max_actual_batch_size, actual_batch_size) def update(self): """Check if the batch size can be enlarged.""" if not self.enable: return None # Only try to update the batch size if the current batch size has # been actually used, as indicated by `max_actual_batch_size` if self.device == 'cpu' or self.max_actual_batch_size < self.batch_size: return None total_vram = torch.cuda.get_device_properties(self.device).total_memory current_vram = torch.cuda.memory_reserved(self.device) if current_vram * 2 >= total_vram * self.vram_ratio: return None new_batch_size = self.batch_size * 2 self.batch_size = new_batch_size logger.debug('Automatically updated batch size to %d', new_batch_size) return { 'current_vram': current_vram, 'total_vram': total_vram, } def sync_params(model_ori: torch.nn.Module, model: 'BoundedModule', loss_fusion: bool = False): """Sync the parameters from a BoundedModule to the original model.""" state_dict_loss = model.state_dict() state_dict = model_ori.state_dict() for name in state_dict_loss: v = state_dict_loss[name] if name.endswith('.param'): name = name[:-6] elif name.endswith('.buffer'): name = name[:-7] else: raise NameError(name) name_ori = model[name].ori_name if loss_fusion: assert name_ori.startswith('model.') name_ori = name_ori[6:] assert name_ori in state_dict state_dict[name_ori] = v model_ori.load_state_dict(state_dict) return state_dict def reduce_broadcast_dims(A, target_shape, left_extra_dims=1): """ When backward propagating tensors that are automatically broadcasted, we need to reduce the broadcasted dimensions to match the input shape. This can be useful for backward bound propagation and backward gradient computation. Args: A: The input tensor. target_shape: The target shape to reduce to. left_extra_dims: The number of dimensions that A should have but the target shape doesn't have. These dimensions are usually added to the left of the target shape and don't need to be reduced (e.g. spec). Example: x1 has shape [a1, a2, a3, a4], x2 has shape [a2, 1, a4], y = x1 * x2. Two types of broadcasting here: 1. Adding additional dimensions to x2 to match the dimension of x1. 2. Broadcasting along existing dimensions length 1. In backward computation from y to x2, we need to reduce (sum) the A matrix to match the shape of x2. The first dimension of A is usually for spec, so the shape usually aligns from the second dimension. """ # Step 1: Dimension doesn't exist in target shape but exists in A. # cnt_sum is the number of dimensions that are broadcast. # (The additional dimensions in A that are not in target shape). cnt_sum = (A.ndim - left_extra_dims) - len(target_shape) # The broadcast dimensions must be the first dimensions in A # (except the extra dimensions and batch dimension). dims = list(range(left_extra_dims + 1, cnt_sum + left_extra_dims + 1)) if dims: A = torch.sum(A, dim=dims, keepdim=False) # Step 2: Dimension exists in target shape, broadcast from 1. # FIXME (05/11/2022): the following condition is not always correct. # We should not rely on checking dimension is "1" or not. dims = [i + left_extra_dims for i in range(left_extra_dims, len(target_shape)) if target_shape[i] == 1 and A.shape[i + left_extra_dims] != 1] if dims: A = torch.sum(A, dim=dims, keepdim=True) # Check the final shape - it should be compatible. assert A.shape[2:] == target_shape[1:] # skip the spec and batch dimension. 
return A @torch.jit.script def matmul_maybe_batched(a: torch.Tensor, b: torch.Tensor, both_batched: bool): # Basically just matmul, but we need to handle the batch dimension. if both_batched: return torch.einsum("b...ij,b...jk->b...ik", a, b) else: return a.matmul(b) def transfer(tensor, device=None, dtype=None, non_blocking=False): """Transfer a tensor to a specific device or dtype.""" if device: tensor = tensor.to(device, non_blocking=non_blocking) if dtype: tensor = tensor.to(dtype) return tensor def clone_sub_A_dict(A_dict, out_in_keys: Tuple): """ Deep copy the A_dict structure for specific out_in_keys. Args: A_dict: The A_dict to be copied. out_in_keys: The (out_key, in_key) pairs to be copied. Returns: A new A_dict with all tensors cloned. """ # Structure: A_dict[out_key][in_key][key] # key in [lA, uA, lbias, ubias, unstable_idx] # lA, uA are tensors or Patches # (there're also types like eyeC, OneHotC, not supported here) # lbias, ubias are tensors # unstable_idx is tensor or tuple of tensors out_key, in_key = out_in_keys src_subdict = A_dict[out_key][in_key] cloned_subdict = {} for key, val in src_subdict.items(): if val is None: cloned_subdict[key] = None continue if isinstance(val, (torch.Tensor, Patches)): cloned_subdict[key] = val.detach().clone() elif isinstance(val, tuple): cloned_subdict[key] = tuple(v.detach().clone() for v in val) else: raise NotImplementedError(f'Unsupported A type {type(val)} for copying.') return cloned_subdict def clone_full_A_dict(A_dict): """ Deep copy the A_dict structure. Args: A_dict: The A_dict to be copied. Returns: A new A_dict with all tensors cloned. """ new_A_dict = {} for out_key, in_dict in A_dict.items(): new_A_dict[out_key] = {} for in_key in in_dict: new_A_dict[out_key][in_key] = clone_sub_A_dict(A_dict, (out_key, in_key)) return new_A_dict ================================================ FILE: auto_LiRPA/wrapper.py ================================================ ######################################################################### ## This file is part of the auto_LiRPA library, a core part of the ## ## α,β-CROWN (alpha-beta-CROWN) neural network verifier developed ## ## by the α,β-CROWN Team ## ## ## ## Copyright (C) 2020-2025 The α,β-CROWN Team ## ## Team leaders: ## ## Faculty: Huan Zhang (UIUC) ## ## Student: Xiangru Zhong (UIUC) ## ## ## ## See CONTRIBUTORS for all current and past developers in the team. ## ## ## ## This program is licensed under the BSD 3-Clause License, ## ## contained in the LICENCE file in this directory. 
## ## ## ######################################################################### import torch import torch.nn as nn class CrossEntropyWrapper(nn.Module): def __init__(self, model): super(CrossEntropyWrapper, self).__init__() self.model = model def forward(self, x, labels): y = self.model(x) logits = y - torch.gather(y, dim=-1, index=labels.unsqueeze(-1)) return torch.exp(logits).sum(dim=-1, keepdim=True) class CrossEntropyWrapperMultiInput(nn.Module): def __init__(self, model): super(CrossEntropyWrapperMultiInput, self).__init__() self.model = model def forward(self, labels, *x): y = self.model(*x) logits = y - torch.gather(y, dim=-1, index=labels.unsqueeze(-1)) return torch.exp(logits).sum(dim=-1, keepdim=True) ================================================ FILE: doc/.gitignore ================================================ _build sections *.md !src/*.md !README.md ================================================ FILE: doc/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: doc/README.md ================================================ # Documentation This directory contains source files for building our documentation. Please view the compiled documentation on our [documentation page](https://auto-lirpa.readthedocs.io/en/latest/?badge=latest), as some links may not work here on GitHub. ## Dependencies Install additional libraries for building documentations: ```bash pip install -r requirements.txt ``` ## Build Build documentations in HTML: ``` make html ``` The documentation will be generated at `_build/html`. ================================================ FILE: doc/api.rst ================================================ API Usage ====================================== .. autoclass:: auto_LiRPA.BoundedModule .. autofunction:: auto_LiRPA.BoundedModule.forward .. autofunction:: auto_LiRPA.BoundedModule.compute_bounds .. autofunction:: auto_LiRPA.BoundedModule.save_intermediate .. autoclass:: auto_LiRPA.bound_ops.Bound .. autofunction:: auto_LiRPA.bound_ops.Bound.forward .. autofunction:: auto_LiRPA.bound_ops.Bound.interval_propagate .. autofunction:: auto_LiRPA.bound_ops.Bound.bound_forward .. autofunction:: auto_LiRPA.bound_ops.Bound.bound_backward .. autoclass:: auto_LiRPA.perturbations.Perturbation .. autofunction:: auto_LiRPA.perturbations.Perturbation.concretize .. autofunction:: auto_LiRPA.perturbations.Perturbation.init Indices and tables ------------------- * :ref:`genindex` * :ref:`search` .. * :ref:`modindex` ================================================ FILE: doc/conf.py ================================================ # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. 
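The `CrossEntropyWrapper` modules above output `sum_j exp(y_j - y_label)`, whose logarithm is exactly the per-sample cross-entropy loss; this identity is what lets loss-fusion training use `log(ub)` of the wrapped model's upper bound as a robust loss. A quick numerical check with a made-up linear classifier (the model, shapes, and labels below are illustrative):

```python
import torch
import torch.nn.functional as F
from auto_LiRPA.wrapper import CrossEntropyWrapper

# Made-up classifier and batch, only to verify the identity
# log(CrossEntropyWrapper(model)(x, labels)) == per-sample cross-entropy.
model = torch.nn.Linear(4, 3)
x = torch.randn(2, 4)
labels = torch.tensor([0, 2])

wrapped = CrossEntropyWrapper(model)
loss_wrapped = torch.log(wrapped(x, labels)).squeeze(-1)
loss_ce = F.cross_entropy(model(x), labels, reduction='none')
print(torch.allclose(loss_wrapped, loss_ce, atol=1e-6))  # True
```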
For a full # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import subprocess import inspect import sys from pygit2 import Repository sys.path.insert(0, '..') import auto_LiRPA subprocess.run(['python', 'process.py']) # -- Project information ----------------------------------------------------- project = 'auto_LiRPA' author = 'auto-LiRPA authors' copyright = f'2020-2025, {author}' # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.linkcode', 'm2r2', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'src', 'Thumbs.db', '.DS_Store'] # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] repo = Repository('../') branch = repo.head.shorthand # Resolve function for the linkcode extension. def linkcode_resolve(domain, info): def find_source(): obj = auto_LiRPA parts = info['fullname'].split('.') if info['module'].endswith(f'.{parts[0]}'): module = info['module'][:-len(parts[0])-1] else: module = info['module'] obj = sys.modules[module] for part in parts: obj = getattr(obj, part) fn = inspect.getsourcefile(obj) source, lineno = inspect.getsourcelines(obj) return fn, lineno, lineno + len(source) - 1 fn, lineno_start, lineno_end = find_source() filename = f'{fn}#L{lineno_start}-L{lineno_end}' return f"https://github.com/Verified-Intelligence/auto_LiRPA/blob/{branch}/doc/{filename}" ================================================ FILE: doc/index.rst ================================================ .. auto_LiRPA documentation master file, created by sphinx-quickstart on Wed Jul 14 21:56:10 2021. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Documentation for `auto_LiRPA `_ =========================================================================== .. toctree:: :hidden: installation quick-start examples api custom_op paper .. raw:: html

.. mdinclude:: sections/introduction.md Usage ----- * :doc:`Installation ` * :doc:`Quick Start ` * :doc:`More Working Examples ` * :doc:`API Usage ` * :doc:`Custom Operators ` * :doc:`Reproducing our NeurIPS 2020 paper ` ================================================ FILE: doc/process.py ================================================ """ Process source files before running Sphinx""" import re import os import shutil from pygit2 import Repository repo = 'https://github.com/Verified-Intelligence/auto_LiRPA' branch = Repository('.').head.shorthand repo_file_path = os.path.join(repo, 'tree', branch) # Parse README.md into sections which can be reused heading = '' copied = {} print('Parsing markdown sections from README:') with open('../README.md') as file: for line in file.readlines(): if line.startswith('##'): heading = line[2:].strip() else: if not heading in copied: copied[heading] = '' copied[heading] += line if not os.path.exists('sections'): os.makedirs('sections') for key in copied: if key == '': continue filename = re.sub(r"[?+\'\"]", '', key.lower()) filename = re.sub(r" ", '-', filename) + '.md' print(filename) with open(os.path.join('sections', filename), 'w') as file: file.write(f'## {key}\n') file.write(copied[key]) print() # Load source files and fix links to GitHub for folder in ['src', 'sections']: for filename in os.listdir(folder): print(f'Processing {folder}/{filename}') with open(os.path.join(folder, filename)) as file: source = file.read() source_new = '' ptr = 0 for m in re.finditer('(\[.*\])(\(.*\))', source): assert m.start() >= ptr source_new += source[ptr:m.start()] ptr = m.start() source_new += m.group(1) ptr += len(m.group(1)) link_raw = m.group(2) while len(link_raw) >= 2 and link_raw[-2] == ')': link_raw = link_raw[:-1] link = link_raw[1:-1] if link.startswith('https://') or link.startswith('http://') or '.html#' in link: link_new = link else: if folder == 'sections': link_new = os.path.join(repo_file_path, link) else: link_new = os.path.join(repo_file_path, 'docs/src', link) print(f'Fix link {link} -> {link_new}') source_new += f'({link_new})' ptr += len(link_raw) source_new += source[ptr:] with open(filename, 'w') as file: file.write(source_new) print() ================================================ FILE: examples/.gitignore ================================================ auto_LiRPA ================================================ FILE: examples/__init__.py ================================================ ================================================ FILE: examples/language/.gitignore ================================================ model* !modeling* log* res_test.pkl ckpt_* data_language.tar.gz data/ ================================================ FILE: examples/language/Transformer/Transformer.py ================================================ # coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights rved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
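The link-fixing pass in `doc/process.py` above rewrites relative Markdown links into absolute GitHub URLs while leaving `http(s)` links untouched. A simplified, standalone sketch of the same idea (the repository URL and branch below are example values, and the regex is a tightened variant of the one in the script, which derives the branch from the git checkout):

```python
import os
import re

# Example destination; doc/process.py builds this from the repo URL and current branch.
repo_file_path = 'https://github.com/Verified-Intelligence/auto_LiRPA/tree/master'

def fix_links(source):
    def repl(m):
        link = m.group(2)[1:-1]
        if link.startswith(('http://', 'https://')) or '.html#' in link:
            return m.group(0)  # leave absolute links and anchors unchanged
        return f'{m.group(1)}({os.path.join(repo_file_path, link)})'
    return re.sub(r'(\[[^\]]*\])(\([^)]*\))', repl, source)

print(fix_links('See [a toy example](examples/simple/toy.py).'))
# See [a toy example](https://github.com/Verified-Intelligence/auto_LiRPA/tree/master/examples/simple/toy.py).
```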
from __future__ import absolute_import, division, print_function import os import torch import torch.nn as nn from Transformer.modeling import BertForSequenceClassification from pytorch_pretrained_bert.modeling import BertConfig from Transformer.utils import convert_examples_to_features from language_utils import build_vocab from auto_LiRPA.utils import logger class Transformer(nn.Module): def __init__(self, args, data_train): super().__init__() self.args = args self.max_seq_length = args.max_sent_length self.drop_unk = args.drop_unk self.num_labels = args.num_classes self.label_list = range(args.num_classes) self.device = args.device self.lr = args.lr self.dir = args.dir self.vocab = build_vocab(data_train, args.min_word_freq) if not os.path.exists(self.dir): os.makedirs(self.dir) self.checkpoint = 0 config = BertConfig(len(self.vocab)) config.num_hidden_layers = args.num_layers config.embedding_size = args.embedding_size config.hidden_size = args.hidden_size config.intermediate_size = args.intermediate_size config.hidden_act = args.hidden_act config.num_attention_heads = args.num_attention_heads config.layer_norm = args.layer_norm config.hidden_dropout_prob = args.dropout self.model = BertForSequenceClassification( config, self.num_labels, vocab=self.vocab).to(self.device) logger.info("Model initialized") if args.load: checkpoint = torch.load(args.load, map_location=torch.device(self.device)) epoch = checkpoint['epoch'] self.model.embeddings.load_state_dict(checkpoint['state_dict_embeddings']) self.model.model_from_embeddings.load_state_dict(checkpoint['state_dict_model_from_embeddings']) logger.info('Checkpoint loaded: {}'.format(args.load)) self.model_from_embeddings = self.model.model_from_embeddings self.word_embeddings = self.model.embeddings.word_embeddings self.model_from_embeddings.device = self.device def save(self, epoch): self.model.model_from_embeddings = self.model_from_embeddings path = os.path.join(self.dir, "ckpt_{}".format(epoch)) torch.save({ 'state_dict_embeddings': self.model.embeddings.state_dict(), 'state_dict_model_from_embeddings': self.model.model_from_embeddings.state_dict(), 'epoch': epoch }, path) logger.info("Model saved to {}".format(path)) def build_optimizer(self): # update the original model with the converted model self.model.model_from_embeddings = self.model_from_embeddings param_group = [ {"params": [p[1] for p in self.model.named_parameters()], "weight_decay": 0.}, ] return torch.optim.Adam(param_group, lr=self.lr) def train(self): self.model.train() self.model_from_embeddings.train() def eval(self): self.model.eval() self.model_from_embeddings.eval() def get_input(self, batch): features = convert_examples_to_features( batch, self.label_list, self.max_seq_length, self.vocab, drop_unk=self.drop_unk) input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long).to(self.device) input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long).to(self.device) segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long).to(self.device) label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long).to(self.device) tokens = [f.tokens for f in features] embeddings, extended_attention_mask = \ self.model(input_ids, segment_ids, input_mask, embed_only=True) return embeddings, extended_attention_mask, tokens, label_ids def forward(self, batch): embeddings, extended_attention_mask, tokens, label_ids = self.get_input(batch) logits = self.model_from_embeddings(embeddings, extended_attention_mask) preds = 
torch.argmax(logits, dim=1) return preds ================================================ FILE: examples/language/Transformer/__init__.py ================================================ ================================================ FILE: examples/language/Transformer/modeling.py ================================================ # coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """PyTorch BERT model.""" from __future__ import absolute_import, division, print_function, unicode_literals import torch from torch import nn from pytorch_pretrained_bert.modeling import BertIntermediate, BertSelfAttention, BertPreTrainedModel class BertLayerNorm(nn.Module): def __init__(self, hidden_size, eps=1e-12): super(BertLayerNorm, self).__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.bias = nn.Parameter(torch.zeros(hidden_size)) self.variance_epsilon = eps def forward(self, x): u = x.mean(-1, keepdim=True) s = (x - u).pow(2).mean(-1, keepdim=True) x = (x - u) / torch.sqrt(s + self.variance_epsilon) return self.weight * x + self.bias class BertLayerNormNoVar(nn.Module): def __init__(self, hidden_size, eps=1e-12): super(BertLayerNormNoVar, self).__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.bias = nn.Parameter(torch.zeros(hidden_size)) self.variance_epsilon = eps def forward(self, x): u = x.mean(-1, keepdim=True) x = x - u return self.weight * x + self.bias class BertEmbeddings(nn.Module): """Construct the embeddings from word, position and token_type embeddings. 
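`BertLayerNormNoVar` above implements the `no_var` layer-norm option used in these examples: it only centers the activations (and applies the affine parameters) without dividing by the per-feature standard deviation, unlike the standard `BertLayerNorm`. A tiny sketch, assuming it is run from `examples/language/` with the example dependencies (e.g. `pytorch_pretrained_bert`) installed so the module can be imported:

```python
import torch
from Transformer.modeling import BertLayerNormNoVar

ln = BertLayerNormNoVar(hidden_size=4)   # default weight = 1, bias = 0
x = torch.tensor([[1.0, 2.0, 3.0, 6.0]])
with torch.no_grad():
    out = ln(x)
print(out)           # tensor([[-2., -1.,  0.,  3.]])  (the mean 3.0 is subtracted)
print(out.mean(-1))  # tensor([0.])
```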
""" def __init__(self, config, glove=None, vocab=None): super(BertEmbeddings, self).__init__() self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0) self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size) self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.embedding_size) self.config = config def forward(self, input_ids, token_type_ids=None): seq_length = input_ids.size(1) position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) position_ids = position_ids.unsqueeze(0).expand_as(input_ids) if token_type_ids is None: token_type_ids = torch.zeros_like(input_ids) words_embeddings = self.word_embeddings(input_ids) position_embeddings = self.position_embeddings(position_ids) token_type_embeddings = self.token_type_embeddings(token_type_ids) # position/token_type embedding disabled # embeddings = words_embeddings + position_embeddings + token_type_embeddings embeddings = words_embeddings return embeddings class BertSelfOutput(nn.Module): def __init__(self, config): super(BertSelfOutput, self).__init__() self.config = config self.dense = nn.Linear(config.hidden_size, config.hidden_size) if hasattr(config, "layer_norm") and config.layer_norm == "no_var": self.LayerNorm = BertLayerNormNoVar(config.hidden_size, eps=1e-12) else: self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) self.dropout = nn.Dropout(config.hidden_dropout_prob) def forward(self, hidden_states, input_tensor): hidden_states = self.dense(hidden_states) hidden_states = self.dropout(hidden_states) if hidden_states.shape[-1] == input_tensor.shape[-1]: hidden_states = hidden_states + input_tensor if hasattr(self.config, "layer_norm") and self.config.layer_norm == "no": pass else: hidden_states = self.LayerNorm(hidden_states) return hidden_states class BertAttention(nn.Module): def __init__(self, config, input_size): super(BertAttention, self).__init__() self.self = BertSelfAttention(config) self.output = BertSelfOutput(config) def forward(self, input_tensor, attention_mask): self_output = self.self(input_tensor, attention_mask) attention_output = self.output(self_output, input_tensor) return attention_output class BertOutput(nn.Module): def __init__(self, config): super(BertOutput, self).__init__() self.config = config self.dense = nn.Linear(config.intermediate_size, config.hidden_size) if hasattr(config, "layer_norm") and config.layer_norm == "no_var": self.LayerNorm = BertLayerNormNoVar(config.hidden_size, eps=1e-12) else: self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12) self.dropout = nn.Dropout(config.hidden_dropout_prob) def forward(self, hidden_states, input_tensor): hidden_states = self.dense(hidden_states) hidden_states = self.dropout(hidden_states) hidden_states = hidden_states + input_tensor if hasattr(self.config, "layer_norm") and self.config.layer_norm == "no": pass else: hidden_states = self.LayerNorm(hidden_states) return hidden_states class BertLayer(nn.Module): def __init__(self, config, layer_id): super(BertLayer, self).__init__() self.input_size = config.hidden_size self.attention = BertAttention(config, self.input_size) self.intermediate = BertIntermediate(config) self.output = BertOutput(config) def forward(self, hidden_states, attention_mask): attention_output = self.attention(hidden_states, attention_mask) intermediate_output = self.intermediate(attention_output) layer_output = self.output(intermediate_output, attention_output) return layer_output class 
BertEncoder(nn.Module): def __init__(self, config): super(BertEncoder, self).__init__() self.layer = nn.ModuleList([BertLayer(config, i) for i in range(config.num_hidden_layers)]) def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True): all_encoder_layers = [] for layer_module in self.layer: hidden_states = layer_module(hidden_states, attention_mask) if output_all_encoded_layers: all_encoder_layers.append(hidden_states) if not output_all_encoded_layers: all_encoder_layers.append(hidden_states) return all_encoder_layers class BertPooler(nn.Module): def __init__(self, config): super(BertPooler, self).__init__() self.dense = nn.Linear(config.hidden_size, config.hidden_size) self.activation = nn.Tanh() def forward(self, hidden_states): # We "pool" the model by simply taking the hidden state corresponding # to the first token. first_token_tensor = hidden_states[:, 0] pooled_output = self.dense(first_token_tensor) pooled_output = self.activation(pooled_output) return pooled_output class BertModelFromEmbeddings(BertPreTrainedModel): def __init__(self, config): super(BertModelFromEmbeddings, self).__init__(config) self.encoder = BertEncoder(config) self.pooler = BertPooler(config) self.apply(self.init_bert_weights) def forward(self, embeddings, extended_attention_mask): encoded_layers = self.encoder(embeddings, extended_attention_mask) sequence_output = encoded_layers[-1] pooled_output = self.pooler(sequence_output) return pooled_output class BertForSequenceClassificationFromEmbeddings(BertPreTrainedModel): def __init__(self, config, num_labels=2): super(BertForSequenceClassificationFromEmbeddings, self).__init__(config) self.num_labels = num_labels self.bert = BertModelFromEmbeddings(config) self.dropout = nn.Dropout(config.hidden_dropout_prob) self.classifier = nn.Linear(config.hidden_size, num_labels) self.linear_in = nn.Linear(config.embedding_size, config.hidden_size) self.layer_norm = config.layer_norm if hasattr(config, "layer_norm") and config.layer_norm == "no_var": self.LayerNorm = BertLayerNormNoVar(config.embedding_size, eps=1e-12) else: self.LayerNorm = BertLayerNorm(config.embedding_size, eps=1e-12) self.apply(self.init_bert_weights) def forward(self, embeddings, extended_attention_mask): embeddings = self.linear_in(embeddings) if self.layer_norm == "no": pass else: embeddings = self.LayerNorm(embeddings) embeddings = self.dropout(embeddings) pooled_output = self.bert(embeddings, extended_attention_mask) pooled_output = self.dropout(pooled_output) logits = self.classifier(pooled_output) return logits class BertForSequenceClassification(BertPreTrainedModel): def __init__(self, config, num_labels=2, glove=None, vocab=None): super(BertForSequenceClassification, self).__init__(config) self.model_from_embeddings = BertForSequenceClassificationFromEmbeddings( config, num_labels ) self.num_labels = num_labels self.embeddings = BertEmbeddings(config, glove=glove, vocab=vocab) self.apply(self.init_bert_weights) def forward(self, input_ids, token_type_ids=None, attention_mask=None, embed_only=False): if attention_mask is None: attention_mask = torch.ones_like(input_ids) if token_type_ids is None: token_type_ids = torch.zeros_like(input_ids) extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype) # fp16 compatibility extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 embeddings = self.embeddings(input_ids, token_type_ids) if embed_only: return 
embeddings, extended_attention_mask logits = self.model_from_embeddings(embeddings, extended_attention_mask) return logits ================================================ FILE: examples/language/Transformer/utils.py ================================================ # coding=utf-8 # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. # Copyright (c) 2018, NVIDIA CORPORATION. All rights rved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from language_utils import tokenize, token_to_id class InputExample(object): def __init__(self, guid, text_a, text_b=None, label=None): self.guid = guid self.text_a = text_a self.text_b = text_b self.label = label class InputFeatures(object): def __init__(self, input_ids, input_mask, segment_ids, label_id, tokens): self.input_ids = input_ids self.input_mask = input_mask self.segment_ids = segment_ids self.label_id = label_id self.tokens = tokens def convert_examples_to_features(examples, label_list, max_seq_length, vocab, drop_unk=False): #tokenizer): """Loads a data file into a list of `InputBatch`s.""" features = [] all_tokens = tokenize(examples, vocab, max_seq_length - 2, drop_unk=drop_unk) for i in range(len(all_tokens)): all_tokens[i] = ["[CLS]"] + all_tokens[i] + ["[SEP]"] all_ids = token_to_id(all_tokens, vocab) max_seq_length = min(max_seq_length, max([len(tokens) for tokens in all_tokens])) for (ex_index, example) in enumerate(examples): tokens = all_tokens[ex_index] segment_ids = [0] * len(tokens) input_ids = all_ids[ex_index] input_mask = [1] * len(input_ids) padding = [0] * (max_seq_length - len(input_ids)) input_ids += padding input_mask += padding segment_ids += padding assert len(input_ids) == max_seq_length assert len(input_mask) == max_seq_length assert len(segment_ids) == max_seq_length features.append(InputFeatures( input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, label_id=example["label"], tokens=tokens)) return features ================================================ FILE: examples/language/data_utils.py ================================================ import random import json from auto_LiRPA.utils import logger def load_data_sst(): data = [] for split in ['train_all_nodes', 'train', 'dev', 'test']: with open('data/sst/{}.json'.format(split)) as file: data.append(json.loads(file.read())) return data def load_data(dataset): if dataset == "sst": return load_data_sst() else: raise NotImplementedError('Unknown dataset {}'.format(dataset)) def clean_data(data): return [example for example in data if example['candidates'] is not None] def get_batches(data, batch_size): batches = [] random.shuffle(data) for i in range((len(data) + batch_size - 1) // batch_size): batches.append(data[i * batch_size : (i + 1) * batch_size]) return batches ================================================ FILE: examples/language/language_utils.py ================================================ from auto_LiRPA.utils import logger import numpy as np def build_vocab(data_train, min_word_freq, dump=False, include=[]): vocab = { 
'[PAD]': 0, '[UNK]': 1, '[CLS]': 2, '[SEP]': 3, '[MASK]': 4 } cnt = {} for example in data_train: for token in example['sentence'].strip().lower().split(): if token in cnt: cnt[token] += 1 else: cnt[token] = 1 for w in cnt: if cnt[w] >= min_word_freq or w in include: vocab[w] = len(vocab) logger.info('Vocabulary size: {}'.format(len(vocab))) if dump: with open('tmp/vocab.txt', 'w') as file: for w in vocab.keys(): file.write('{}\n'.format(w)) return vocab def tokenize(batch, vocab, max_seq_length, drop_unk=False): res = [] for example in batch: t = example['sentence'].strip().lower().split(' ') if drop_unk: tokens = [w for w in t if w in vocab][:max_seq_length] else: tokens = [] for token in t[:max_seq_length]: if token in vocab: tokens.append(token) else: tokens.append('[UNK]') res.append(tokens) return res def token_to_id(tokens, vocab): ids = [] for t in tokens: ids.append([vocab[w] for w in t]) return ids ================================================ FILE: examples/language/lstm.py ================================================ import os import shutil import torch import torch.nn as nn import torch.nn.functional as F from auto_LiRPA.utils import logger from language_utils import build_vocab class LSTMFromEmbeddings(nn.Module): def __init__(self, args, vocab_size): super(LSTMFromEmbeddings, self).__init__() self.embedding_size = args.embedding_size self.hidden_size = args.hidden_size self.num_classes = args.num_classes self.device = args.device self.cell_f = nn.LSTMCell(self.embedding_size, self.hidden_size) self.cell_b = nn.LSTMCell(self.embedding_size, self.hidden_size) self.linear = nn.Linear(self.hidden_size * 2, self.num_classes) if args.dropout is not None: self.dropout = nn.Dropout(p=args.dropout) logger.info('LSTM dropout: {}'.format(args.dropout)) else: self.dropout = None def forward(self, embeddings, mask): if self.dropout is not None: embeddings = self.dropout(embeddings) embeddings = embeddings * mask.unsqueeze(-1) batch_size = embeddings.shape[0] length = embeddings.shape[1] h_f = torch.zeros(batch_size, self.hidden_size).to(embeddings.device) c_f = h_f.clone() h_b, c_b = h_f.clone(), c_f.clone() h_f_sum, h_b_sum = h_f.clone(), h_b.clone() for i in range(length): h_f, c_f = self.cell_f(embeddings[:, i], (h_f, c_f)) h_b, c_b = self.cell_b(embeddings[:, length - i - 1], (h_b, c_b)) h_f_sum = h_f_sum + h_f h_b_sum = h_b_sum + h_b states = torch.cat([h_f_sum / float(length), h_b_sum / float(length)], dim=-1) logits = self.linear(states) return logits class LSTM(nn.Module): def __init__(self, args, data_train): super(LSTM, self).__init__() self.args = args self.embedding_size = args.embedding_size self.max_seq_length = args.max_sent_length self.min_word_freq = args.min_word_freq self.device = args.device self.lr = args.lr self.dir = args.dir if not os.path.exists(self.dir): os.makedirs(self.dir) self.vocab = self.vocab_actual = build_vocab(data_train, args.min_word_freq) self.checkpoint = 0 if args.load: ckpt = torch.load(args.load, map_location=torch.device(self.device)) self.embedding = torch.nn.Embedding(len(self.vocab), self.embedding_size) self.model_from_embeddings = LSTMFromEmbeddings(args, len(self.vocab)) self.model = self.embedding, LSTMFromEmbeddings(args, len(self.vocab)) self.embedding.load_state_dict(ckpt['state_dict_embedding']) self.model_from_embeddings.load_state_dict(ckpt['state_dict_model_from_embeddings']) self.checkpoint = ckpt['epoch'] else: self.embedding = torch.nn.Embedding(len(self.vocab), self.embedding_size) self.model_from_embeddings = 
LSTMFromEmbeddings(args, len(self.vocab)) self.model = self.embedding, LSTMFromEmbeddings(args, len(self.vocab)) logger.info("Model initialized") self.embedding = self.embedding.to(self.device) self.model_from_embeddings = self.model_from_embeddings.to(self.device) self.word_embeddings = self.embedding def save(self, epoch): path = os.path.join(self.dir, 'ckpt_{}'.format(epoch)) torch.save({ 'state_dict_embedding': self.embedding.state_dict(), 'state_dict_model_from_embeddings': self.model_from_embeddings.state_dict(), 'epoch': epoch }, path) logger.info('LSTM saved: {}'.format(path)) def build_optimizer(self): self.model = (self.model[0], self.model_from_embeddings) param_group = [] for m in self.model: for p in m.named_parameters(): param_group.append(p) param_group = [{"params": [p[1] for p in param_group], "weight_decay": 0.}] return torch.optim.Adam(param_group, lr=self.lr) def get_input(self, batch): mask, tokens = [], [] for example in batch: _tokens = [] for token in example["sentence"].strip().lower().split(' ')[:self.max_seq_length]: if token in self.vocab: _tokens.append(token) else: _tokens.append("[UNK]") tokens.append(_tokens) max_seq_length = max([len(t) for t in tokens]) token_ids = [] for t in tokens: ids = [self.vocab[w] for w in t] mask.append(torch.cat([ torch.ones(1, len(ids)), torch.zeros(1, self.max_seq_length - len(ids)) ], dim=-1).to(self.device)) ids += [self.vocab["[PAD]"]] * (self.max_seq_length - len(ids)) token_ids.append(ids) embeddings = self.embedding(torch.tensor(token_ids, dtype=torch.long).to(self.device)) mask = torch.cat(mask, dim=0) label_ids = torch.tensor([example["label"] for example in batch]).to(self.device) return embeddings, mask, tokens, label_ids def train(self): self.model_from_embeddings.train() def eval(self): self.model_from_embeddings.eval() ================================================ FILE: examples/language/oracle.py ================================================ import torch from auto_LiRPA.utils import logger from auto_LiRPA import PerturbationSynonym from data_utils import get_batches def oracle(args, model, ptb, data, type): logger.info('Running oracle for {}'.format(type)) model.eval() assert(isinstance(ptb, PerturbationSynonym)) cnt_cor = 0 word_embeddings = model.word_embeddings.weight vocab = model.vocab for t, example in enumerate(data): embeddings, mask, tokens, label_ids = model.get_input([example]) candidates = example['candidates'] if tokens[0][0] == '[CLS]': candidates = [[]] + candidates + [[]] embeddings_all = [] def dfs(tokens, embeddings, budget, index): if index == len(tokens): embeddings_all.append(embeddings.cpu()) return dfs(tokens, embeddings, budget, index + 1) if budget > 0 and tokens[index] != '[UNK]' and len(candidates[index]) > 0\ and tokens[index] == candidates[index][0]: for w in candidates[index][1:]: if w in vocab: _embeddings = torch.cat([ embeddings[:index], word_embeddings[vocab[w]].unsqueeze(0), embeddings[index + 1:] ], dim=0) dfs(tokens, _embeddings, budget - 1, index + 1) dfs(tokens[0], embeddings[0], ptb.budget, 0) cor = True for embeddings in get_batches(embeddings_all, args.oracle_batch_size): embeddings_tensor = torch.cat(embeddings).cuda().reshape(len(embeddings), *embeddings[0].shape) logits = model.model_from_embeddings(embeddings_tensor, mask) for pred in list(torch.argmax(logits, dim=1)): if pred != example['label']: cor = False if not cor: break cnt_cor += cor if (t + 1) % args.log_interval == 0: logger.info('{} {}/{}: oracle robust acc {:.3f}'.format(type, t + 1, len(data), 
cnt_cor * 1. / (t + 1))) logger.info('{}: oracle robust acc {:.3f}'.format(type, cnt_cor * 1. / (t + 1))) ================================================ FILE: examples/language/preprocess/pre_compute_lm_scores.py ================================================ # Ref: https://worksheets.codalab.org/rest/bundles/0x3f614472f4a14393b3d85d5568114591/contents/blob/precompute_lm_scores.py """Precompute language model scores.""" import argparse import json import os import sys import torch from tqdm import tqdm from data_utils import load_data sys.path.insert(0, 'tmp/windweller-l2w/adaptive_softmax') import query as lmquery OPTS = None def parse_args(): parser = argparse.ArgumentParser('Insert a description of this script.') parser.add_argument('--data', type=str, default='sst') parser.add_argument('--out', default='tmp') parser.add_argument('--window-radius', '-w', type=int, default=6) parser.add_argument('--neighbor-file', type=str, default='tmp/synonyms.json') return parser.parse_args() def main(): device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') query_handler = lmquery.load_model(device) with open(OPTS.neighbor_file) as f: neighbors = json.load(f) data_train_warmup, data_train, data_dev, data_test = load_data(OPTS.data) split = [('train', data_train), ('dev', data_dev), ('test', data_test)] for s in split: data = s[1] out_file = os.path.join(OPTS.out, '{}_lm_scores.txt'.format(s[0])) with open(out_file, 'w') as f: for sent_idx, example in enumerate(tqdm(data)): sentence = example["sentence"] print('%d\t%s' % (sent_idx, sentence), file=f) words = sentence.lower().strip().split(' ') for i, w in enumerate(words): if w in neighbors: options = [w] + neighbors[w] start = max(0, i - OPTS.window_radius) end = min(len(words), i + 1 + OPTS.window_radius) # Remove OOV words from prefix and suffix prefix = [x for x in words[start:i] if x in query_handler.word_to_idx] suffix = [x for x in words[i+1:end] if x in query_handler.word_to_idx] queries = [] in_vocab_options = [] for opt in options: if opt in query_handler.word_to_idx: queries.append(prefix + [opt] + suffix) in_vocab_options.append(opt) else: print('%d\t%d\t%s\t%s' % (sent_idx, i, opt, float('-inf')), file=f) if queries: log_probs = query_handler.query(queries, batch_size=16) for x, lp in zip(in_vocab_options, log_probs): print('%d\t%d\t%s\t%s' % (sent_idx, i, x, lp), file=f) f.flush() if __name__ == '__main__': OPTS = parse_args() main() ================================================ FILE: examples/language/preprocess/preprocess_sst.py ================================================ import random, json def load_data_sst(): # training data path = "train-nodes.tsv" data_train_all_nodes = [] with open(path) as file: for line in file.readlines()[1:]: data_train_all_nodes.append({ "sentence": line.split("\t")[0], "label": int(line.split("\t")[1]) }) # train/dev/test data for subset in ["train", "dev", "test"]: path = "{}.txt".format(subset) data = [] with open(path) as file: for line in file.readlines(): segs = line[:-1].split(" ") tokens, word_labels = [], [] label = int(segs[0][1]) if label < 2: label = 0 elif label >= 3: label = 1 else: continue for i in range(len(segs) - 1): if segs[i][0] == "(" and segs[i][1] in ["0", "1", "2", "3", "4"]\ and segs[i + 1][0] != "(": tokens.append(segs[i + 1][:segs[i + 1].find(")")]) word_labels.append(int(segs[i][1])) data.append({ "label": label, "sentence": " ".join(tokens), "word_labels": word_labels }) if subset == "train": data_train = data elif subset == "dev": 
data_dev = data else: data_test = data return data_train_all_nodes, data_train, data_dev, data_test def read_scores(split): res = {} with open('{}_lm_scores.txt'.format(split)) as file: line = file.readline().strip().split('\t') while True: if len(line) < 2: break sentence = line[-1] tokens = sentence.lower().split(' ') candidates = [[] for i in range(len(tokens))] while True: line = file.readline().strip().split('\t') if len(line) != 4: break pos, word, score = int(line[1]), line[2], float(line[3]) if score == float('-inf'): continue if len(candidates[pos]) == 0: if word != tokens[pos]: continue elif score < candidates[pos][0][1] - 5.0: continue candidates[pos].append((word, score)) res[sentence] = [[w[0] for w in cand] for cand in candidates] return res data_train_all_nodes, data_train, data_dev, data_test = load_data_sst() candidates_dev = read_scores('dev') candidates_test = read_scores('test') for example in data_dev: example['candidates'] = candidates_dev[example['sentence']] for example in data_test: example['candidates'] = candidates_test[example['sentence']] with open('train_all_nodes.json', 'w') as file: file.write(json.dumps(data_train_all_nodes)) with open('train.json', 'w') as file: file.write(json.dumps(data_train)) with open('dev.json', 'w') as file: file.write(json.dumps(data_dev)) with open('test.json', 'w') as file: file.write(json.dumps(data_test)) ================================================ FILE: examples/language/train.py ================================================ import argparse import random import pickle import os import pdb import time import logging import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torch.nn import CrossEntropyLoss from torch.utils.tensorboard import SummaryWriter from auto_LiRPA import BoundedModule, BoundedTensor, PerturbationSynonym, CrossEntropyWrapperMultiInput from auto_LiRPA.utils import MultiAverageMeter, logger, scale_gradients from auto_LiRPA.eps_scheduler import * from Transformer.Transformer import Transformer from lstm import LSTM from data_utils import load_data, clean_data, get_batches from oracle import oracle parser = argparse.ArgumentParser() parser.add_argument('--train', action='store_true') parser.add_argument('--robust', action='store_true') parser.add_argument('--oracle', action='store_true') parser.add_argument('--dir', type=str, default='model') parser.add_argument('--checkpoint', type=int, default=None) parser.add_argument('--data', type=str, default='sst', choices=['sst']) parser.add_argument('--seed', type=int, default=0) parser.add_argument('--device', type=str, default='cuda', choices=['cuda', 'cpu']) parser.add_argument('--load', type=str, default=None) parser.add_argument('--legacy_loading', action='store_true', help='use a deprecated way of loading checkpoints for previously saved models') parser.add_argument('--auto_test', action='store_true') parser.add_argument('--eps', type=float, default=1.0) parser.add_argument('--budget', type=int, default=6) parser.add_argument('--method', type=str, default=None, choices=['IBP', 'IBP+backward', 'IBP+backward_train', 'forward', 'forward+backward']) parser.add_argument('--model', type=str, default='transformer', choices=['transformer', 'lstm']) parser.add_argument('--num_epochs', type=int, default=25) parser.add_argument('--num_epochs_all_nodes', type=int, default=20) parser.add_argument('--eps_start', type=int, default=1) parser.add_argument('--eps_length', type=int, default=10) parser.add_argument('--log_interval', 
type=int, default=100) parser.add_argument('--min_word_freq', type=int, default=2) parser.add_argument('--batch_size', type=int, default=32) parser.add_argument('--oracle_batch_size', type=int, default=1024) parser.add_argument('--gradient_accumulation_steps', type=int, default=1) parser.add_argument('--max_sent_length', type=int, default=32) parser.add_argument('--vocab_size', type=int, default=50000) parser.add_argument('--lr', type=float, default=1e-4) parser.add_argument('--lr_decay', type=float, default=1) parser.add_argument('--grad_clip', type=float, default=10.0) parser.add_argument('--num_classes', type=int, default=2) parser.add_argument('--num_layers', type=int, default=1) parser.add_argument('--num_attention_heads', type=int, default=4) parser.add_argument('--hidden_size', type=int, default=64) parser.add_argument('--embedding_size', type=int, default=64) parser.add_argument('--intermediate_size', type=int, default=128) parser.add_argument('--drop_unk', action='store_true') parser.add_argument('--hidden_act', type=str, default='relu') parser.add_argument('--layer_norm', type=str, default='no_var', choices=['standard', 'no', 'no_var']) parser.add_argument('--loss_fusion', action='store_true') parser.add_argument('--dropout', type=float, default=0.1) parser.add_argument('--bound_opts_relu', type=str, default='zero-lb') args = parser.parse_args() writer = SummaryWriter(os.path.join(args.dir, 'log'), flush_secs=10) file_handler = logging.FileHandler(os.path.join(args.dir, 'log/train.log')) file_handler.setFormatter(logging.Formatter('%(levelname)-8s %(asctime)-12s %(message)s')) logger.addHandler(file_handler) data_train_all_nodes, data_train, data_dev, data_test = load_data(args.data) if args.robust: data_dev, data_test = clean_data(data_dev), clean_data(data_test) if args.auto_test: random.seed(args.seed) random.shuffle(data_test) data_test = data_test[:10] assert args.batch_size >= 10 # Use double precision and deterministic algorithm for automatic testing. 
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' torch.use_deterministic_algorithms(True) torch.set_default_dtype(torch.float64) logger.info('Dataset sizes: {}/{}/{}/{}'.format( len(data_train_all_nodes), len(data_train), len(data_dev), len(data_test))) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) dummy_embeddings = torch.zeros(1, args.max_sent_length, args.embedding_size, device=args.device) dummy_labels = torch.zeros(1, dtype=torch.long, device=args.device) if args.model == 'transformer': dummy_mask = torch.zeros(1, 1, 1, args.max_sent_length, device=args.device) model = Transformer(args, data_train) elif args.model == 'lstm': dummy_mask = torch.zeros(1, args.max_sent_length, device=args.device) model = LSTM(args, data_train) dev_batches = get_batches(data_dev, args.batch_size) test_batches = get_batches(data_test, args.batch_size) ptb = PerturbationSynonym(budget=args.budget) dummy_embeddings = BoundedTensor(dummy_embeddings, ptb) model_ori = model.model_from_embeddings bound_opts = { 'activation_bound_option': args.bound_opts_relu, 'exp': 'no-max-input', 'fixed_reducemax_index': True } if isinstance(model_ori, BoundedModule): model_bound = model_ori else: model_bound = BoundedModule( model_ori, (dummy_embeddings, dummy_mask), bound_opts=bound_opts, device=args.device) model.model_from_embeddings = model_bound if args.loss_fusion: bound_opts['loss_fusion'] = True model_loss = BoundedModule( CrossEntropyWrapperMultiInput(model_ori), (torch.zeros(1, dtype=torch.long), dummy_embeddings, dummy_mask), bound_opts=bound_opts, device=args.device) ptb.model = model optimizer = model.build_optimizer() if args.lr_decay < 1: lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=args.lr_decay) else: lr_scheduler = None if args.robust: eps_scheduler = LinearScheduler(args.eps, 'start={},length={}'.format(args.eps_start, args.eps_length)) for i in range(model.checkpoint): eps_scheduler.step_epoch(verbose=False) else: eps_scheduler = None logger.info('Model converted to support bounds') def step(model, ptb, batch, eps=1.0, train=False): model_bound = model.model_from_embeddings if train: model.train() model_bound.train() grad = torch.enable_grad() if args.loss_fusion: model_loss.train() else: model.eval() model_bound.eval() grad = torch.no_grad() if args.auto_test: grad = torch.enable_grad() with grad: ptb.set_eps(eps) ptb.set_train(train) embeddings_unbounded, mask, tokens, labels = model.get_input(batch) aux = (tokens, batch) if args.robust and eps > 1e-9: embeddings = BoundedTensor(embeddings_unbounded, ptb) else: embeddings = embeddings_unbounded.detach().requires_grad_(True) robust = args.robust and eps > 1e-6 if train and robust and args.loss_fusion: # loss_fusion loss if args.method == 'IBP+backward_train': lb, ub = model_loss.compute_bounds( x=(labels, embeddings, mask), aux=aux, C=None, method='IBP+backward', bound_lower=False) else: raise NotImplementedError loss_robust = torch.log(ub).mean() loss = acc = acc_robust = -1 # unknown else: # regular loss logits = model_bound(embeddings, mask) loss = CrossEntropyLoss()(logits, labels) acc = (torch.argmax(logits, dim=1) == labels).float().mean() if robust: num_class = args.num_classes c = torch.eye(num_class).type_as(embeddings)[labels].unsqueeze(1) - \ torch.eye(num_class).type_as(embeddings).unsqueeze(0) I = (~(labels.data.unsqueeze(1) == torch.arange(num_class).type_as(labels.data).unsqueeze(0))) c = (c[I].view(embeddings.size(0), num_class - 1, 
num_class)) if args.method in ['IBP', 'IBP+backward', 'forward', 'forward+backward']: lb, ub = model_bound.compute_bounds(aux=aux, C=c, method=args.method, bound_upper=False) elif args.method == 'IBP+backward_train': # CROWN-IBP if 1 - eps > 1e-4: lb, ub = model_bound.compute_bounds(aux=aux, C=c, method='IBP+backward', bound_upper=False) ilb, iub = model_bound.compute_bounds(aux=aux, C=c, method='IBP', reuse_ibp=True) lb = eps * ilb + (1 - eps) * lb else: lb, ub = model_bound.compute_bounds(aux=aux, C=c, method='IBP') else: raise NotImplementedError lb_padded = torch.cat((torch.zeros(size=(lb.size(0),1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) loss_robust = robust_ce = CrossEntropyLoss()(-lb_padded, fake_labels) acc_robust = 1 - torch.mean((lb < 0).any(dim=1).float()) else: acc_robust, loss_robust = acc, loss if train or args.auto_test: loss_robust.backward() grad_embed = torch.autograd.grad( embeddings_unbounded, model.word_embeddings.weight, grad_outputs=embeddings.grad)[0] if model.word_embeddings.weight.grad is None: model.word_embeddings.weight.grad = grad_embed else: model.word_embeddings.weight.grad += grad_embed if args.auto_test: print('Saving results for automated tests.') print(f'acc={acc}, loss={loss}, robust_acc={acc_robust}, robust_loss={loss_robust}') print('gradients:') print(grad_embed) with open('res_test.pkl', 'wb') as file: pickle.dump(( float(acc), float(loss), float(acc_robust), float(loss_robust), grad_embed.detach().numpy()), file) return acc, loss, acc_robust, loss_robust def train(epoch, batches, type): meter = MultiAverageMeter() assert(optimizer is not None) train = type == 'train' if args.robust: eps_scheduler.set_epoch_length(len(batches)) if train: eps_scheduler.train() eps_scheduler.step_epoch() else: eps_scheduler.eval() for i, batch in enumerate(batches): if args.robust: eps_scheduler.step_batch() eps = eps_scheduler.get_eps() else: eps = 0 acc, loss, acc_robust, loss_robust = step( model, ptb, batch, eps=eps, train=train) meter.update('acc', acc, len(batch)) meter.update('loss', loss, len(batch)) meter.update('acc_rob', acc_robust, len(batch)) meter.update('loss_rob', loss_robust, len(batch)) if train: if (i + 1) % args.gradient_accumulation_steps == 0 or (i + 1) == len(batches): scale_gradients(optimizer, i % args.gradient_accumulation_steps + 1, args.grad_clip) optimizer.step() optimizer.zero_grad() if lr_scheduler is not None: lr_scheduler.step() writer.add_scalar('loss_train_{}'.format(epoch), meter.avg('loss'), i + 1) writer.add_scalar('loss_robust_train_{}'.format(epoch), meter.avg('loss_rob'), i + 1) writer.add_scalar('acc_train_{}'.format(epoch), meter.avg('acc'), i + 1) writer.add_scalar('acc_robust_train_{}'.format(epoch), meter.avg('acc_rob'), i + 1) if (i + 1) % args.log_interval == 0 or (i + 1) == len(batches): logger.info('Epoch {}, {} step {}/{}: eps {:.5f}, {}'.format( epoch, type, i + 1, len(batches), eps, meter)) if lr_scheduler is not None: logger.info('lr {}'.format(lr_scheduler.get_lr())) writer.add_scalar('loss/{}'.format(type), meter.avg('loss'), epoch) writer.add_scalar('loss_robust/{}'.format(type), meter.avg('loss_rob'), epoch) writer.add_scalar('acc/{}'.format(type), meter.avg('acc'), epoch) writer.add_scalar('acc_robust/{}'.format(type), meter.avg('acc_rob'), epoch) if train: if args.loss_fusion: state_dict_loss = model_loss.state_dict() state_dict = {} for name in state_dict_loss: assert(name.startswith('model.')) state_dict[name[6:]] = 
state_dict_loss[name] model_ori.load_state_dict(state_dict) model_bound = BoundedModule( model_ori, (dummy_embeddings, dummy_mask), bound_opts=bound_opts, device=args.device) model.model_from_embeddings = model_bound model.save(epoch) return meter.avg('acc_rob') def main(): if args.train: for t in range(model.checkpoint, args.num_epochs): if t + 1 <= args.num_epochs_all_nodes: train(t + 1, get_batches(data_train_all_nodes, args.batch_size), 'train') else: train(t + 1, get_batches(data_train, args.batch_size), 'train') train(t + 1, dev_batches, 'dev') train(t + 1, test_batches, 'test') elif args.oracle: oracle(args, model, ptb, data_test, 'test') else: if args.robust: for i in range(args.num_epochs): eps_scheduler.step_epoch(verbose=False) res = [] for i in range(1, args.budget + 1): logger.info('budget {}'.format(i)) ptb.budget = i acc_rob = train(None, test_batches, 'test') res.append(acc_rob) logger.info('Verification results:') for i in range(len(res)): logger.info('budget {} acc_rob {:.3f}'.format(i + 1, res[i])) logger.info(res) else: train(None, test_batches, 'test') if __name__ == '__main__': main() ================================================ FILE: examples/sequence/.gitignore ================================================ model/ data/ ================================================ FILE: examples/sequence/__init__.py ================================================ ================================================ FILE: examples/sequence/data_utils.py ================================================ import random from torchvision import transforms from torchvision.datasets.mnist import MNIST as mnist def load_data(): transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) data_train = mnist("data", train=True, download=True, transform=transform) data_test = mnist("data", train=False, download=True, transform=transform) data_train = [data_train[i] for i in range(len(data_train))] data_test = [data_test[i] for i in range(len(data_test))] return data_train, data_test def get_batches(data, batch_size): batches = [] random.shuffle(data) for i in range((len(data) + batch_size - 1) // batch_size): batches.append(data[i * batch_size : (i + 1) * batch_size]) return batches ================================================ FILE: examples/sequence/lstm.py ================================================ import os import shutil import torch import torch.nn as nn from auto_LiRPA.utils import logger class LSTMCore(nn.Module): def __init__(self, args): super(LSTMCore, self).__init__() self.input_size = args.input_size // args.num_slices self.hidden_size = args.hidden_size self.num_classes = args.num_classes self.device = args.device self.cell_f = nn.LSTMCell(self.input_size, self.hidden_size) self.linear = nn.Linear(self.hidden_size, self.num_classes) def forward(self, X): batch_size, length = X.shape[0], X.shape[1] h_f = torch.zeros(batch_size, self.hidden_size).to(X.device) c_f = h_f.clone() h_f_sum = h_f.clone() for i in range(length): h_f, c_f = self.cell_f(X[:, i], (h_f, c_f)) h_f_sum = h_f_sum + h_f states = h_f_sum / float(length) logits = self.linear(states) return logits class LSTM(nn.Module): def __init__(self, args): super(LSTM, self).__init__() self.args = args self.device = args.device self.lr = args.lr self.num_slices = args.num_slices self.dir = args.dir if not os.path.exists(self.dir): os.makedirs(self.dir) self.checkpoint = 0 self.model = LSTMCore(args) if args.load: self.model.load_state_dict(args.load) logger.info(f"Model 
loaded: {args.load}") else: logger.info("Model initialized") self.model = self.model.to(self.device) self.core = self.model def save(self, epoch): output_dir = os.path.join(self.dir, "ckpt-%d" % epoch) if os.path.exists(output_dir): shutil.rmtree(output_dir) os.mkdir(output_dir) path = os.path.join(output_dir, "model") torch.save(self.core.state_dict(), path) with open(os.path.join(self.dir, "checkpoint"), "w") as file: file.write(str(epoch)) logger.info("LSTM saved: %s" % output_dir) def build_optimizer(self): param_group = [] for p in self.core.named_parameters(): param_group.append(p) param_group = [{"params": [p[1] for p in param_group], "weight_decay": 0.}] return torch.optim.Adam(param_group, lr=self.lr) def get_input(self, batch): X = torch.cat([example[0].reshape(1, self.num_slices, -1) for example in batch]) y = torch.tensor([example[1] for example in batch], dtype=torch.long) return X.to(self.device), y.to(self.device) def train(self): self.core.train() def eval(self): self.core.eval() ================================================ FILE: examples/sequence/train.py ================================================ import argparse import random import torch import torch.nn as nn import torch.nn.functional as F import numpy as np from lstm import LSTM from data_utils import load_data, get_batches from auto_LiRPA import BoundedModule, BoundedTensor, PerturbationLpNorm from auto_LiRPA.utils import MultiAverageMeter, logger, get_spec_matrix parser = argparse.ArgumentParser() parser.add_argument("--seed", type=int, default=0) parser.add_argument("--load", type=str, default=None) parser.add_argument("--device", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--norm", type=int, default=np.inf) parser.add_argument("--eps", type=float, default=0.1) parser.add_argument("--num_epochs", type=int, default=20) parser.add_argument("--batch_size", type=int, default=512) parser.add_argument("--num_slices", type=int, default=8) parser.add_argument("--hidden_size", type=int, default=256) parser.add_argument("--num_classes", type=int, default=10) parser.add_argument("--input_size", type=int, default=784) parser.add_argument("--lr", type=float, default=1e-2) parser.add_argument("--dir", type=str, default="model", help="directory to load or save the model") parser.add_argument("--num_epochs_warmup", type=int, default=10, help="number of epochs for the warmup stage when eps is linearly increased from 0 to the full value") parser.add_argument("--log_interval", type=int, default=10, help="interval of printing the log during training") args = parser.parse_args() ## Train or test one batch. def step(model, ptb, batch, eps=args.eps, train=False): # We increase the perturbation each batch. ptb.set_eps(eps) # We create a BoundedTensor object with current batch of data. X, y = model.get_input(batch) X = BoundedTensor(X, ptb) logits = model.core(X) # Form the linear speicifications, which are margins of ground truth class and other classes. num_class = args.num_classes c = get_spec_matrix(X, y, num_class) # Compute CROWN-IBP (IBP+backward) bounds for training. We only need the lower bound. # Here we can omit the x=(X,) argument because we have just used X for forward propagation. lb, ub = model.core.compute_bounds(C=c, method='CROWN-IBP', bound_upper=False) # Compute robust cross entropy loss. 
lb_padded = torch.cat((torch.zeros(size=(lb.size(0),1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) loss = nn.CrossEntropyLoss()(-lb_padded, fake_labels) # Report accuracy and robust accuracy. acc = (torch.argmax(logits, dim=-1) == y).float().mean() acc_robust = 1 - torch.mean((lb < 0).any(dim=1).float()) if train: loss.backward() return acc.detach(), acc_robust.detach(), loss.detach() ## Train one epoch. def train(epoch): meter = MultiAverageMeter() model.train() # Load data for a epoch. train_batches = get_batches(data_train, args.batch_size) eps_inc_per_step = 1.0 / (args.num_epochs_warmup * len(train_batches)) for i, batch in enumerate(train_batches): # We increase eps linearly every batch. eps = args.eps * min(eps_inc_per_step * ((epoch - 1) * len(train_batches) + i + 1), 1.0) # Call the main training loop. acc, acc_robust, loss = step(model, ptb, batch, eps=eps, train=True) # Optimize the loss. torch.nn.utils.clip_grad_norm_(model.core.parameters(), 5.0) optimizer.step() optimizer.zero_grad() meter.set_batch_size(len(batch)) meter.update('acc', acc) meter.update('acc_rob', acc_robust) meter.update('loss', loss) if (i + 1) % args.log_interval == 0: logger.info("Epoch {}, training step {}/{}: {}, eps {:.3f}".format( epoch, i + 1, len(train_batches), meter, eps)) model.save(epoch) ## Test accuracy and robust accuracy. def test(epoch, batches): meter = MultiAverageMeter() model.eval() for batch in batches: acc, acc_robust, loss = step(model, ptb, batch) meter.set_batch_size(len(batch)) meter.update('acc', acc) meter.update('acc_rob', acc_robust) meter.update('loss', loss) logger.info("Epoch {} test: {}".format(epoch, meter)) # Load MNIST dataset logger.info("Loading data...") data_train, data_test = load_data() logger.info("Dataset sizes: {}/{}".format(len(data_train), len(data_test))) test_batches = get_batches(data_test, args.batch_size) # Set all random seeds. random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Create a LSTM sequence classifier. logger.info("Creating LSTM model...") model = LSTM(args).to(args.device) X, y = model.get_input(test_batches[0]) # Create the perturbation object once here, and we can reuse it. ptb = PerturbationLpNorm(norm=args.norm, eps=args.eps) # Convert the LSTM to BoundedModule X = BoundedTensor(X, ptb) model.core = BoundedModule(model.core, (X,), device=args.device) optimizer = model.build_optimizer() # Main training loop. for t in range(model.checkpoint, args.num_epochs): train(t + 1) test(t + 1, test_batches) # If the loaded model has already reached the last epoch, test it directly. if model.checkpoint == args.num_epochs: test(args.num_epochs, test_batches) ================================================ FILE: examples/simple/invprop.py ================================================ """ A toy example for bounding neural network outputs under input perturbations using INVPROP See https://arxiv.org/abs/2302.01404 """ import torch from collections import defaultdict from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm class simple_model(torch.nn.Module): """ A very simple 2-layer neural network for demonstration. """ def __init__(self): super().__init__() # Weights of linear layers. self.w1 = torch.tensor([[1., -1.], [2., -1.]]) self.w2 = torch.tensor([[1., -1.]]) def forward(self, x): # Linear layer. z1 = x.matmul(self.w1.t()) # Relu layer. 
hz1 = torch.nn.functional.relu(z1) # Linear layer. z2 = hz1.matmul(self.w2.t()) return z2 model = simple_model() # Input x. x = torch.tensor([[1., 1.]]) # Lower and upper bounds of x. lower = torch.tensor([[-1., -2.]]) upper = torch.tensor([[2., 1.]]) # Compute bounds using LiRPA with the given lower and upper bounds. norm = float("inf") ptb = PerturbationLpNorm(norm = norm, x_L=lower, x_U=upper) bounded_x = BoundedTensor(x, ptb) # INVPROP configuration # apply_output_constraints_to: list of layer names or types to which the output # constraints should be applied. Here, they will be applied to all layers of type # 'BoundMatMul' and 'BoundInput'. To only apply them to specific layers, use their # names, e.g. ['/0', '/z1']. The currently recommended way to get those names is # either to first construct an instance of BoundedModule with arbitrary bound_opts, # print it to stdout and inspect their names manually, or to access the layer names # as lirpa_model.final_node().inputs[0].inputs[0].name # tighten_input_bounds: whether to tighten the input bounds. This will modify the # perturbation of the input. If set, apply_output_constraints_to should contain # 'BoundInput' or the corresponding layer name. Otherwise, this will have no effect. # Similarly, adding 'BoundInput' to apply_output_constraints_to will have no effect # unless tighten_input_bounds is set. # best_of_oc_and_no_oc: Using output constraints may sometimes lead to worse results, # because the optimization might find bad local minima. If this is set to True, # every optimization step will be run twice, once with and once without output # constraints, and the better result will be chosen. # directly_optimize: Usually, only linear layers preceding non-linear layers are # optimized using output constraints. If you want to optimize a specific layer that # would usually be skipped, add its name to this list. This is most likely to be # used when preimages should be computed as they might use linear combinations of # the inputs. This requires the use of sequential linear layers. For detailed # examples, see https://github.com/kothasuhas/verify-input # oc_lr: Learning rate for the optimization of output constraints. # share_gammas: Whether neurons in each layer should share the same gamma lirpa_model = BoundedModule(model, torch.empty_like(x), bound_opts={ 'optimize_bound_args': { 'apply_output_constraints_to': ['BoundMatMul', 'BoundInput'], 'tighten_input_bounds': True, 'best_of_oc_and_no_oc': False, 'directly_optimize': [], 'oc_lr': 0.1, 'share_gammas': False, 'iteration': 1000, } }) # To dynamically set the apply_output_constraints_to option, set it to `[]` in the # above code, and then use the following: # lirpa_model.set_bound_opts({ # 'optimize_bound_args': { # 'apply_output_constraints_to': [ # lirpa_model.final_node().inputs[0].inputs[0].inputs[0].name, # lirpa_model.final_node().inputs[0].inputs[0].name, # ] # } # }) # The scalar output must be <= -1 # Constraints have the shape [1, num_constraints, num_output_neurons] # They are treated as conjunctions, i.e., all constraints must be satisfied.
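# Concretely, row k of `constraints` together with entry k of `thresholds` encodes the
# linear constraint constraints[0, k, :] @ f(x) <= thresholds[k] on the model output f(x).
# With a single output neuron, constraints = ones(1, 1, 1) and thresholds = [-1.] below
# encode f(x) <= -1. As a hypothetical illustration, for a model with two outputs, the
# pair of constraints f_0(x) <= 0 and f_0(x) - f_1(x) <= 1 could be written as:
#   constraints = torch.tensor([[[1., 0.], [1., -1.]]])  # shape [1, 2, 2]
#   thresholds = torch.tensor([0., 1.])                   # shape [2]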
lirpa_model.constraints = torch.ones(1,1,1) # Thresholds have the shape [num_constraints] lirpa_model.thresholds = torch.tensor([-1.]) print(f"Original perturbation: x0: [{ptb.x_L[0][0]}, {ptb.x_U[0][0]}], x1: [{ptb.x_L[0][1]}, {ptb.x_U[0][1]}]") lb, ub = lirpa_model.compute_bounds(x=(bounded_x,), method='alpha-CROWN') tightened_ptb = lirpa_model['/0'].perturbation print(f"Tightened perturbation: x0: [{tightened_ptb.x_L[0][0]}, {tightened_ptb.x_U[0][0]}], x1: [{tightened_ptb.x_L[0][1]}, {tightened_ptb.x_U[0][1]}]") # For the bounds without output constraints, refer to toy.py print(f'alpha-CROWN bounds without output constraints: lower=-3, upper=2') print(f'alpha-CROWN bounds with output constraints: lower={lb.item()}, upper={ub.item()}') ================================================ FILE: examples/simple/lp_full.py ================================================ """ A simple example for bounding neural network outputs using LP/MIP solvers. Auto_LiRPA supports constructing LP/MIP optimization formulations (using Gurobi). This example uses LP to solve all intermediate layer bounds and final layer bounds, reflecting the setting in the paper "A Convex Relaxation Barrier to Tight Robustness Verification of Neural Networks". This is sometimes referred to as the LP-Full setting. This is in general, very slow; alpha-CROWN is generally recommended to compute intermediate layer bound rather than LP. Example usage: python lp_full.py --index 0 --norm 2.0 --perturbation 1.0 Here `--index` is the dataset index (MNIST in this example), `--norm` is the Lp perturbation norm used and `--perturbation` is the magnitude of the perturbation added to model input. """ import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.operators import BoundLinear, BoundConv import gurobipy as grb import time import numpy as np import argparse # Help function for generating output matrix. This function used for # generating matrix C to calculate the margin between true class and # the other classes. def build_C(label, classes): """ label: shape (B,). Each label[b] in [0..classes-1]. Return: C: shape (B, classes-1, classes). For each sample b, each row is a "negative class" among [0..classes-1]\{label[b]}. Puts +1 at column=label[b], -1 at each negative class column. """ device = label.device batch_size = label.size(0) # 1) Initialize C = torch.zeros((batch_size, classes-1, classes), device=device) # 2) All class indices # shape: (1, K) -> (B, K) all_cls = torch.arange(classes, device=device).unsqueeze(0).expand(batch_size, -1) # 3) Negative classes only, shape (B, K-1) # mask out the ground-truth mask = all_cls != label.unsqueeze(1) neg_cls = all_cls[mask].view(batch_size, -1) # 4) Scatter +1 at each sample’s ground-truth label # shape needed: (B, K-1, 1) pos_idx = label.unsqueeze(1).expand(-1, classes-1).unsqueeze(-1) C.scatter_(dim=2, index=pos_idx, value=1.0) # 5) Scatter -1 at each row’s negative label # We have (B, K-1) negative labels. 
For row j in each sample b, neg_cls[b, j] is that row’s negative label row_idx = torch.arange(classes-1, device=device).unsqueeze(0).expand(batch_size, -1) # shape: (B, K-1) # We can do advanced indexing: C[torch.arange(batch_size).unsqueeze(1), row_idx, neg_cls] = -1.0 return C parser = argparse.ArgumentParser() parser.add_argument('--index', default=0, type=int, help='Index of data example (from MNIST dataset).') parser.add_argument('--norm', default='inf', type=str, help='Input perturbation norm.') parser.add_argument('--perturbation', default=0.05, type=float, help='Input perturbation magnitude.') parser.add_argument('--lr', default=0.5, type=float, help='Learning rate for alpha_crown.') parser.add_argument('--iteration', default=30, type=int, help='Iterations for alpha_crown.') args = parser.parse_args() ## Step 1: Define computational graph by implementing forward() # You can create your own model here. model = nn.Sequential( nn.Flatten(), nn.Linear(784, 100), nn.ReLU(), nn.Linear(100, 100), nn.ReLU(), nn.Linear(100, 10) ) # Optionally, load the pretrained weights. checkpoint = torch.load('./models/spectral_NOR_MLP_B.pth', weights_only=True) model.load_state_dict(checkpoint) ## Step 2: Prepare dataset. test_data = torchvision.datasets.MNIST( './data', train=False, download=True, transform=torchvision.transforms.ToTensor()) n_classes = 10 image = test_data.data[args.index].to(torch.float32).unsqueeze(0).unsqueeze(0) / 255.0 true_label = torch.tensor([test_data.targets[args.index]]) ## Step 3: Define perturbation. eps = args.perturbation norm = float(args.norm) # The lower and upper bounds of the MNIST dataset are [0,1]; # replace the bounds if using another dataset. if norm == float('inf'): x_U = None x_L = None else: x_U = torch.ones_like(image) x_L = torch.zeros_like(image) ptb = PerturbationLpNorm(norm = norm, eps = eps, x_U = x_U, x_L = x_L) print(f'Verification of MNIST data index {args.index} with L{args.norm} perturbation of {args.perturbation}\n') # Here we only use one image as input. image = BoundedTensor(image, ptb) print('Running LP-Full with LPs for all intermediate layers...') start_time = time.time() ## Step 4: Compute the bounds of different methods. # For CROWN/alpha-CROWN, we use the compute_bounds() method. # For LP and MIP, we use the build_solver_module() method. interm_bounds = {} lirpa_model = BoundedModule(model, image, device=image.device) # Store the output shape for each layer first for node in lirpa_model.nodes(): # For each intermediate layer, we first set its bounds to infinity as placeholders. if hasattr(node, 'output_shape'): interm_lb = torch.full(node.output_shape, -float('inf')) interm_ub = torch.full(node.output_shape, float('inf')) interm_bounds[node.name] = [interm_lb, interm_ub] # C is the specification matrix (groundtruth - target class). C = build_C(true_label, classes=n_classes) # Here we assume that the last node is the model output, and we start from intermediate layers first. # Technically, here we need a topological sort of all model nodes if the computation graph is general. for node in lirpa_model.nodes(): # For simplicity, we assume the model contains linear, conv, and ReLU layers. # We need to calculate the preactivation bounds before each ReLU layer, which are the bounds of the linear or conv layers.
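    # In other words, layers are visited in order: for each linear/conv layer an LP is built
    # via build_solver_module() using the bounds already stored in `interm_bounds` for the
    # preceding layers, every neuron of the current layer is bounded by solving two LPs
    # (maximize and minimize its Gurobi variable), and the results are written back into
    # `interm_bounds` so that the relaxation of the following ReLU layer can use them.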
if isinstance(node, (BoundLinear, BoundConv)): interm_lb = torch.full(node.output_shape, -float('inf')) interm_ub = torch.full(node.output_shape, float('inf')) if node.is_final_node: print(f'Solving LPs for final layer bounds...') # Last node, all intermediate layer bounds have been obtained. # For last node, we need to use the specification matrix C to calculate the bounds on groundtruth - target labels. solver_vars = lirpa_model.build_solver_module(model_type='lp', x=(image,), final_node_name=node.name, interm_bounds=interm_bounds, C=C) lirpa_model.solver_model.setParam('OutputFlag', 0) final_lb = torch.empty(n_classes-1) final_ub = torch.empty(n_classes-1) for i in range(n_classes-1): print(f'Solving class {i}...') # Now you can define objectives based on the variables on the output layer. # And then solve them using gurobi. Here we just output the lower and upper # bounds for each output neuron. # Solve upper bound. lirpa_model.solver_model.setObjective(solver_vars[i], grb.GRB.MAXIMIZE) lirpa_model.solver_model.optimize() # If the solver does not terminate, you will get a NaN. if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: final_ub[i] = lirpa_model.solver_model.objVal # Solve lower bound. lirpa_model.solver_model.setObjective(solver_vars[i], grb.GRB.MINIMIZE) lirpa_model.solver_model.optimize() if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: final_lb[i] = lirpa_model.solver_model.objVal else: print(f'Solving LPs for layer {node.name} intermediate layer bounds...') # Solve intermediate layer bounds, one by one. solver_vars = lirpa_model.build_solver_module(model_type='lp', x=(image,), final_node_name=node.name, interm_bounds=interm_bounds) lirpa_model.solver_model.setParam('OutputFlag', 0) # For linear layer, the solver_vars shape is: (neurons). if isinstance(node, BoundLinear): for i, var in enumerate(solver_vars): lirpa_model.solver_model.setObjective(var, grb.GRB.MAXIMIZE) lirpa_model.solver_model.optimize() if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: interm_ub[0][i] = lirpa_model.solver_model.objVal # Solve lower bound. lirpa_model.solver_model.setObjective(var, grb.GRB.MINIMIZE) lirpa_model.solver_model.optimize() if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: interm_lb[0][i] = lirpa_model.solver_model.objVal # For convolutional layer, the solver_vars shape is (channel, out_w, out_h). elif isinstance(node, BoundConv): for i,channel in enumerate(solver_vars): for j, row in enumerate(channel): for k, var in enumerate(row): lirpa_model.solver_model.setObjective(var, grb.GRB.MAXIMIZE) lirpa_model.solver_model.optimize() if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: interm_ub[0][i][j][k] = lirpa_model.solver_model.objVal # Solve lower bound. 
lirpa_model.solver_model.setObjective(var, grb.GRB.MINIMIZE) lirpa_model.solver_model.optimize() if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: interm_lb[0][i][j][k] = lirpa_model.solver_model.objVal interm_bounds[node.name] = [interm_lb, interm_ub] print(f'Finished solving layer {node.name} with {len(solver_vars)} neurons') end_time = time.time() lp_time = end_time - start_time print(f'LP-Full time: {lp_time}\n') lirpa_model = BoundedModule(model, torch.empty_like(image), device=image.device) lirpa_model.set_bound_opts({'optimize_bound_args': {'iteration': args.iteration, 'lr_alpha': args.lr}}) start_time = time.time() print(f'Running alpha-CROWN with {args.iteration} iterations and learning rate of {args.lr}...') crown_lb, crown_ub = lirpa_model.compute_bounds(x=(image, ), C=C, method='alpha-CROWN') end_time = time.time() alpha_crown_time = end_time - start_time print(f'alpha-CROWN time: {alpha_crown_time}') # Step 5: output the final results of each method. print(f'\nResults for dataset index: {args.index}') print(f'LP-Full bounds:') for i in range(n_classes-1): if i == true_label.item(): label = i + 1 else: label = i print('{l:8.3f} <= f_{k} - f_{j} <= {u:8.3f}'.format( k=true_label.item(), j=label, l=final_lb[i].item(), u=final_ub[i].item())) # Alpha-CROWN should achieve similar results as LP full but without running any LPs. print(f'\nalpha-CROWN bounds:') for i in range(n_classes-1): if i == true_label.item(): label = i + 1 else: label = i print('{l:8.3f} <= f_{k} - f_{j} <= {u:8.3f}'.format( k=true_label.item(), j=label, l=crown_lb[0][i].item(), u=crown_ub[0][i].item())) print(f'alpha-CROWN bounds and LP-full bounds should be close for Linf norm; ' 'adjust the number of iterations and learning rate when necessary.\n') ================================================ FILE: examples/simple/mip_lp_solver.py ================================================ """ A simple example for bounding neural network outputs using LP/MIP solvers. Auto_LiRPA supports constructing LP/MIP optimization formulations (using Gurobi). This example serves as a skeleton for using the build_solver_module() method to obtain LP/MIP formulations of neural networks. Note that alpha-CROWN is used to calculate intermediate layer bounds for constructing the convex relaxation of ReLU neurons. So we are actually using "alpha-CROWN+MIP" or "alpha-CROWN+LP" here. Calculating intermediate layer bounds using LP/MIP is often impractical due to the high cost. """ import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm import gurobipy as grb ## Step 1: Define computational graph by implementing forward() # You can create your own model here. class mnist_model(nn.Module): def __init__( self, input_size=28*28, hidden_size=128, hidden_size_2=64, output_size=10): super(mnist_model, self).__init__() self.fc1 = nn.Linear(input_size, hidden_size) self.fc2 = nn.Linear(hidden_size, hidden_size_2) self.fc3 = nn.Linear(hidden_size_2, output_size) self.relu = nn.ReLU() def forward(self, x): x = x.view(-1, 784) out = self.fc1(x) out = self.relu(out) out = self.fc2(out) out = self.relu(out) out = self.fc3(out) return out model = mnist_model() # Optionally, load the pretrained weights. checkpoint = torch.load('../vision/pretrained/mnist_fc_3layer.pth') model.load_state_dict(checkpoint) ## Step 2: Prepare dataset. 
test_data = torchvision.datasets.MNIST( './data', train=False, download=True, transform=torchvision.transforms.ToTensor()) # For illustration we only use 1 image from dataset. N = 1 n_classes = 10 image = test_data.data[:N].view(N, 1, 28, 28) true_label = test_data.targets[:N] image = image.to(torch.float32) / 255.0 ## Step 3: Define perturbation. eps = 0.03 norm = float("inf") ptb = PerturbationLpNorm(norm = norm, eps = eps) # Here we only use one image as input image = BoundedTensor(image[0], ptb) ## Step 4: Compute the bounds of different methods. # For CROWN/alpha-CROWN, we use the compute_bounds() method. # For LP and MIP, we use the build_solver_module() method. result = {} # Note that here 'lp' or 'mip' are essentially 'alpha-CROWN+lp' and 'alpha-CROWN+mip'. # We use alpha-CROWN to calculate all the intermediate layer bounds for LP/MIP, because # using MIP/LP for all intermediate neurons will be very slow. for method in ['alpha-CROWN','lp','mip']: # To get clean results and avoid interference among methods, we create a # new BoundedModule object. However, in your production code please pay # attention that BoundedModule() has high construction overhead. lirpa_model = BoundedModule(model, torch.empty_like(image[0]), device=image.device) # Call alpha-CROWN first, which gives all intermediate layer bounds. lb, ub = lirpa_model.compute_bounds(x=(image,), method='alpha-CROWN') if method != 'alpha-CROWN': lb = torch.full_like(lb, float('nan')) ub = torch.full_like(ub, float('nan')) # Obtain the optimizer (Gurobi) variables for the output layer. # Auto_LiRPA will construct the LP/MIP formulation based on computation graph. # Note that pre-activation bounds are required for using this function. # Preactivation bounds have been computed using alpha-CROWN above. solver_vars = lirpa_model.build_solver_module(model_type=method) # Set some parameters for Gurobi optimizer. lirpa_model.solver_model.setParam('OutputFlag', 0) for i in range(n_classes): print(f'Solving class {i} with method {method}') # Now you can define objectives based on the variables on the output layer. # And then solve them using gurobi. Here we just output the lower and upper # bounds for each output neuron. # Solve upper bound. lirpa_model.solver_model.setObjective(solver_vars[i], grb.GRB.MAXIMIZE) lirpa_model.solver_model.optimize() # If the solver does not terminate, you will get a NaN. if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: ub[0][i] = lirpa_model.solver_model.objVal # Solve lower bound. lirpa_model.solver_model.setObjective(solver_vars[i], grb.GRB.MINIMIZE) lirpa_model.solver_model.optimize() if lirpa_model.solver_model.status == grb.GRB.Status.OPTIMAL: lb[0][i] = lirpa_model.solver_model.objVal result[method] = (lb, ub) # Step 5: output the final results of each method. for method in result.keys(): print(f'Bounding method: {method}') lb, ub = result[method] for i in range(n_classes): print('f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f}'.format( j=i, l=lb[0][i].item(), u=ub[0][i].item())) ================================================ FILE: examples/simple/toy.py ================================================ """ A toy example for bounding neural network outputs under input perturbations. """ import torch from collections import defaultdict from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm class simple_model(torch.nn.Module): """ A very simple 2-layer neural network for demonstration. 
""" def __init__(self): super().__init__() # Weights of linear layers. self.w1 = torch.tensor([[1., -1.], [2., -1.]]) self.w2 = torch.tensor([[1., -1.]]) def forward(self, x): # Linear layer. z1 = x.matmul(self.w1.t()) # Relu layer. hz1 = torch.nn.functional.relu(z1) # Linear layer. z2 = hz1.matmul(self.w2.t()) return z2 model = simple_model() # Input x. x = torch.tensor([[1., 1.]]) # Lowe and upper bounds of x. lower = torch.tensor([[-1., -2.]]) upper = torch.tensor([[2., 1.]]) # Wrap model with auto_LiRPA for bound computation. # The second parameter is for constructing the trace of the computational graph, # and its content is not important. lirpa_model = BoundedModule(model, torch.empty_like(x)) pred = lirpa_model(x) print(f'Model prediction: {pred.item()}') # Compute bounds using LiRPA using the given lower and upper bounds. norm = float("inf") ptb = PerturbationLpNorm(norm = norm, x_L=lower, x_U=upper) bounded_x = BoundedTensor(x, ptb) # Compute bounds. lb, ub = lirpa_model.compute_bounds(x=(bounded_x,), method='IBP') print(f'IBP bounds: lower={lb.item()}, upper={ub.item()}') lb, ub = lirpa_model.compute_bounds(x=(bounded_x,), method='CROWN') print(f'CROWN bounds: lower={lb.item()}, upper={ub.item()}') # Getting the linear bound coefficients (A matrix). required_A = defaultdict(set) required_A[lirpa_model.output_name[0]].add(lirpa_model.input_name[0]) lb, ub, A = lirpa_model.compute_bounds(x=(bounded_x,), method='CROWN', return_A=True, needed_A_dict=required_A) print('CROWN linear (symbolic) bounds: lA x + lbias <= f(x) <= uA x + ubias, where') print(A[lirpa_model.output_name[0]][lirpa_model.input_name[0]]) # Opimized bounds, which is tighter. lb, ub, A = lirpa_model.compute_bounds(x=(bounded_x,), method='alpha-CROWN', return_A=True, needed_A_dict=required_A) print(f'alpha-CROWN bounds: lower={lb.item()}, upper={ub.item()}') print('alpha-CROWN linear (symbolic) bounds: lA x + lbias <= f(x) <= uA x + ubias, where') print(A[lirpa_model.output_name[0]][lirpa_model.input_name[0]]) ================================================ FILE: examples/vision/.gitignore ================================================ exp exp_inv __pycache__ model_* !model_gurobi.py saved_models config ================================================ FILE: examples/vision/bound_option.py ================================================ """ A simple example for bounding neural network outputs with different bound options on ReLU activation functions. """ import os from collections import defaultdict import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import Flatten ## Step 1: Define computational graph by implementing forward() # This simple model comes from https://github.com/locuslab/convex_adversarial def mnist_model(): model = nn.Sequential( nn.Conv2d(1, 16, 4, stride=2, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 4, stride=2, padding=1), nn.ReLU(), Flatten(), nn.Linear(32*7*7,100), nn.ReLU(), nn.Linear(100, 10) ) return model model = mnist_model() # Optionally, load the pretrained weights. 
checkpoint = torch.load( os.path.join(os.path.dirname(__file__), 'pretrained/mnist_a_adv.pth'), map_location=torch.device('cpu')) model.load_state_dict(checkpoint) ## Step 2: Prepare dataset as usual test_data = torchvision.datasets.MNIST( './data', train=False, download=True, transform=torchvision.transforms.ToTensor()) # For illustration we only use one image from dataset N = 1 n_classes = 10 image = test_data.data[:N].view(N,1,28,28) true_label = test_data.targets[:N] # Convert to float image = image.to(torch.float32) / 255.0 if torch.cuda.is_available(): image = image.cuda() model = model.cuda() ## Step 3: wrap model with auto_LiRPA # Use default bound_option lirpa_model_default = BoundedModule(model, torch.empty_like(image), device=image.device) # Use same-slope option for ReLU functions lirpa_model_sameslope = BoundedModule(model, torch.empty_like(image), device=image.device, bound_opts={'activation_bound_option': 'same-slope'}) print('Running on', image.device) ## Step 4: Compute bounds using LiRPA given a perturbation eps = 0.3 norm = float("inf") ptb = PerturbationLpNorm(norm = norm, eps = eps) image = BoundedTensor(image, ptb) # Get model prediction as usual pred = lirpa_model_default(image) label = torch.argmax(pred, dim=1).cpu().detach().numpy() print() print('Demonstration 1.1: Bound computation and comparisons of different options.') ## Step 5: Compute bounds for final output print('Bounding method:', 'backward (CROWN)') print('Bounding option:', 'Default (adaptive)') lb, ub = lirpa_model_default.compute_bounds(x=(image,), method='backward') for i in range(N): print(f'Image {i} top-1 prediction {label[i]} ground-truth {true_label[i]}') for j in range(n_classes): indicator = '(ground-truth)' if j == true_label[i] else '' print('f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f} {ind}'.format( j=j, l=lb[i][j].item(), u=ub[i][j].item(), ind=indicator)) print() print('Bounding option:', 'same-slope') lb, ub = lirpa_model_sameslope.compute_bounds(x=(image,), method='backward') for i in range(N): print(f'Image {i} top-1 prediction {label[i]} ground-truth {true_label[i]}') for j in range(n_classes): indicator = '(ground-truth)' if j == true_label[i] else '' print('f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f} {ind}'.format( j=j, l=lb[i][j].item(), u=ub[i][j].item(), ind=indicator)) print() print('Demonbstration 1.2: same-slope option is also available with CROWN-Optimized') print('Bounding method:', 'CROWN-Optimized (alpha-CROWN)') print('Bounding option:', 'Default (adaptive)') lb, ub = lirpa_model_default.compute_bounds(x=(image,), method='CROWN-Optimized') for i in range(N): print(f'Image {i} top-1 prediction {label[i]} ground-truth {true_label[i]}') for j in range(n_classes): indicator = '(ground-truth)' if j == true_label[i] else '' print('f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f} {ind}'.format( j=j, l=lb[i][j].item(), u=ub[i][j].item(), ind=indicator)) print() print('Bounding option:', 'same-slope') lb, ub = lirpa_model_sameslope.compute_bounds(x=(image,), method='CROWN-Optimized') for i in range(N): print(f'Image {i} top-1 prediction {label[i]} ground-truth {true_label[i]}') for j in range(n_classes): indicator = '(ground-truth)' if j == true_label[i] else '' print('f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f} {ind}'.format( j=j, l=lb[i][j].item(), u=ub[i][j].item(), ind=indicator)) print() print('Demonstration 2: Obtaining linear coefficients of the lower and upper bounds.') print('With same-slope option, two linear coefficients should be the 
same.') # There are many bound coefficients during CROWN bound calculation; here we are interested in the linear bounds # of the output layer, with respect to the input layer (the image). required_A = defaultdict(set) required_A[lirpa_model_sameslope.output_name[0]].add(lirpa_model_sameslope.input_name[0]) print("Bounding method:", 'backward') print("Bounding option:", 'same-slope') lb, ub, A_dict = lirpa_model_sameslope.compute_bounds(x=(image,), method='backward', return_A=True, needed_A_dict=required_A) lower_A, lower_bias = A_dict[lirpa_model_sameslope.output_name[0]][lirpa_model_sameslope.input_name[0]]['lA'], A_dict[lirpa_model_sameslope.output_name[0]][lirpa_model_sameslope.input_name[0]]['lbias'] upper_A, upper_bias = A_dict[lirpa_model_sameslope.output_name[0]][lirpa_model_sameslope.input_name[0]]['uA'], A_dict[lirpa_model_sameslope.output_name[0]][lirpa_model_sameslope.input_name[0]]['ubias'] print(f'lower bound linear coefficients size (batch, output_dim, *input_dims): {list(lower_A.size())}') print(f'lower bound bias term size (batch, output_dim): {list(lower_bias.size())}') print(f'upper bound linear coefficients size (batch, output_dim, *input_dims): {list(upper_A.size())}') print(f'upper bound bias term size (batch, output_dim): {list(upper_bias.size())}') print() print(f'lower bound linear coefficients should be the same as upper bound linear coefficients: {(lower_A - upper_A).abs().max() < 1e-5}') print() ================================================ FILE: examples/vision/cifar_training.py ================================================ import argparse import multiprocessing import random import time import logging import os import torch.optim as optim import torchvision.datasets as datasets import torchvision.transforms as transforms from torch.nn import CrossEntropyLoss import models from auto_LiRPA import BoundedModule, BoundedTensor, BoundDataParallel, CrossEntropyWrapper from auto_LiRPA.bound_ops import BoundExp from auto_LiRPA.eps_scheduler import LinearScheduler, SmoothedScheduler, AdaptiveScheduler, FixedScheduler from auto_LiRPA.perturbations import * from auto_LiRPA.utils import MultiAverageMeter, logger, get_spec_matrix, sync_params def get_exp_module(bounded_module): for _, node in bounded_module.named_modules(): # Find the Exp neuron in computational graph if isinstance(node, BoundExp): return node return None parser = argparse.ArgumentParser() parser.add_argument("--verify", action="store_true", help='verification mode, do not train') parser.add_argument("--no_loss_fusion", action="store_true", help='without loss fusion, slower training mode') parser.add_argument("--load", type=str, default="", help='Load pretrained model') parser.add_argument("--device", type=str, default="cuda", choices=["cpu", "cuda"], help='use cpu or cuda') parser.add_argument("--data", type=str, default="CIFAR", choices=["MNIST", "CIFAR"], help='dataset') parser.add_argument("--seed", type=int, default=100, help='random seed') parser.add_argument("--eps", type=float, default=8.8/255, help='Target training epsilon') parser.add_argument("--norm", type=float, default='inf', help='p norm for epsilon perturbation') parser.add_argument("--bound_type", type=str, default="CROWN-IBP", choices=["IBP", "CROWN-IBP", "CROWN"], help='method of bound analysis') parser.add_argument("--model", type=str, default="cnn_7layer_bn", help='model name (Densenet_cifar_32, resnet18, ResNeXt_cifar, MobileNet_cifar, wide_resnet_cifar_bn_wo_pooling)') parser.add_argument("--num_epochs", type=int, default=2000, 
help='number of total epochs') parser.add_argument("--batch_size", type=int, default=256, help='batch size') parser.add_argument("--lr", type=float, default=5e-4, help='learning rate') parser.add_argument("--lr_decay_rate", type=float, default=0.1, help='learning rate decay rate') parser.add_argument("--lr_decay_milestones", nargs='+', type=int, default=[1400, 1700], help='learning rate dacay milestones') parser.add_argument("--scheduler_name", type=str, default="SmoothedScheduler", choices=["LinearScheduler", "SmoothedScheduler"], help='epsilon scheduler') parser.add_argument("--scheduler_opts", type=str, default="start=101,length=801,mid=0.4", help='options for epsilon scheduler') parser.add_argument("--bound_opts", type=str, default=None, choices=["same-slope", "zero-lb", "one-lb"], help='bound options for relu') parser.add_argument('--clip_grad_norm', type=float, default=8.0) args = parser.parse_args() exp_name = args.model + '_b' + str(args.batch_size) + '_' + str(args.bound_type) + '_epoch' + str(args.num_epochs) + '_' + args.scheduler_opts + '_' + str(args.eps)[:6] os.makedirs('saved_models/', exist_ok=True) log_file = f'saved_models/{exp_name}{"_test" if args.verify else ""}.log' file_handler = logging.FileHandler(log_file) logger.addHandler(file_handler) def Train(model, t, loader, eps_scheduler, norm, train, opt, bound_type, method='robust', loss_fusion=True, final_node_name=None): num_class = 10 meter = MultiAverageMeter() if train: model.train() eps_scheduler.train() eps_scheduler.step_epoch() eps_scheduler.set_epoch_length(int((len(loader.dataset) + loader.batch_size - 1) / loader.batch_size)) else: model.eval() eps_scheduler.eval() exp_module = get_exp_module(model) def get_bound_loss(x=None, c=None): if loss_fusion: bound_lower, bound_upper = False, True else: bound_lower, bound_upper = True, False if bound_type == 'IBP': lb, ub = model(method_opt="compute_bounds", x=x, IBP=True, C=c, method=None, final_node_name=final_node_name, no_replicas=True) elif bound_type == 'CROWN': lb, ub = model(method_opt="compute_bounds", x=x, IBP=False, C=c, method='backward', bound_lower=bound_lower, bound_upper=bound_upper) elif bound_type == 'CROWN-IBP': # lb, ub = model.compute_bounds(ptb=ptb, IBP=True, x=data, C=c, method='backward') # pure IBP bound # we use a mixed IBP and CROWN-IBP bounds, leading to better performance (Zhang et al., ICLR 2020) factor = (eps_scheduler.get_max_eps() - eps_scheduler.get_eps()) / eps_scheduler.get_max_eps() ilb, iub = model(method_opt="compute_bounds", x=x, IBP=True, C=c, method=None, final_node_name=final_node_name, no_replicas=True) if factor < 1e-50: lb, ub = ilb, iub else: clb, cub = model(method_opt="compute_bounds", IBP=False, C=c, method='backward', bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, no_replicas=True) if loss_fusion: ub = cub * factor + iub * (1 - factor) else: lb = clb * factor + ilb * (1 - factor) if loss_fusion: if isinstance(model, BoundDataParallel): max_input = model(get_property=True, node_class=BoundExp, att_name='max_input') else: max_input = exp_module.max_input return None, torch.mean(torch.log(ub) + max_input) else: # Pad zero at the beginning for each example, and use fake label '0' for all examples lb_padded = torch.cat((torch.zeros(size=(lb.size(0), 1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) robust_ce = CrossEntropyLoss()(-lb_padded, fake_labels) return lb, robust_ce for i, (data, labels) in 
enumerate(loader): start = time.time() eps_scheduler.step_batch() eps = eps_scheduler.get_eps() # For small eps just use natural training, no need to compute LiRPA bounds batch_method = method if eps < 1e-50: batch_method = "natural" if train: opt.zero_grad() # bounded input is used only for the Linf norm if norm == np.inf: data_max = torch.reshape((1. - loader.mean) / loader.std, (1, -1, 1, 1)) data_min = torch.reshape((0. - loader.mean) / loader.std, (1, -1, 1, 1)) data_ub = torch.min(data + (eps / loader.std).view(1,-1,1,1), data_max) data_lb = torch.max(data - (eps / loader.std).view(1,-1,1,1), data_min) else: data_ub = data_lb = data if list(model.parameters())[0].is_cuda: data, labels = data.cuda(), labels.cuda() data_lb, data_ub = data_lb.cuda(), data_ub.cuda() ptb = PerturbationLpNorm(norm=norm, eps=eps, x_L=data_lb, x_U=data_ub) x = BoundedTensor(data, ptb) if loss_fusion: if batch_method == 'natural' or not train: output = model(x, labels) # , disable_multi_gpu=True regular_ce = torch.mean(torch.log(output)) else: model(x, labels) regular_ce = torch.tensor(0., device=data.device) meter.update('CE', regular_ce.item(), x.size(0)) x = (x, labels) c = None else: # Generate specification matrix (when loss fusion is not used). c = get_spec_matrix(data, labels, num_class) x = (x,) if final_node_name is None else (x, labels) output = model(x, final_node_name=final_node_name) regular_ce = CrossEntropyLoss()(output, labels) # regular CrossEntropyLoss used for warming up meter.update('CE', regular_ce.item(), x[0].size(0)) meter.update('Err', torch.sum(torch.argmax(output, dim=1) != labels).item() / x[0].size(0), x[0].size(0)) if batch_method == 'robust': lb, robust_ce = get_bound_loss(x=x, c=c) loss = robust_ce elif batch_method == 'natural': loss = regular_ce if train: loss.backward() if args.clip_grad_norm: grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad_norm) meter.update('grad_norm', grad_norm) if isinstance(eps_scheduler, AdaptiveScheduler): eps_scheduler.update_loss(loss.item() - regular_ce.item()) opt.step() meter.update('Loss', loss.item(), data.size(0)) if batch_method != 'natural': meter.update('Robust_CE', robust_ce.item(), data.size(0)) if not loss_fusion: # For an example, if the lower bounds of margins are > 0 for all classes, the output is verifiably correct.
# If any margin is < 0 this example is counted as an error meter.update('Verified_Err', torch.sum((lb < 0).any(dim=1)).item() / data.size(0), data.size(0)) meter.update('Time', time.time() - start) if (i + 1) % 50 == 0 and train: logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) return meter def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Step 1: Initial original model as usual, see model details in models/example_feedforward.py and models/example_resnet.py if args.data == 'MNIST': model_ori = models.Models[args.model](in_ch=1, in_dim=28) else: model_ori = models.Models[args.model](in_ch=3, in_dim=32) epoch = 0 if args.load: checkpoint = torch.load(args.load) epoch, state_dict = checkpoint['epoch'], checkpoint['state_dict'] opt_state = None try: opt_state = checkpoint['optimizer'] except KeyError: print('no opt_state found') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf(v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.info('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual if args.data == 'MNIST': dummy_input = torch.randn(2, 1, 28, 28) train_data = datasets.MNIST("./data", train=True, download=True, transform=transforms.ToTensor()) test_data = datasets.MNIST("./data", train=False, download=True, transform=transforms.ToTensor()) elif args.data == 'CIFAR': dummy_input = torch.randn(2, 3, 32, 32) normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]) train_data = datasets.CIFAR10("./data", train=True, download=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, 4, padding_mode='edge'), transforms.ToTensor(), normalize])) test_data = datasets.CIFAR10("./data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor(), normalize])) train_data = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size//2, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),4)) if args.data == 'MNIST': train_data.mean = test_data.mean = torch.tensor([0.0]) train_data.std = test_data.std = torch.tensor([1.0]) elif args.data == 'CIFAR': train_data.mean = test_data.mean = torch.tensor([0.4914, 0.4822, 0.4465]) train_data.std = test_data.std = torch.tensor([0.2023, 0.1994, 0.2010]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
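    # Only the shape and dtype of dummy_input matter for tracing; its values are not used
    # when bounds are computed later (it was created above as torch.randn(2, 1, 28, 28)
    # for MNIST or torch.randn(2, 3, 32, 32) for CIFAR).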
model = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts}, device=args.device) final_name1 = model.final_name model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), bound_opts={'activation_bound_option': args.bound_opts, 'loss_fusion': True}, device=args.device) # after CrossEntropyWrapper, the final name will change because of one additional input node in CrossEntropyWrapper final_name2 = model_loss._modules[final_name1].output_name[0] assert type(model._modules[final_name1]) == type(model_loss._modules[final_name2]) if args.no_loss_fusion: model_loss = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts}, device=args.device) final_name2 = None model_loss = BoundDataParallel(model_loss) ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler opt = optim.Adam(model_loss.parameters(), lr=args.lr) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=args.lr_decay_milestones, gamma=args.lr_decay_rate) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.info(str(model_ori)) # skip epochs if epoch > 0: epoch_length = int((len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.info('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.info('resume opt_state') ## Step 5: start training if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=None) else: timer = 0.0 best_err = 1e10 for t in range(epoch + 1, args.num_epochs+1): logger.info("Epoch {}, learning rate {}".format(t, lr_scheduler.get_last_lr())) start_time = time.time() Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=not args.no_loss_fusion) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.info('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer)) logger.info("Evaluating...") torch.cuda.empty_cache() state_dict = sync_params(model_ori, model_loss, loss_fusion=True) with torch.no_grad(): if t > int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']): m = Train(model_loss, t, test_data, FixedScheduler(8./255), norm, False, None, 'IBP', loss_fusion=False, final_node_name=final_name2) else: m = Train(model_loss, t, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=final_name2) save_dict = {'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict()} if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']): current_err = m.avg('Verified_Err') if current_err < best_err: best_err = current_err torch.save(save_dict, 'saved_models/' + exp_name + '_best_' + str(best_err)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache() if __name__ == "__main__": logger.info(args) main(args) ================================================ FILE: 
examples/vision/custom_op.py ================================================ """ An example for custom operators. In this example, we create a custom operator called "PlusConstant", which can be written as "f(x) = x + c" for some constant "c" (an attribute of the operator). """ import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor, register_custom_op from auto_LiRPA.operators import Bound from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import Flatten """ Step 1: Define a `torch.autograd.Function` class to declare and implement the computation of the operator. """ class PlusConstantOp(torch.autograd.Function): @staticmethod def symbolic(g, x, const): """ In this function, define the arguments and attributes of the operator. "custom::PlusConstant" is the name of the new operator, "x" is an argument of the operator, "const_i" is an attribute which stands for "c" in the operator. There can be multiple arguments and attributes. For attribute naming, use a suffix such as "_i" to specify the data type, where "_i" stands for integer, "_t" stands for tensor, "_f" stands for float, etc. """ return g.op('custom::PlusConstant', x, const_i=const) @staticmethod def forward(ctx, x, const): """ In this function, implement the computation for the operator, i.e., f(x) = x + c in this case. """ return x + const """ Step 2: Define a `torch.nn.Module` class to declare a module using the defined custom operator. """ class PlusConstant(nn.Module): def __init__(self, const=1): super().__init__() self.const = const def forward(self, x): """ Use `PlusConstantOp.apply` to call the defined custom operator. """ return PlusConstantOp.apply(x, self.const) """ Step 3: Implement a Bound class to support bound computation for the new operator. """ class BoundPlusConstant(Bound): def __init__(self, attr, inputs, output_index, options): """ `const` is an attribute and can be obtained from the dict `attr` """ super().__init__(attr, inputs, output_index, options) self.const = attr['const'] def forward(self, x): return x + self.const def bound_backward(self, last_lA, last_uA, x, *args, **kwargs): """ Backward mode bound propagation """ print('Calling bound_backward for custom::PlusConstant') def _bound_oneside(last_A): # If last_lA or last_uA is None, it means lower or upper bound # is not required, so we simply return None. if last_A is None: return None, 0 # The function f(x) = x + c is a linear function with coefficient 1. # Then A · f(x) = A · (x + c) = A · x + A · c. # Thus the new A matrix is the same as the last A matrix: A = last_A # For bias, compute A · c and reduce the dimensions by sum: bias = last_A.sum(dim=list(range(2, last_A.ndim))) * self.const return A, bias lA, lbias = _bound_oneside(last_lA) uA, ubias = _bound_oneside(last_uA) return [(lA, uA)], lbias, ubias def interval_propagate(self, *v): """ IBP computation """ print('Calling interval_propagate for custom::PlusConstant') # Interval bound of the input h_L, h_U = v[0] # Since this function is monotonic, we can get the lower bound and upper bound # by applying the function on h_L and h_U respectively.
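        # For example, with const = 1 and an input interval element [-1, 2], the output
        # interval element is [-1 + 1, 2 + 1] = [0, 3]; since f is monotonic, interval
        # endpoints map to interval endpoints.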
lower = h_L + self.const upper = h_U + self.const return lower, upper """ Step 4: Register the custom operator """ register_custom_op("custom::PlusConstant", BoundPlusConstant) # Use the `PlusConstant` module in model definition model = nn.Sequential( Flatten(), nn.Linear(28 * 28, 256), PlusConstant(const=1), nn.Linear(256, 10), ) print("Model:", model) test_data = torchvision.datasets.MNIST("./data", train=False, download=True, transform=torchvision.transforms.ToTensor()) N = 1 n_classes = 10 image = test_data.data[:N].view(N,1,28,28) true_label = test_data.targets[:N] image = image.to(torch.float32) / 255.0 if torch.cuda.is_available(): image = image.cuda() model = model.cuda() lirpa_model = BoundedModule(model, torch.empty_like(image), device=image.device) eps = 0.3 norm = float("inf") ptb = PerturbationLpNorm(norm = norm, eps = eps) image = BoundedTensor(image, ptb) pred = lirpa_model(image) label = torch.argmax(pred, dim=1).cpu().detach().numpy() for method in ['IBP', 'IBP+backward (CROWN-IBP)', 'backward (CROWN)']: print("Bounding method:", method) lb, ub = lirpa_model.compute_bounds(x=(image,), method=method.split()[0]) for i in range(N): print("Image {} top-1 prediction {} ground-truth {}".format(i, label[i], true_label[i])) for j in range(n_classes): indicator = '(ground-truth)' if j == true_label[i] else '' print("f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f} {ind}".format( j=j, l=lb[i][j].item(), u=ub[i][j].item(), ind=indicator)) print() ================================================ FILE: examples/vision/data/.gitignore ================================================ MNIST cifar* ================================================ FILE: examples/vision/data/ImageNet64/imagenet_data_loader.py ================================================ import os import numpy as np from PIL import Image class DatasetDownsampledImageNet(): def __init__(self): # self.data_path = data_path os.mkdir('train') os.mkdir('test') for i in range(1000): os.mkdir('train/' + str(i)) os.mkdir('test/' + str(i)) print(i) self.load_data('raw_data/Imagenet64_train_npz', count=0, fname='train/') self.load_data('raw_data/Imagenet64_val_npz', count=1e8, fname='test/') def load_data(self, data_path, img_size=64, count=0., fname=''): files = os.listdir(data_path) img_size2 = img_size * img_size # count = 0 # 1e8 # test data start with 1 for file in files: f = np.load(data_path + '/' + file) x = np.array(f['data']) y = np.array(f['labels']) - 1 x = np.dstack((x[:, :img_size2], x[:, img_size2:2 * img_size2], x[:, 2 * img_size2:])) x = x.reshape((x.shape[0], img_size, img_size, 3)) for i, img in enumerate(x): img = Image.fromarray(img.reshape(img_size, img_size, 3)) name = str(int(count)).zfill(9) label = str(y[i]) print(count, fname + label + '/' + name + '_label_' + label.zfill(4) + '.png') # img.show() img.save(fname + label + '/' + name + '_label_' + label.zfill(4) + '.png') count += 1 if __name__ == "__main__": DatasetDownsampledImageNet() ================================================ FILE: examples/vision/data/tinyImageNet/.gitignore ================================================ tiny-imagenet-200* ================================================ FILE: examples/vision/data/tinyImageNet/tinyimagenet_download.sh ================================================ #!/bin/bash # download and unzip dataset wget http://cs231n.stanford.edu/tiny-imagenet-200.zip unzip tiny-imagenet-200.zip current="$(pwd)/tiny-imagenet-200" # training data echo "preparing training data..." 
cd $current/train for DIR in $(ls); do cd $DIR rm *.txt mv images/* . rm -r images cd .. done # validation data echo "preparing validation data..." cd $current/val annotate_file="val_annotations.txt" length=$(cat $annotate_file | wc -l) for i in $(seq 1 $length); do # fetch i th line line=$(sed -n ${i}p $annotate_file) # get file name and directory name file=$(echo $line | cut -f1 -d" " ) directory=$(echo $line | cut -f2 -d" ") mkdir -p $directory mv images/$file $directory done rm -r images echo "done" ================================================ FILE: examples/vision/datasets.py ================================================ import multiprocessing import torch from torch.utils import data from functools import partial import torchvision.transforms as transforms import torchvision.datasets as datasets # compute image statistics (by Andreas https://discuss.pytorch.org/t/computing-the-mean-and-std-of-dataset/34949/4) def get_stats(loader): mean = 0.0 for images, _ in loader: batch_samples = images.size(0) reshaped_img = images.view(batch_samples, images.size(1), -1) mean += reshaped_img.mean(2).sum(0) w = images.size(2) h = images.size(3) mean = mean / len(loader.dataset) var = 0.0 for images, _ in loader: batch_samples = images.size(0) images = images.view(batch_samples, images.size(1), -1) var += ((images - mean.unsqueeze(1))**2).sum([0,2]) std = torch.sqrt(var / (len(loader.dataset)*w*h)) return mean, std # load MNIST of Fashion-MNIST def mnist_loaders(dataset, batch_size, shuffle_train = True, shuffle_test = False, ratio=None, test_batch_size=None): # Use the AWS mirror and avoid the yann.lecun.com mirror. dataset.mirrors = [ 'https://ossci-datasets.s3.amazonaws.com/mnist/', ] mnist_train = dataset("./data", train=True, download=True, transform=transforms.ToTensor()) mnist_test = dataset("./data", train=False, download=True, transform=transforms.ToTensor()) if ratio is not None: # only sample in training data num_of_each_class_train = int(len(mnist_train) // 10 * ratio) # num_of_each_class_test = int(len(mnist_test)//10*ratio) class_idx_train = [(mnist_train.targets == _).nonzero().numpy().squeeze() for _ in range(10)] # class_idx_test = [(mnist_test.targets==_).nonzero().numpy().squeeze() for _ in range(10)] for i in range(len(class_idx_train)): class_idx_train[i] = class_idx_train[i][:num_of_each_class_train] # class_idx_test[i] = class_idx_test[i][:num_of_each_class_test] mnist_train = data.Subset(mnist_train, [y for z in class_idx_train for y in z]) train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=shuffle_train, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),2)) if test_batch_size: batch_size = test_batch_size test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=shuffle_test, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),2)) std = [1.0] train_loader.std = std test_loader.std = std return train_loader, test_loader def cifar_loaders(batch_size, shuffle_train = True, shuffle_test = False, train_random_transform = False, normalize_input = False, num_examples = None, test_batch_size=None): if normalize_input: std = [0.2023, 0.1994, 0.2010] normalize = transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = std) else: std = [1.0, 1.0, 1.0] normalize = transforms.Normalize(mean=[0, 0, 0], std=std) if train_random_transform: if normalize_input: train = datasets.CIFAR10('./data', train=True, download=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), 
transforms.RandomCrop(32, 4), transforms.ToTensor(), normalize, ])) else: train = datasets.CIFAR10('./data', train=True, download=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, 4), transforms.ToTensor(), ])) else: train = datasets.CIFAR10('./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor(),normalize])) test = datasets.CIFAR10('./data', train=False, transform=transforms.Compose([transforms.ToTensor(), normalize])) if num_examples: indices = list(range(num_examples)) train = data.Subset(train, indices) test = data.Subset(test, indices) train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=shuffle_train, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),6)) if test_batch_size: batch_size = test_batch_size test_loader = torch.utils.data.DataLoader(test, batch_size=max(batch_size, 1), shuffle=shuffle_test, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),6)) train_loader.std = std test_loader.std = std return train_loader, test_loader def svhn_loaders(batch_size, shuffle_train = True, shuffle_test = False, train_random_transform = False, normalize_input = False, num_examples = None, test_batch_size=None): if normalize_input: mean = [0.43768206, 0.44376972, 0.47280434] std = [0.19803014, 0.20101564, 0.19703615] normalize = transforms.Normalize(mean = mean, std = std) else: std = [1.0, 1.0, 1.0] normalize = transforms.Normalize(mean=[0, 0, 0], std=std) if train_random_transform: if normalize_input: train = datasets.SVHN('./data', split='train', download=True, transform=transforms.Compose([ transforms.RandomCrop(32, 4), transforms.ToTensor(), normalize, ])) else: train = datasets.SVHN('./data', split='train', download=True, transform=transforms.Compose([ transforms.RandomCrop(32, 4), transforms.ToTensor(), ])) else: train = datasets.SVHN('./data', split='train', download=True, transform=transforms.Compose([transforms.ToTensor(),normalize])) test = datasets.SVHN('./data', split='test', download=True, transform=transforms.Compose([transforms.ToTensor(), normalize])) if num_examples: indices = list(range(num_examples)) train = data.Subset(train, indices) test = data.Subset(test, indices) train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=shuffle_train, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),6)) if test_batch_size: batch_size = test_batch_size test_loader = torch.utils.data.DataLoader(test, batch_size=max(batch_size, 1), shuffle=shuffle_test, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),6)) train_loader.std = std test_loader.std = std mean, std = get_stats(train_loader) print('dataset mean = ', mean.numpy(), 'std = ', std.numpy()) return train_loader, test_loader def load_data(data, batch_size): if data == 'MNIST': dummy_input = torch.randn(1, 1, 28, 28) train_data = datasets.MNIST('./data', train=True, download=True, transform=transforms.ToTensor()) test_data = datasets.MNIST('./data', train=False, download=True, transform=transforms.ToTensor()) elif data == 'CIFAR': dummy_input = torch.randn(1, 3, 32, 32) normalize = transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = [0.2023, 0.1994, 0.2010]) train_data = datasets.CIFAR10('./data', train=True, download=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, 4, padding_mode='edge'), transforms.ToTensor(), normalize])) test_data = datasets.CIFAR10('./data', train=False, download=True, 
transform=transforms.Compose([transforms.ToTensor(), normalize])) train_data = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=batch_size, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),4)) if data == 'MNIST': train_data.mean = test_data.mean = torch.tensor([0.0]) train_data.std = test_data.std = torch.tensor([1.0]) elif data == 'CIFAR': train_data.mean = test_data.mean = torch.tensor([0.4914, 0.4822, 0.4465]) train_data.std = test_data.std = torch.tensor([0.2023, 0.1994, 0.2010]) return dummy_input, train_data, test_data # when new loaders are added, they must be registered here loaders = { "MNIST": partial(mnist_loaders, datasets.MNIST), "FashionMNIST": partial(mnist_loaders, datasets.FashionMNIST), "CIFAR": cifar_loaders, "svhn": svhn_loaders, } ================================================ FILE: examples/vision/efficient_convolution.py ================================================ """ Demonstration of efficient convolutional network implementation in auto_LiRPA. The auto_LiRPA library supports an efficient algorithm for computing bounds for convolutional networks. The "patches" mode implementation makes full backward bounds (CROWN) for convolutional layers significantly faster by using more efficient GPU operators. The convolution mode can be set by the "conv_mode" key in the bound_opts parameter when constructing your BoundedModule object, and the new "patches" mode is enabled by default. In this example we show the differences between "patches" mode and the old "matrix" mode in memory consumption, on a relatively large ResNet network. """ import sys import torch import random import numpy as np import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * import models device = 'cpu' if torch.cuda.is_available(): device = 'cuda' conv_mode = sys.argv[1] if len(sys.argv) > 1 else 'patches' # conv_mode can be set as 'matrix' or 'patches' seed = 1234 torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) random.seed(seed) np.random.seed(seed) ## Step 1: Define the model # model_ori = models.model_resnet(width=1, mult=4) # model_ori = models.ResNet18(in_planes=2) # model_ori = models.vnncomp_resnet2b() model_ori = models.vnncomp_resnet4b() model_ori = model_ori.to(device=device) ## Step 2: Prepare dataset as usual. # test_data = torchvision.datasets.MNIST("./data", train=False, download=True, transform=torchvision.transforms.ToTensor()) normalize = torchvision.transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = [0.2023, 0.1994, 0.2010]) test_data = torchvision.datasets.CIFAR10("./data", train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor(), normalize])) # For illustration we only use 1 image from dataset N = 1 n_classes = 10 image = torch.Tensor(test_data.data[:N]).reshape(N,3,32,32) # Convert to float between 0. and 1. image = image.to(torch.float32) / 255.0 if device == 'cuda': image = image.cuda() ## Step 3: wrap model with auto_LiRPA. # The second parameter is for constructing the trace of the computational graph, and its content is not important. # The new "patches" conv_mode provides a more efficient implementation for convolutional neural networks.
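# (Editorial sketch, not part of the original example: one way to quantify the memory
# difference between the 'patches' and 'matrix' modes is to reset and read PyTorch's peak
# CUDA memory counters around compute_bounds(). The helper name below is hypothetical.)
def report_peak_gpu_memory(tag):
    # Print the peak GPU memory allocated since the last reset; a no-op on CPU.
    if torch.cuda.is_available():
        peak_mib = torch.cuda.max_memory_allocated() / 2**20
        print('[{}] peak GPU memory: {:.1f} MiB'.format(tag, peak_mib))
# Illustrative usage: call torch.cuda.reset_peak_memory_stats() before
# model.compute_bounds(...) below, then report_peak_gpu_memory(conv_mode) afterwards.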
model = BoundedModule(model_ori, image, bound_opts={"conv_mode": conv_mode}, device=device) ## Step 4: Compute bounds using LiRPA given a perturbation eps = 0.1 norm = 2 ptb = PerturbationLpNorm(norm = norm, eps = eps) image = BoundedTensor(image, ptb) # Get model prediction as usual pred = model(image) # Compute bounds if device == 'cuda': torch.cuda.empty_cache() print('Using {} mode to compute convolution.'.format(conv_mode)) lb, ub = model.compute_bounds(IBP=False, C=None, method='backward') ## Step 5: Final output # pred = pred.detach().cpu().numpy() lb = lb.detach().cpu().numpy() ub = ub.detach().cpu().numpy() for i in range(N): # print("Image {} top-1 prediction {}".format(i, label[i])) for j in range(n_classes): print("f_{j}(x_0): {l:8.5f} <= f_{j}(x_0+delta) <= {u:8.5f}".format(j=j, l=lb[i][j], u=ub[i][j])) print() # Print the GPU memory usage print('Memory usage in "{}" mode:'.format(conv_mode)) if device == 'cuda': print(torch.cuda.memory_summary()) ================================================ FILE: examples/vision/imagenet_training.py ================================================ import random import time import argparse import multiprocessing import logging import torch.optim as optim from torch.nn import CrossEntropyLoss from auto_LiRPA import BoundedModule, BoundedTensor, BoundDataParallel, CrossEntropyWrapper from auto_LiRPA.bound_ops import BoundExp from auto_LiRPA.perturbations import * from auto_LiRPA.utils import MultiAverageMeter, logger, get_spec_matrix, sync_params import models import torchvision.datasets as datasets import torchvision.transforms as transforms from auto_LiRPA.eps_scheduler import * def get_exp_module(bounded_module): for _, node in bounded_module.named_modules(): # Find the Exp neuron in computational graph if isinstance(node, BoundExp): return node return None parser = argparse.ArgumentParser() parser.add_argument("--verify", action="store_true", help='verification mode, do not train') parser.add_argument("--load", type=str, default="", help='Load pretrained model') parser.add_argument("--device", type=str, default="cuda", choices=["cpu", "cuda"], help='use cpu or cuda') parser.add_argument("--data_dir", type=str, default="data/ImageNet64", help='dir of dataset') parser.add_argument("--seed", type=int, default=100, help='random seed') parser.add_argument("--eps", type=float, default=1. 
/ 255, help='Target training epsilon') parser.add_argument("--norm", type=float, default='inf', help='p norm for epsilon perturbation') parser.add_argument("--bound_type", type=str, default="CROWN-IBP", choices=["IBP", "CROWN-IBP", "CROWN"], help='method of bound analysis') parser.add_argument("--model", type=str, default="wide_resnet_imagenet64_1000class", help='model name (mlp_3layer, cnn_4layer, cnn_6layer, cnn_7layer, resnet)') parser.add_argument("--num_epochs", type=int, default=240, help='number of total epochs') parser.add_argument("--batch_size", type=int, default=125, help='batch size') parser.add_argument("--lr", type=float, default=1e-3, help='learning rate') parser.add_argument("--lr_decay_milestones", nargs='+', type=int, default=[200, 220], help='learning rate dacay milestones') parser.add_argument("--scheduler_name", type=str, default="SmoothedScheduler", choices=["LinearScheduler", "AdaptiveScheduler", "SmoothedScheduler"], help='epsilon scheduler') parser.add_argument("--scheduler_opts", type=str, default="start=100,length=80", help='options for epsilon scheduler') parser.add_argument("--bound_opts", type=str, default=None, choices=["same-slope", "zero-lb", "one-lb"], help='bound options') parser.add_argument('--clip_grad_norm', type=float, default=8.0) parser.add_argument('--in_planes', type=int, default=16) parser.add_argument('--widen_factor', type=int, default=10) args = parser.parse_args() exp_name = args.model + '_b' + str(args.batch_size) + '_' + str(args.bound_type) + '_epoch' + str( args.num_epochs) + '_' + args.scheduler_opts + '_' + str(args.eps)[:6] log_file = f'saved_models/{exp_name}{"_test" if args.verify else ""}.log' file_handler = logging.FileHandler(log_file) logger.addHandler(file_handler) def Train(model, t, loader, eps_scheduler, norm, train, opt, bound_type, method='robust', loss_fusion=True, final_node_name=None): num_class = 1000 meter = MultiAverageMeter() if train: model.train() eps_scheduler.train() eps_scheduler.step_epoch() eps_scheduler.set_epoch_length(int((len(loader.dataset) + loader.batch_size - 1) / loader.batch_size)) else: model.eval() eps_scheduler.eval() exp_module = get_exp_module(model) def get_bound_loss(x=None, c=None): if loss_fusion: bound_lower, bound_upper = False, True else: bound_lower, bound_upper = True, False if bound_type == 'IBP': lb, ub = model(method_opt="compute_bounds", x=x, IBP=True, C=c, method=None, final_node_name=final_node_name, no_replicas=True) elif bound_type == 'CROWN': lb, ub = model(method_opt="compute_bounds", x=x, IBP=False, C=c, method='backward', bound_lower=bound_lower, bound_upper=bound_upper) elif bound_type == 'CROWN-IBP': # lb, ub = model.compute_bounds(ptb=ptb, IBP=True, x=data, C=c, method='backward') # pure IBP bound # we use a mixed IBP and CROWN-IBP bounds, leading to better performance (Zhang et al., ICLR 2020) factor = (eps_scheduler.get_max_eps() - eps_scheduler.get_eps()) / eps_scheduler.get_max_eps() ilb, iub = model(method_opt="compute_bounds", x=x, IBP=True, C=c, method=None, final_node_name=final_node_name, no_replicas=True) if factor < 1e-50: lb, ub = ilb, iub else: clb, cub = model(method_opt="compute_bounds", IBP=False, C=c, method='backward', bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, no_replicas=True) if loss_fusion: ub = cub * factor + iub * (1 - factor) else: lb = clb * factor + ilb * (1 - factor) if loss_fusion: if isinstance(model, BoundDataParallel): max_input = model(get_property=True, node_class=BoundExp, att_name='max_input') 
else: max_input = exp_module.max_input return None, torch.mean(torch.log(ub) + max_input) else: # Pad zero at the beginning for each example, and use fake label '0' for all examples lb_padded = torch.cat((torch.zeros(size=(lb.size(0), 1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) robust_ce = CrossEntropyLoss()(-lb_padded, fake_labels) return lb, robust_ce for i, (data, labels) in enumerate(loader): start = time.time() eps_scheduler.step_batch() eps = eps_scheduler.get_eps() # For small eps just use natural training, no need to compute LiRPA bounds batch_method = method if eps < 1e-50: batch_method = "natural" if train: opt.zero_grad() # bound input for Linf norm used only if norm == np.inf: data_max = torch.reshape((1. - loader.mean) / loader.std, (1, -1, 1, 1)) data_min = torch.reshape((0. - loader.mean) / loader.std, (1, -1, 1, 1)) data_ub = torch.min(data + (eps / loader.std).view(1, -1, 1, 1), data_max) data_lb = torch.max(data - (eps / loader.std).view(1, -1, 1, 1), data_min) else: data_ub = data_lb = data if list(model.parameters())[0].is_cuda: data, labels = data.cuda(), labels.cuda() data_lb, data_ub = data_lb.cuda(), data_ub.cuda() ptb = PerturbationLpNorm(norm=norm, eps=eps, x_L=data_lb, x_U=data_ub) x = BoundedTensor(data, ptb) if loss_fusion: if batch_method == 'natural' or not train: output = model(x, labels) regular_ce = torch.mean(torch.log(output)) else: model(x, labels) regular_ce = torch.tensor(0., device=data.device) meter.update('CE', regular_ce.item(), x.size(0)) x = (x, labels) c = None else: c = get_spec_matrix(data, labels, num_class) x = (x, labels) output = model(x, final_node_name=final_node_name) regular_ce = CrossEntropyLoss()(output, labels) # regular CrossEntropyLoss used for warming up meter.update('CE', regular_ce.item(), x[0].size(0)) meter.update('Err', torch.sum(torch.argmax(output, dim=1) != labels).item() / x[0].size(0), x[0].size(0)) if batch_method == 'robust': # print(data.sum()) lb, robust_ce = get_bound_loss(x=x, c=c) loss = robust_ce elif batch_method == 'natural': loss = regular_ce if train: loss.backward() if args.clip_grad_norm: grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad_norm) meter.update('grad_norm', grad_norm) if isinstance(eps_scheduler, AdaptiveScheduler): eps_scheduler.update_loss(loss.item() - regular_ce.item()) opt.step() meter.update('Loss', loss.item(), data.size(0)) if batch_method != 'natural': meter.update('Robust_CE', robust_ce.item(), data.size(0)) if not loss_fusion: # For an example, if lower bounds of margins is >0 for all classes, the output is verifiably correct. 
# If any margin is < 0 this example is counted as an error meter.update('Verified_Err', torch.sum((lb < 0).any(dim=1)).item() / data.size(0), data.size(0)) meter.update('Time', time.time() - start) if (i + 1) % 500 == 0 and train: logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) return meter def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Step 1: Initial original model as usual, see model details in models/example_feedforward.py and models/example_resnet.py model_ori = models.Models[args.model](in_planes=args.in_planes, widen_factor=args.widen_factor) epoch = 0 if args.load: checkpoint = torch.load(args.load) epoch, state_dict, opt_state = checkpoint['epoch'], checkpoint['state_dict'], checkpoint.get('optimizer') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf(v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.info('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual dummy_input = torch.randn(2, 3, 56, 56) normalize = transforms.Normalize(mean=[0.4815, 0.4578, 0.4082], std=[0.2153, 0.2111, 0.2121]) train_data = datasets.ImageFolder(args.data_dir + '/train', transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(56, padding_mode='edge'), transforms.ToTensor(), normalize, ])) test_data = datasets.ImageFolder(args.data_dir + '/test', transform=transforms.Compose([ # transforms.RandomResizedCrop(64, scale=(0.875, 0.875), ratio=(1., 1.)), transforms.CenterCrop(56), transforms.ToTensor(), normalize])) train_data = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=min(multiprocessing.cpu_count(), 4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size // 4, pin_memory=True, num_workers=min(multiprocessing.cpu_count(), 4)) train_data.mean = test_data.mean = torch.tensor([0.4815, 0.4578, 0.4082]) train_data.std = test_data.std = torch.tensor([0.2153, 0.2111, 0.2121]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
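# (Editorial note, added for clarity: the second BoundedModule below wraps the model
# together with the cross-entropy loss via CrossEntropyWrapper and enables the
# 'loss_fusion' option. Conceptually, the fused network outputs sum_j exp(z_j - z_y),
# whose log is the cross-entropy loss, so only a single upper bound on this scalar needs
# to be propagated during training instead of per-class margin bounds; this is what keeps
# certified training tractable for the 1000-class ImageNet model used here.)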
model = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts}, device=args.device) model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), bound_opts= { 'activation_bound_option': args.bound_opts, 'loss_fusion': True }, device=args.device) model_loss = BoundDataParallel(model_loss) ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler opt = optim.Adam(model_loss.parameters(), lr=args.lr) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=args.lr_decay_milestones, gamma=0.1) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.info(str(model_ori)) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.info('resume opt_state') # skip epochs if epoch > 0: epoch_length = int((len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.info('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) ## Step 5: start training if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=None) else: timer = 0.0 best_err = 1e10 for t in range(epoch + 1, args.num_epochs + 1): logger.info("Epoch {}, learning rate {}".format(t, lr_scheduler.get_last_lr())) start_time = time.time() Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=True) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.info('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer)) logger.info("Evaluating...") torch.cuda.empty_cache() state_dict = sync_params(model_ori, model_loss, loss_fusion=True) with torch.no_grad(): if int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']) > t >= int( eps_scheduler.params['start']): m = Train(model_loss, t, test_data, eps_scheduler, norm, False, None, args.bound_type, loss_fusion=True) else: model_ori.load_state_dict(state_dict) model = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts}, device=args.device) model = BoundDataParallel(model) m = Train(model, t, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False) del model save_dict = {'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict()} if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']): current_err = m.avg('Verified_Err') if current_err < best_err: best_err = current_err torch.save(save_dict, 'saved_models/' + exp_name + '_best_' + str(best_err)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache() if __name__ == "__main__": logger.info(args) main(args) ================================================ FILE: examples/vision/jacobian.py ================================================ """Examples of computing Jacobian bounds. 
We show examples of: - Computing Jacobian bounds - Computing Linf local Lipschitz constants - Computing JVP bounds """ import numpy as np import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import Flatten from auto_LiRPA.jacobian import JacobianOP, GradNorm def build_model(in_ch=3, in_dim=32): model = nn.Sequential( Flatten(), nn.Linear(in_ch*in_dim**2, 100), nn.ReLU(), nn.Linear(100, 200), nn.ReLU(), nn.Linear(200, 10), ) return model def example_jacobian(model_ori, x0, bound_opts, device): """Example: computing Jacobian bounds.""" class JacobianWrapper(nn.Module): def __init__(self, model): super().__init__() self.model = model def forward(self, x): y = self.model(x) return JacobianOP.apply(y, x) model = BoundedModule(JacobianWrapper(model_ori), x0, bound_opts=bound_opts, device=device) def func(x0): return model_ori(x0.requires_grad_(True)) ret_ori = torch.autograd.functional.jacobian(func, x0).squeeze(2) ret_new = model(x0) assert torch.allclose(ret_ori, ret_new) ret = [] for eps in [0, 1./255, 4./255]: x = BoundedTensor(x0, PerturbationLpNorm(norm=np.inf, eps=eps)) lower, upper = model.compute_jacobian_bounds(x) print(f'Gap between upper and lower Jacobian bound for eps={eps:.5f}', (upper - lower).max()) if eps == 0: assert torch.allclose( ret_new.view(-1), lower.sum(dim=0, keepdim=True).view(-1)) assert torch.allclose( ret_new.view(-1), upper.sum(dim=0, keepdim=True).view(-1)) ret.append((lower.detach(), upper.detach())) return ret def example_local_lipschitz(model_ori, x0, bound_opts, device): """Example: computing Linf local Lipschitz constant.""" class LocalLipschitzWrapper(nn.Module): def __init__(self, model): super().__init__() self.model = model self.grad_norm = GradNorm(norm=1) def forward(self, x, mask): y = self.model(x) y_selected = y.matmul(mask) jacobian = JacobianOP.apply(y_selected, x) lipschitz = self.grad_norm(jacobian) return lipschitz mask = torch.zeros(10, 1, device=device) mask[1, 0] = 1 model = BoundedModule(LocalLipschitzWrapper(model_ori), (BoundedTensor(x0), mask), bound_opts=bound_opts, device=device) y = model_ori(x0.requires_grad_(True)) ret_ori = torch.autograd.grad(y[:, 1].sum(), x0)[0].abs().flatten(1).sum(dim=-1).view(-1) ret_new = model(x0, mask).view(-1) assert torch.allclose(ret_ori, ret_new) ret = [] for eps in [0, 1./255, 4./255]: x = BoundedTensor(x0, PerturbationLpNorm(norm=np.inf, eps=eps)) lip = [] for i in range(mask.shape[0]): mask.zero_() mask[i, 0] = 1 ub = model.compute_jacobian_bounds((x, mask), bound_lower=False)[1] lip.append(ub) lip = torch.concat(lip).max() print(f'Linf local Lipschitz constant for eps={eps:.5f}: {lip.item()}') ret.append(lip.detach()) return ret def example_jvp(model_ori, x0, bound_opts, device): """Example: computing Jacobian-Vector Product.""" class JVPWrapper(nn.Module): def __init__(self, model): super().__init__() self.model = model self.grad_norm = GradNorm(norm=1) def forward(self, x, v): y = self.model(x) jacobian = JacobianOP.apply(y, x).flatten(2) jvp = (jacobian * v.flatten(1).unsqueeze(1)).sum(dim=-1) return jvp vector = torch.rand_like(x0) model = BoundedModule(JVPWrapper(model_ori), (BoundedTensor(x0), vector), bound_opts=bound_opts, device=device) def func(x0): return model_ori(x0.requires_grad_(True)) ret_ori = torch.autograd.functional.jvp(func, x0, vector)[-1].view(-1) ret_new = model(x0, vector) assert torch.allclose(ret_ori, ret_new) ret = [] for eps in [0, 1./255, 4./255]: x = 
BoundedTensor(x0, PerturbationLpNorm(norm=np.inf, eps=eps)) lb, ub = model.compute_jacobian_bounds((x, vector)) print(f'JVP lower bound for eps={eps:.5f}: {lb}') print(f'JVP upper bound for eps={eps:.5f}: {ub}') ret.append((lb, ub)) return ret def compute_jacobians(model_ori, x0, bound_opts=None, device='cpu'): results = [[] for _ in range(3)] model_ori = model_ori.to(device) x0 = x0.to(device) print('Model:', model_ori) results[0] = example_jacobian(model_ori, x0, bound_opts, device) results[1] = example_local_lipschitz(model_ori, x0, bound_opts, device) results[2] = example_jvp(model_ori, x0, bound_opts, device) return results if __name__ == '__main__': torch.manual_seed(0) # Create a small model and load pre-trained parameters. model_ori = build_model(in_dim=8) device = 'cuda' if torch.cuda.is_available() else 'cpu' x0 = torch.randn(1, 3, 8, 8, device=device) compute_jacobians(model_ori, x0, device=device) ================================================ FILE: examples/vision/models/__init__.py ================================================ from models.resnet import model_resnet from models.feedforward import * from models.resnext import * from models.resnext_imagenet64 import * from models.densenet import * from models.mobilenet import * from models.densenet_no_bn import * from models.densenet_imagenet import * from models.wide_resnet_imagenet64 import * from models.wide_resnet_cifar import * from models.resnet18 import * from models.vnncomp_resnet import resnet2b as vnncomp_resnet2b, resnet4b as vnncomp_resnet4b Models = { 'mlp_2layer': mlp_2layer, 'mlp_3layer': mlp_3layer, 'mlp_3layer_weight_perturb': mlp_3layer_weight_perturb, 'mlp_5layer': mlp_5layer, 'cnn_4layer': cnn_4layer, 'cnn_6layer': cnn_6layer, 'cnn_7layer': cnn_7layer, 'cnn_7layer_bn': cnn_7layer_bn, 'cnn_7layer_bn_imagenet': cnn_7layer_bn_imagenet, 'resnet': model_resnet, 'resnet18': ResNet18, 'ResNeXt_cifar': ResNeXt_cifar, 'ResNeXt_imagenet64': ResNeXt_imagenet64, 'Densenet_cifar_32': Densenet_cifar_32, 'Densenet_cifar_wobn': Densenet_cifar_wobn, 'Densenet_imagenet': Densenet_imagenet, 'MobileNet_cifar': MobileNetV2, 'wide_resnet_cifar': wide_resnet_cifar, 'wide_resnet_cifar_bn': wide_resnet_cifar_bn, 'wide_resnet_cifar_bn_wo_pooling': wide_resnet_cifar_bn_wo_pooling, 'wide_resnet_cifar_bn_wo_pooling_dropout': wide_resnet_cifar_bn_wo_pooling_dropout, 'wide_resnet_imagenet64': wide_resnet_imagenet64, 'wide_resnet_imagenet64_1000class': wide_resnet_imagenet64_1000class, 'vnncomp_resnet2b': vnncomp_resnet2b, 'vnncomp_resnet4b': vnncomp_resnet4b, } ================================================ FILE: examples/vision/models/densenet.py ================================================ '''DenseNet in PyTorch. 
https://github.com/kuangliu/pytorch-cifar ''' import math import torch import torch.nn as nn import torch.nn.functional as F class Bottleneck(nn.Module): def __init__(self, in_planes, growth_rate): super(Bottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=True) self.bn2 = nn.BatchNorm2d(4*growth_rate) self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=True) def forward(self, x): out = self.conv1(F.relu(self.bn1(x))) out = self.conv2(F.relu(self.bn2(out))) # out = self.conv1(F.relu(x)) # out = self.conv2(F.relu(out)) out = torch.cat([out,x], 1) return out class Transition(nn.Module): def __init__(self, in_planes, out_planes): super(Transition, self).__init__() self.bn = nn.BatchNorm2d(in_planes) self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=True) def forward(self, x): out = self.conv(F.relu(self.bn(x))) out = F.avg_pool2d(out, 2) return out class DenseNet(nn.Module): def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): super(DenseNet, self).__init__() self.growth_rate = growth_rate num_planes = 2*growth_rate self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=True) self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) num_planes += nblocks[0]*growth_rate out_planes = int(math.floor(num_planes*reduction)) self.trans1 = Transition(num_planes, out_planes) num_planes = out_planes self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) num_planes += nblocks[1]*growth_rate out_planes = int(math.floor(num_planes*reduction)) self.trans2 = Transition(num_planes, out_planes) num_planes = out_planes self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) num_planes += nblocks[2]*growth_rate # out_planes = int(math.floor(num_planes*reduction)) # self.trans3 = Transition(num_planes, out_planes) # num_planes = out_planes # self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) # num_planes += nblocks[3]*growth_rate self.bn = nn.BatchNorm2d(num_planes) self.linear1 = nn.Linear(14336, 512) self.linear2 = nn.Linear(512, num_classes) def _make_dense_layers(self, block, in_planes, nblock): layers = [] for i in range(nblock): layers.append(block(in_planes, self.growth_rate)) in_planes += self.growth_rate return nn.Sequential(*layers) def forward(self, x): out = self.conv1(x) out = self.trans1(self.dense1(out)) out = self.trans2(self.dense2(out)) out = self.dense3(out) out = F.relu(self.bn(out)) out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def Densenet_cifar_32(in_ch=3, in_dim=32): return DenseNet(Bottleneck, [2,4,4], growth_rate=32) if __name__ == "__main__": from thop import profile net = Densenet_cifar_32() x = torch.randn(1,3,32,32) y = net(x) print(net) macs, params = profile(net, (torch.randn(1, 3, 32, 32),)) print(macs / 1000000, params / 1000000) # 6830M, 7M print(y) ================================================ FILE: examples/vision/models/densenet_imagenet.py ================================================ '''DenseNet in PyTorch. 
https://github.com/kuangliu/pytorch-cifar ''' import math import torch import torch.nn as nn import torch.nn.functional as F class Bottleneck(nn.Module): def __init__(self, in_planes, growth_rate): super(Bottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=True) self.bn2 = nn.BatchNorm2d(4*growth_rate) self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=True) def forward(self, x): out = self.conv1(F.relu(self.bn1(x))) out = self.conv2(F.relu(self.bn2(out))) # out = self.conv1(F.relu(x)) # out = self.conv2(F.relu(out)) out = torch.cat([out,x], 1) return out class Transition(nn.Module): def __init__(self, in_planes, out_planes): super(Transition, self).__init__() self.bn = nn.BatchNorm2d(in_planes) self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=True) def forward(self, x): out = self.conv(F.relu(self.bn(x))) out = F.avg_pool2d(out, 2) return out class DenseNet(nn.Module): def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=200): super(DenseNet, self).__init__() self.growth_rate = growth_rate num_planes = 2*growth_rate self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=True) self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) num_planes += nblocks[0]*growth_rate out_planes = int(math.floor(num_planes*reduction)) self.trans1 = Transition(num_planes, out_planes) num_planes = out_planes self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) num_planes += nblocks[1]*growth_rate out_planes = int(math.floor(num_planes*reduction)) self.trans2 = Transition(num_planes, out_planes) num_planes = out_planes self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) num_planes += nblocks[2]*growth_rate # out_planes = int(math.floor(num_planes*reduction)) # self.trans3 = Transition(num_planes, out_planes) # num_planes = out_planes # self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) # num_planes += nblocks[3]*growth_rate self.bn = nn.BatchNorm2d(num_planes) self.linear1 = nn.Linear(43904, 512) self.linear2 = nn.Linear(512, num_classes) def _make_dense_layers(self, block, in_planes, nblock): layers = [] for i in range(nblock): layers.append(block(in_planes, self.growth_rate)) in_planes += self.growth_rate return nn.Sequential(*layers) def forward(self, x): out = self.conv1(x) out = self.trans1(self.dense1(out)) out = self.trans2(self.dense2(out)) out = self.dense3(out) out = F.relu(self.bn(out)) out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def Densenet_imagenet(in_ch=3, in_dim=56): return DenseNet(Bottleneck, [2,4,4], growth_rate=32) if __name__ == "__main__": from thop import profile net = Densenet_imagenet() x = torch.randn(1,3,56,56) y = net(x) print(net) macs, params = profile(net, (torch.randn(1, 3, 56, 56),)) print(macs / 1000000, params / 1000000) # 564M, 11M print(y.shape) ================================================ FILE: examples/vision/models/densenet_no_bn.py ================================================ '''DenseNet in PyTorch. 
https://github.com/kuangliu/pytorch-cifar ''' import math import torch import torch.nn as nn import torch.nn.functional as F class Bottleneck(nn.Module): def __init__(self, in_planes, growth_rate): super(Bottleneck, self).__init__() # self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=True) # self.bn2 = nn.BatchNorm2d(4*growth_rate) self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=True) def forward(self, x): # out = self.conv1(F.relu(self.bn1(x))) # out = self.conv2(F.relu(self.bn2(out))) out = self.conv1(F.relu(x)) out = self.conv2(F.relu(out)) out = torch.cat([out,x], 1) return out class Transition(nn.Module): def __init__(self, in_planes, out_planes): super(Transition, self).__init__() # self.bn = nn.BatchNorm2d(in_planes) self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=True) def forward(self, x): out = self.conv(F.relu(x)) out = F.avg_pool2d(out, 2) return out class DenseNet(nn.Module): def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): super(DenseNet, self).__init__() self.growth_rate = growth_rate num_planes = 2*growth_rate self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=True) self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) num_planes += nblocks[0]*growth_rate out_planes = int(math.floor(num_planes*reduction)) self.trans1 = Transition(num_planes, out_planes) num_planes = out_planes self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) num_planes += nblocks[1]*growth_rate out_planes = int(math.floor(num_planes*reduction)) self.trans2 = Transition(num_planes, out_planes) num_planes = out_planes self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) num_planes += nblocks[2]*growth_rate # out_planes = int(math.floor(num_planes*reduction)) # self.trans3 = Transition(num_planes, out_planes) # num_planes = out_planes # self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) # num_planes += nblocks[3]*growth_rate # self.bn = nn.BatchNorm2d(num_planes) self.linear1 = nn.Linear(9216, 512) self.linear2 = nn.Linear(512, num_classes) def _make_dense_layers(self, block, in_planes, nblock): layers = [] for i in range(nblock): layers.append(block(in_planes, self.growth_rate)) in_planes += self.growth_rate return nn.Sequential(*layers) def forward(self, x): out = self.conv1(x) out = self.trans1(self.dense1(out)) out = self.trans2(self.dense2(out)) out = self.dense3(out) out = F.relu(out) out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def Densenet_cifar_wobn(in_ch=3, in_dim=56): return DenseNet(Bottleneck, [2,4,6], growth_rate=16) if __name__ == "__main__": net = Densenet_cifar_wobn() x = torch.randn(1,3,32,32) y = net(x) print(net) print(y) ================================================ FILE: examples/vision/models/feedforward.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from auto_LiRPA import PerturbationLpNorm, BoundedParameter # CNN, relatively large 4-layer # parameter in_ch: input image channel, 1 for MNIST and 3 for CIFAR # parameter in_dim: input dimension, 28 for MNIST and 32 for CIFAR # parameter width: width multiplier class cnn_4layer(nn.Module): def __init__(self, in_ch, in_dim, width=2, linear_size=256): super(cnn_4layer, self).__init__() self.conv1 = nn.Conv2d(in_ch, 4 * width, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(4 * width, 8 * width, 4, 
stride=2, padding=1) self.fc1 = nn.Linear(8 * width * (in_dim // 4) * (in_dim // 4), linear_size) self.fc2 = nn.Linear(linear_size, 10) def forward(self, x): x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) x = torch.flatten(x, 1) x = F.relu(self.fc1(x)) x = self.fc2(x) return x class mlp_2layer(nn.Module): def __init__(self, in_ch, in_dim, width=1): super(mlp_2layer, self).__init__() self.fc1 = nn.Linear(in_ch * in_dim * in_dim, 256 * width) self.fc2 = nn.Linear(256 * width, 10) def forward(self, x): x = torch.flatten(x, 1) x = F.relu(self.fc1(x)) x = self.fc2(x) return x class mlp_3layer(nn.Module): def __init__(self, in_ch, in_dim, width=1): super(mlp_3layer, self).__init__() self.fc1 = nn.Linear(in_ch * in_dim * in_dim, 256 * width) self.fc2 = nn.Linear(256 * width, 128 * width) self.fc3 = nn.Linear(128 * width, 10) def forward(self, x): x = torch.flatten(x, 1) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x class mlp_3layer_weight_perturb(nn.Module): def __init__(self, in_ch=1, in_dim=28, width=1, pert_weight=True, pert_bias=False, norm=2): super(mlp_3layer_weight_perturb, self).__init__() self.fc1 = nn.Linear(in_ch * in_dim * in_dim, 64 * width) self.fc2 = nn.Linear(64 * width, 64 * width) self.fc3 = nn.Linear(64 * width, 10) eps = 0.01 self.ptb = PerturbationLpNorm(norm=norm, eps=eps) if pert_weight: self.fc1.weight = BoundedParameter(self.fc1.weight.data, self.ptb) self.fc2.weight = BoundedParameter(self.fc2.weight.data, self.ptb) self.fc3.weight = BoundedParameter(self.fc3.weight.data, self.ptb) if pert_bias: self.fc1.bias = BoundedParameter(self.fc1.bias.data, self.ptb) self.fc2.bias = BoundedParameter(self.fc2.bias.data, self.ptb) self.fc3.bias = BoundedParameter(self.fc3.bias.data, self.ptb) def forward(self, x): x = x.view(-1, 784) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x class mlp_5layer(nn.Module): def __init__(self, in_ch, in_dim, width=1): super(mlp_5layer, self).__init__() self.fc1 = nn.Linear(in_ch * in_dim * in_dim, 256 * width) self.fc2 = nn.Linear(256 * width, 256 * width) self.fc3 = nn.Linear(256 * width, 256 * width) self.fc4 = nn.Linear(256 * width, 128 * width) self.fc5 = nn.Linear(128 * width, 10) def forward(self, x): x = torch.flatten(x, 1) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = F.relu(self.fc3(x)) x = F.relu(self.fc4(x)) x = self.fc5(x) return x # Model can also be defined as a nn.Sequential def cnn_7layer(in_ch=3, in_dim=32, width=64, linear_size=512): model = nn.Sequential( nn.Conv2d(in_ch, width, 3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(width, width, 3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(width, 2 * width, 3, stride=2, padding=1), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1), nn.ReLU(), nn.Flatten(), nn.Linear((in_dim//2) * (in_dim//2) * 2 * width, linear_size), nn.ReLU(), nn.Linear(linear_size,10) ) return model def cnn_7layer_bn(in_ch=3, in_dim=32, width=64, linear_size=512): model = nn.Sequential( nn.Conv2d(in_ch, width, 3, stride=1, padding=1), nn.BatchNorm2d(width), nn.ReLU(), nn.Conv2d(width, width, 3, stride=1, padding=1), nn.BatchNorm2d(width), nn.ReLU(), nn.Conv2d(width, 2 * width, 3, stride=2, padding=1), nn.BatchNorm2d(2 * width), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1), nn.BatchNorm2d(2 * width), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1), nn.BatchNorm2d(2 * width), nn.ReLU(), nn.Flatten(), nn.Linear((in_dim//2) * 
(in_dim//2) * 2 * width, linear_size), nn.ReLU(), nn.Linear(linear_size,10) ) return model def cnn_7layer_bn_imagenet(in_ch=3, in_dim=32, width=64, linear_size=512): model = nn.Sequential( nn.Conv2d(in_ch, width, 3, stride=1, padding=1), nn.BatchNorm2d(width), nn.ReLU(), nn.Conv2d(width, width, 3, stride=1, padding=1), nn.BatchNorm2d(width), nn.ReLU(), nn.Conv2d(width, 2 * width, 3, stride=2, padding=1), nn.BatchNorm2d(2 * width), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1), nn.BatchNorm2d(2 * width), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=2, padding=1), nn.BatchNorm2d(2 * width), nn.ReLU(), nn.Flatten(), nn.Linear(25088, linear_size), nn.ReLU(), nn.Linear(linear_size,200) ) return model def cnn_6layer(in_ch, in_dim, width=32, linear_size=256): model = nn.Sequential( nn.Conv2d(in_ch, width, 3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(width, width, 3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(width, 2 * width, 3, stride=2, padding=1), nn.ReLU(), nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1), nn.ReLU(), nn.Flatten(), nn.Linear((in_dim//2) * (in_dim//2) * 2 * width, linear_size), nn.ReLU(), nn.Linear(linear_size,10) ) return model ================================================ FILE: examples/vision/models/mobilenet.py ================================================ '''MobileNetV2 in PyTorch. See the paper "Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation" for more details. ''' import torch import torch.nn as nn import torch.nn.functional as F class Block(nn.Module): '''expand + depthwise + pointwise''' def __init__(self, in_planes, out_planes, expansion, stride): super(Block, self).__init__() self.stride = stride planes = expansion * in_planes self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) # self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False) # self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) # self.bn3 = nn.BatchNorm2d(out_planes) self.shortcut = nn.Sequential() if stride == 1 and in_planes != out_planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False), # nn.BatchNorm2d(out_planes), ) def forward(self, x): out = F.relu((self.conv1(x))) out = F.relu((self.conv2(out))) out = self.conv3(out) out = out + self.shortcut(x) if self.stride==1 else out return out class MobileNetV2(nn.Module): # (expansion, out_planes, num_blocks, stride) cfg = [(1, 16, 1, 1), (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10 (6, 32, 3, 2), (6, 64, 4, 2), (6, 96, 3, 1), (6, 160, 3, 2), (6, 320, 1, 1)] def __init__(self, num_classes=10): super(MobileNetV2, self).__init__() # NOTE: change conv1 stride 2 -> 1 for CIFAR10 self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) # self.bn1 = nn.BatchNorm2d(32) self.layers = self._make_layers(in_planes=32) self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False) # self.bn2 = nn.BatchNorm2d(1280) self.linear = nn.Linear(1280, num_classes) def _make_layers(self, in_planes): layers = [] for expansion, out_planes, num_blocks, stride in self.cfg: strides = [stride] + [1]*(num_blocks-1) for stride in strides: layers.append(Block(in_planes, out_planes, expansion, stride)) in_planes = out_planes return nn.Sequential(*layers) def forward(self, x): out = 
F.relu((self.conv1(x))) out = self.layers(out) out = F.relu((self.conv2(out))) # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10 out = F.avg_pool2d(out, 4) out = torch.flatten(out, 1) out = self.linear(out) return out if __name__ == "__main__": net = MobileNetV2() x = torch.randn(2,3,32,32) y = net(x) print(y.size()) ================================================ FILE: examples/vision/models/resnet.py ================================================ ''' ResNet used in https://arxiv.org/pdf/1805.12514.pdf https://github.com/locuslab/convex_adversarial/blob/0d11e671ad9318745a2439afce513c82dc6bf5ce/examples/problems.py ''' import torch import torch.nn as nn import math class Dense(nn.Module): def __init__(self, *Ws): super(Dense, self).__init__() self.Ws = nn.ModuleList(list(Ws)) if len(Ws) > 0 and hasattr(Ws[0], 'out_features'): self.out_features = Ws[0].out_features def forward(self, *xs): xs = xs[-len(self.Ws):] out = sum(W(x) for x, W in zip(xs, self.Ws) if W is not None) return out class DenseSequential(nn.Sequential): def forward(self, x): xs = [x] for module in self._modules.values(): if 'Dense' in type(module).__name__: xs.append(module(*xs)) else: xs.append(module(xs[-1])) return xs[-1] def model_resnet(in_ch=3, in_dim=32, width=1, mult=16, N=1): def block(in_filters, out_filters, k, downsample): if not downsample: k_first = 3 skip_stride = 1 k_skip = 1 else: k_first = 4 skip_stride = 2 k_skip = 2 return [ Dense(nn.Conv2d(in_filters, out_filters, k_first, stride=skip_stride, padding=1)), nn.ReLU(), Dense(nn.Conv2d(in_filters, out_filters, k_skip, stride=skip_stride, padding=0), None, nn.Conv2d(out_filters, out_filters, k, stride=1, padding=1)), nn.ReLU() ] conv1 = [nn.Conv2d(in_ch, mult, 3, stride=1, padding=3 if in_dim == 28 else 1), nn.ReLU()] conv2 = block(mult, mult * width, 3, False) for _ in range(N): conv2.extend(block(mult * width, mult * width, 3, False)) conv3 = block(mult * width, mult * 2 * width, 3, True) for _ in range(N - 1): conv3.extend(block(mult * 2 * width, mult * 2 * width, 3, False)) conv4 = block(mult * 2 * width, mult * 4 * width, 3, True) for _ in range(N - 1): conv4.extend(block(mult * 4 * width, mult * 4 * width, 3, False)) layers = ( conv1 + conv2 + conv3 + conv4 + [nn.Flatten(), nn.Linear(mult * 4 * width * 8 * 8, 1000), nn.ReLU(), nn.Linear(1000, 10)] ) model = DenseSequential( *layers ) for m in model.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) if m.bias is not None: m.bias.data.zero_() return model if __name__ == "__main__": model = model_resnet(in_ch=1, in_dim=28) dummy = torch.randn(8, 1, 28, 28) print(model) print(model(dummy).shape) ================================================ FILE: examples/vision/models/resnet18.py ================================================ '''ResNet in PyTorch. For Pre-activation ResNet, see 'preact_resnet.py'. Reference: [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 ''' import torch import torch.nn as nn import torch.nn.functional as F class BasicBlock(nn.Module): expansion = 1 def __init__(self, in_planes, planes, stride=1): super(BasicBlock, self).__init__() self.conv1 = nn.Conv2d( in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion*planes) ) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) out = self.bn2(self.conv2(out)) out += self.shortcut(x) out = F.relu(out) return out class Bottleneck(nn.Module): expansion = 4 def __init__(self, in_planes, planes, stride=1): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(self.expansion*planes) self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion*planes) ) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) out = F.relu(self.bn2(self.conv2(out))) out = self.bn3(self.conv3(out)) out += self.shortcut(x) out = F.relu(out) return out class ResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10, in_planes=64): super(ResNet, self).__init__() self.in_planes = in_planes self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(in_planes) self.layer1 = self._make_layer(block, in_planes, num_blocks[0], stride=1) self.layer2 = self._make_layer(block, in_planes * 2, num_blocks[1], stride=2) self.layer3 = self._make_layer(block, in_planes * 4, num_blocks[2], stride=2) self.layer4 = self._make_layer(block, in_planes * 8, num_blocks[3], stride=2) self.linear = nn.Linear(in_planes * 8 * block.expansion, num_classes) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = self.layer4(out) out = F.avg_pool2d(out, 4) out = torch.flatten(out, 1) out = self.linear(out) return out def ResNet18(in_planes=64): return ResNet(BasicBlock, [2, 2, 2, 2], in_planes=in_planes) if __name__ == "__main__": from thop import profile net = ResNet18(in_planes=64) x = torch.randn(1,3,32,32) y = net(x) print(net) macs, params = profile(net, (torch.randn(1, 3, 32, 32),)) print(macs / 1000000, params / 1000000) # 556M, 11M print(y) ================================================ FILE: examples/vision/models/resnext.py ================================================ '''ResNeXt in PyTorch. See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. 
https://github.com/kuangliu/pytorch-cifar ''' import torch import torch.nn as nn import torch.nn.functional as F class Block(nn.Module): '''Grouped convolution block.''' expansion = 2 def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): super(Block, self).__init__() group_width = cardinality * bottleneck_width self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=True) self.bn1 = nn.BatchNorm2d(group_width) self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=True) # self.bn2 = nn.BatchNorm2d(group_width) self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=True) # self.bn3 = nn.BatchNorm2d(self.expansion*group_width) self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion*group_width: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=True), # nn.BatchNorm2d(self.expansion*group_width) ) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) # out = F.relu(self.bn2(self.conv2(out))) # out = self.bn3(self.conv3(out)) # out = F.relu(self.conv1(x)) out = F.relu(self.conv2(out)) out = self.conv3(out) out += self.shortcut(x) out = F.relu(out) return out class ResNeXt(nn.Module): def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): super(ResNeXt, self).__init__() self.cardinality = cardinality self.bottleneck_width = bottleneck_width self.in_planes = 16 self.conv1 = nn.Conv2d(3, 16, kernel_size=3, bias=True, padding=1) # self.bn1 = nn.BatchNorm2d(16) self.layer1 = self._make_layer(num_blocks[0], 1) self.layer2 = self._make_layer(num_blocks[1], 2) self.layer3 = self._make_layer(num_blocks[2], 2) # self.layer4 = self._make_layer(num_blocks[3], 2) self.linear1 = nn.Linear(cardinality*bottleneck_width*512, 512) self.linear2 = nn.Linear(512, num_classes) def _make_layer(self, num_blocks, stride): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width # Increase bottleneck_width by 2 after each stage. self.bottleneck_width *= 2 return nn.Sequential(*layers) def forward(self, x): out = F.relu(self.conv1(x)) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def ResNeXt29_2x64d(): return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64) def ResNeXt29_4x64d(): return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64) def ResNeXt29_8x64d(): return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64) def ResNeXt29_32x4d(): return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4) def ResNeXt_cifar(in_ch=3, in_dim=32): return ResNeXt(num_blocks=[1,1,1], cardinality=2, bottleneck_width=32) if __name__ == "__main__": from thop import profile net = ResNeXt_cifar() x = torch.randn(1,3,32,32) y = net(x) print(net) macs, params = profile(net, (torch.randn(1, 3, 32, 32),)) print(macs / 1000000, params / 1000000) # 6830M, 7M print(y) ================================================ FILE: examples/vision/models/resnext_imagenet64.py ================================================ '''ResNeXt in PyTorch. See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. 
https://github.com/kuangliu/pytorch-cifar ''' import torch import torch.nn as nn import torch.nn.functional as F class Block(nn.Module): '''Grouped convolution block.''' expansion = 2 def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): super(Block, self).__init__() group_width = cardinality * bottleneck_width self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=True) self.bn1 = nn.BatchNorm2d(group_width) self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=True) # self.bn2 = nn.BatchNorm2d(group_width) self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=True) # self.bn3 = nn.BatchNorm2d(self.expansion*group_width) self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion*group_width: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=True), # nn.BatchNorm2d(self.expansion*group_width) ) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) # out = F.relu(self.bn2(self.conv2(out))) # out = self.bn3(self.conv3(out)) # out = F.relu(self.conv1(x)) out = F.relu(self.conv2(out)) out = self.conv3(out) out += self.shortcut(x) out = F.relu(out) return out class ResNeXt(nn.Module): def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=200): super(ResNeXt, self).__init__() self.cardinality = cardinality self.bottleneck_width = bottleneck_width self.in_planes = 16 self.conv1 = nn.Conv2d(3, 16, kernel_size=3, bias=True, padding=1) # self.bn1 = nn.BatchNorm2d(16) self.layer1 = self._make_layer(num_blocks[0], 1) self.layer2 = self._make_layer(num_blocks[1], 2) self.layer3 = self._make_layer(num_blocks[2], 2) # self.layer4 = self._make_layer(num_blocks[3], 2) self.linear1 = nn.Linear(cardinality*bottleneck_width*1568, 512) self.linear2 = nn.Linear(512, num_classes) def _make_layer(self, num_blocks, stride): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width # Increase bottleneck_width by 2 after each stage. 
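# (More precisely, the width is doubled after the whole stage: the next call to
# _make_layer builds its Blocks with bottleneck_width * 2, while self.in_planes was
# already updated inside the loop above to the previous stage's output channel count.)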
self.bottleneck_width *= 2 return nn.Sequential(*layers) def forward(self, x): out = F.relu(self.conv1(x)) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def ResNeXt_imagenet64(): return ResNeXt(num_blocks=[2,2,2], cardinality=2, bottleneck_width=8) if __name__ == "__main__": from thop import profile net = ResNeXt_imagenet64() x = torch.randn(1,3,56,56) y = net(x) print(net) macs, params = profile(net, (torch.randn(1, 3, 56, 56),)) print(macs / 1000000, params / 1000000) # 64M, 13M print(y.shape) ================================================ FILE: examples/vision/models/vnncomp_resnet.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F class BasicBlock(nn.Module): expansion = 1 def __init__(self, in_planes, planes, stride=1, bn=True, kernel=3): super(BasicBlock, self).__init__() self.bn = bn if kernel == 3: self.conv1 = nn.Conv2d( in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=(not self.bn)) if self.bn: self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=(not self.bn)) elif kernel == 2: self.conv1 = nn.Conv2d( in_planes, planes, kernel_size=2, stride=stride, padding=1, bias=(not self.bn)) if self.bn: self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=2, stride=1, padding=0, bias=(not self.bn)) elif kernel == 1: self.conv1 = nn.Conv2d( in_planes, planes, kernel_size=1, stride=stride, padding=0, bias=(not self.bn)) if self.bn: self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=1, stride=1, padding=0, bias=(not self.bn)) else: exit("kernel not supported!") if self.bn: self.bn2 = nn.BatchNorm2d(planes) self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion*planes: if self.bn: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=(not self.bn)), nn.BatchNorm2d(self.expansion*planes) ) else: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=(not self.bn)), ) def forward(self, x): if self.bn: out = F.relu(self.bn1(self.conv1(x))) out = self.bn2(self.conv2(out)) else: out = F.relu(self.conv1(x)) out = self.conv2(out) out += self.shortcut(x) out = F.relu(out) return out class ResNet5(nn.Module): def __init__(self, block, num_blocks=2, num_classes=10, in_planes=64, bn=True, last_layer="avg"): super(ResNet5, self).__init__() self.in_planes = in_planes self.bn = bn self.last_layer = last_layer self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=2, padding=1, bias=not self.bn) if self.bn: self.bn1 = nn.BatchNorm2d(in_planes) self.layer1 = self._make_layer(block, in_planes*2, num_blocks, stride=2, bn=bn, kernel=3) if self.last_layer == "avg": self.avg2d = nn.AvgPool2d(4) self.linear = nn.Linear(in_planes * 8 * block.expansion, num_classes) elif self.last_layer == "dense": self.linear1 = nn.Linear(in_planes * 8 * block.expansion * 16, 100) self.linear2 = nn.Linear(100, num_classes) else: exit("last_layer type not supported!") def _make_layer(self, block, planes, num_blocks, stride, bn, kernel): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride, bn, kernel)) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): if 
self.bn: out = F.relu(self.bn1(self.conv1(x))) else: out = F.relu(self.conv1(x)) out = self.layer1(out) if self.last_layer == "avg": out = self.avg2d(out) out = torch.flatten(out, 1) out = self.linear(out) elif self.last_layer == "dense": out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out class ResNet9(nn.Module): def __init__(self, block, num_blocks=2, num_classes=10, in_planes=64, bn=True, last_layer="avg"): super(ResNet9, self).__init__() self.in_planes = in_planes self.bn = bn self.last_layer = last_layer self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=2, padding=1, bias=not self.bn) if self.bn: self.bn1 = nn.BatchNorm2d(in_planes) self.layer1 = self._make_layer(block, in_planes*2, num_blocks, stride=2, bn=bn, kernel=3) self.layer2 = self._make_layer(block, in_planes*2, num_blocks, stride=2, bn=bn, kernel=3) if self.last_layer == "avg": self.avg2d = nn.AvgPool2d(4) self.linear = nn.Linear(in_planes * 2 * block.expansion, num_classes) elif self.last_layer == "dense": self.linear1 = nn.Linear(in_planes * 2 * block.expansion * 16, 100) self.linear2 = nn.Linear(100, num_classes) else: exit("last_layer type not supported!") def _make_layer(self, block, planes, num_blocks, stride, bn, kernel): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride, bn, kernel)) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): if self.bn: out = F.relu(self.bn1(self.conv1(x))) else: out = F.relu(self.conv1(x)) out = self.layer1(out) out = self.layer2(out) if self.last_layer == "avg": out = self.avg2d(out) out = torch.flatten(out, 1) out = self.linear(out) elif self.last_layer == "dense": out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def resnet2b(): return ResNet5(BasicBlock, num_blocks=2, in_planes=8, bn=False, last_layer="dense") def resnet4b(): return ResNet9(BasicBlock, num_blocks=2, in_planes=16, bn=False, last_layer="dense") if __name__ == '__main__': print('ResNet-2B:\n', resnet2b()) print('ResNet-4B:\n', resnet4b()) ================================================ FILE: examples/vision/models/wide_resnet_cifar.py ================================================ import torch import torch.nn as nn import torch.nn.init as init import torch.nn.functional as F from torch.autograd import Variable import sys import numpy as np def conv3x3(in_planes, out_planes, stride=1): return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True) def conv_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: init.xavier_uniform_(m.weight, gain=np.sqrt(2)) init.constant_(m.bias, 0) elif classname.find('BatchNorm') != -1: init.constant_(m.weight, 1) init.constant_(m.bias, 0) class wide_basic(nn.Module): def __init__(self, in_planes, planes, dropout_rate, stride=1, use_bn=False): super(wide_basic, self).__init__() self.use_bn = use_bn self.dropout_rate = dropout_rate if use_bn: self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True) if dropout_rate: self.dropout = nn.Dropout(p=dropout_rate) # self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True) self.shortcut = nn.Sequential() if stride != 1 or in_planes != planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True), ) def 
forward(self, x): # out = self.dropout(self.conv1(F.relu(self.bn1(x)))) if self.use_bn: out = self.conv1(F.relu(self.bn1(x))) else: out = self.conv1(F.relu(x)) if self.dropout_rate: out = self.dropout(out) # out = self.conv2(F.relu(self.bn2(out))) out = self.conv2(F.relu(out)) out += self.shortcut(x) return out class Wide_ResNet(nn.Module): def __init__(self, depth, widen_factor, dropout_rate, num_classes, use_bn=False, use_pooling=True): super(Wide_ResNet, self).__init__() self.in_planes = 16 self.use_bn = use_bn self.use_pooling = use_pooling assert ((depth-4)%6 ==0), 'Wide-resnet depth should be 6n+4' n = (depth-4)/6 k = widen_factor print('| Wide-Resnet %dx%d' %(depth, k)) nStages = [self.in_planes, self.in_planes*2*k, self.in_planes*4*k, self.in_planes*8*k] self.conv1 = conv3x3(3,nStages[0]) self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1) self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2) self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2) # self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.1) if self.use_pooling: self.linear1 = nn.Linear(nStages[3], 512) else: self.linear1 = nn.Linear(nStages[3]*64, 512) self.linear2 = nn.Linear(512, num_classes) def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride): strides = [stride] + [1]*(int(num_blocks)-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, dropout_rate, stride, self.use_bn)) self.in_planes = planes return nn.Sequential(*layers) def forward(self, x): out = self.conv1(x) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = F.relu(out) if self.use_pooling: out = F.avg_pool2d(out, 8) out = torch.flatten(out, 1) out = F.relu(self.linear1(out)) out = self.linear2(out) return out def wide_resnet_cifar(in_ch=3, in_dim=32): return Wide_ResNet(16, 4, 0.3, 10) def wide_resnet_cifar_bn(in_ch=3, in_dim=32): return Wide_ResNet(10, 4, None, 10, use_bn=True) def wide_resnet_cifar_bn_wo_pooling(in_ch=3, in_dim=32): # 1113M, 21M return Wide_ResNet(10, 4, None, 10, use_bn=True, use_pooling=False) def wide_resnet_cifar_bn_wo_pooling_dropout(in_ch=3, in_dim=32): # 1113M, 21M return Wide_ResNet(10, 4, 0.3, 10, use_bn=True, use_pooling=False) if __name__ == '__main__': from thop import profile net = wide_resnet_cifar_bn_wo_pooling_dropout() print(net) y = net(torch.randn(1,3,32,32)) macs, params = profile(net, (torch.randn(1, 3, 32, 32),)) print(macs/1000000, params/1000000) # 1096M, 5M print(y.size()) ================================================ FILE: examples/vision/models/wide_resnet_imagenet64.py ================================================ import torch import torch.nn as nn import torch.nn.init as init import torch.nn.functional as F import sys import numpy as np def conv3x3(in_planes, out_planes, stride=1): return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True) def conv_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: init.xavier_uniform_(m.weight, gain=np.sqrt(2)) init.constant_(m.bias, 0) elif classname.find('BatchNorm') != -1: init.constant_(m.weight, 1) init.constant_(m.bias, 0) class wide_basic(nn.Module): def __init__(self, in_planes, planes, dropout_rate, stride=1): super(wide_basic, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True) # self.dropout = nn.Dropout(p=dropout_rate) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = 
nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True) self.shortcut = nn.Sequential() if stride != 1 or in_planes != planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True), ) def forward(self, x): # out = self.dropout(self.conv1(F.relu(self.bn1(x)))) out = self.conv1(F.relu(self.bn1(x))) out = self.conv2(F.relu(self.bn2(out))) out += self.shortcut(x) return out class Wide_ResNet(nn.Module): def __init__(self, depth, widen_factor, dropout_rate, num_classes, in_planes=16, in_dim=56): super(Wide_ResNet, self).__init__() self.in_planes = in_planes assert ((depth-4)%6 ==0), 'Wide-resnet depth should be 6n+4' n = (depth-4)/6 k = widen_factor print('| Wide-Resnet %dx%d' %(depth, k)) nStages = [in_planes, in_planes*k, in_planes*2*k, in_planes*4*k] self.conv1 = conv3x3(3,nStages[0]) self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1) self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2) self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2) self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.1) self.linear = nn.Linear(nStages[3] * (in_dim//4//7)**2, num_classes) def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride): strides = [stride] + [1]*(int(num_blocks)-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, dropout_rate, stride)) self.in_planes = planes return nn.Sequential(*layers) def forward(self, x): out = self.conv1(x) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = F.relu(self.bn1(out)) out = F.avg_pool2d(out, 7) out = torch.flatten(out, 1) out = self.linear(out) return out def wide_resnet_imagenet64(in_ch=3, in_dim=56, in_planes=16, widen_factor=10): return Wide_ResNet(10, widen_factor, 0.3, 200, in_dim=in_dim, in_planes=in_planes) def wide_resnet_imagenet64_1000class(in_ch=3, in_dim=56, in_planes=16, widen_factor=10): return Wide_ResNet(10, widen_factor, 0.3, 1000, in_dim=in_dim, in_planes=in_planes) if __name__ == '__main__': from thop import profile net = wide_resnet_imagenet64_1000class() print(net) y = net(torch.randn(1,3,56,56)) macs, params = profile(net, (torch.randn(1, 3, 56, 56),)) print(macs, params) # 5229M, 8M print(y.size()) ================================================ FILE: examples/vision/save_intermediate_bound.py ================================================ """ A simple example for saving intermediate bounds. """ import os import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import Flatten def mnist_model(): model = nn.Sequential( nn.Conv2d(1, 16, 4, stride=2, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 4, stride=2, padding=1), nn.ReLU(), Flatten(), nn.Linear(32*7*7,100), nn.ReLU(), nn.Linear(100, 10) ) return model model = mnist_model() # Optionally, load the pretrained weights. 
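# (The example also runs without this checkpoint: with randomly initialized weights the
# graph is still traced and the intermediate bounds below are still computed; they are
# just not meaningful for a trained classifier.)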
checkpoint = torch.load(
    os.path.join(os.path.dirname(__file__), 'pretrained/mnist_a_adv.pth'),
    map_location=torch.device('cpu'))
model.load_state_dict(checkpoint)

test_data = torchvision.datasets.MNIST(
    './data', train=False, download=True,
    transform=torchvision.transforms.ToTensor())

# For illustration we only use 2 images from the dataset
N = 2
n_classes = 10
image = test_data.data[:N].view(N, 1, 28, 28)
true_label = test_data.targets[:N]
# Convert to float
image = image.to(torch.float32) / 255.0
if torch.cuda.is_available():
    image = image.cuda()
    model = model.cuda()

lirpa_model = BoundedModule(model, torch.empty_like(image), device=image.device)
print('Running on', image.device)

eps = 0.3
norm = float("inf")
ptb = PerturbationLpNorm(norm=norm, eps=eps)
image = BoundedTensor(image, ptb)

lirpa_model.set_bound_opts({'optimize_bound_args': {'iteration': 20, 'lr_alpha': 0.1}})
lb, ub = lirpa_model.compute_bounds(x=(image,), method='CROWN-Optimized')

# Intermediate layer bounds are returned as a dictionary, and if an argument is given,
# a PyTorch checkpoint will also be saved to disk.
save_dict = lirpa_model.save_intermediate('./mnist_a_adv_bounds.pt')
# To avoid saving the file and get just the bounds, call without any arguments:
# save_dict = lirpa_model.save_intermediate()


================================================
FILE: examples/vision/simple_training.py
================================================
"""
A simple script to train certified defense using the auto_LiRPA library.
We compute output bounds under input perturbations using auto_LiRPA, and use them to
form a "robust loss" for certified defense. Several different bound options are
supported, such as IBP, CROWN, and CROWN-IBP. This is a basic example on MNIST and
CIFAR-10 datasets with Lp (p>=0) norm perturbation.
For faster training, please see our examples with loss fusion such as cifar_training.py and tinyimagenet_training.py """ import time import random import multiprocessing import argparse import torch.optim as optim from torch.nn import CrossEntropyLoss from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from auto_LiRPA.utils import MultiAverageMeter from auto_LiRPA.eps_scheduler import LinearScheduler, AdaptiveScheduler, SmoothedScheduler, FixedScheduler import models import torchvision.datasets as datasets import torchvision.transforms as transforms parser = argparse.ArgumentParser() parser.add_argument("--verify", action="store_true", help='verification mode, do not train') parser.add_argument("--load", type=str, default="", help='Load pretrained model') parser.add_argument("--device", type=str, default="cuda", choices=["cpu", "cuda"], help='use cpu or cuda') parser.add_argument("--data", type=str, default="MNIST", choices=["MNIST", "CIFAR"], help='dataset') parser.add_argument("--seed", type=int, default=100, help='random seed') parser.add_argument("--eps", type=float, default=0.3, help='Target training epsilon') parser.add_argument("--norm", type=float, default='inf', help='p norm for epsilon perturbation') parser.add_argument("--bound_type", type=str, default="CROWN-IBP", choices=["IBP", "CROWN-IBP", "CROWN", "CROWN-FAST"], help='method of bound analysis') parser.add_argument("--model", type=str, default="resnet", help='model name (mlp_3layer, cnn_4layer, cnn_6layer, cnn_7layer, resnet)') parser.add_argument("--num_epochs", type=int, default=100, help='number of total epochs') parser.add_argument("--batch_size", type=int, default=256, help='batch size') parser.add_argument("--lr", type=float, default=5e-4, help='learning rate') parser.add_argument("--scheduler_name", type=str, default="SmoothedScheduler", choices=["LinearScheduler", "AdaptiveScheduler", "SmoothedScheduler", "FixedScheduler"], help='epsilon scheduler') parser.add_argument("--scheduler_opts", type=str, default="start=3,length=60", help='options for epsilon scheduler') parser.add_argument("--bound_opts", type=str, default=None, choices=["same-slope", "zero-lb", "one-lb"], help='bound options') parser.add_argument("--conv_mode", type=str, choices=["matrix", "patches"], default="patches") parser.add_argument("--save_model", type=str, default='') args = parser.parse_args() def Train(model, t, loader, eps_scheduler, norm, train, opt, bound_type, method='robust'): num_class = 10 meter = MultiAverageMeter() if train: model.train() eps_scheduler.train() eps_scheduler.step_epoch() eps_scheduler.set_epoch_length(int((len(loader.dataset) + loader.batch_size - 1) / loader.batch_size)) else: model.eval() eps_scheduler.eval() for i, (data, labels) in enumerate(loader): start = time.time() eps_scheduler.step_batch() eps = eps_scheduler.get_eps() # For small eps just use natural training, no need to compute LiRPA bounds batch_method = method if eps < 1e-20: batch_method = "natural" if train: opt.zero_grad() # generate specifications c = torch.eye(num_class).type_as(data)[labels].unsqueeze(1) - torch.eye(num_class).type_as(data).unsqueeze(0) # remove specifications to self I = (~(labels.data.unsqueeze(1) == torch.arange(num_class).type_as(labels.data).unsqueeze(0))) c = (c[I].view(data.size(0), num_class - 1, num_class)) # bound input for Linf norm used only if norm == np.inf: data_max = torch.reshape((1. - loader.mean) / loader.std, (1, -1, 1, 1)) data_min = torch.reshape((0. 
- loader.mean) / loader.std, (1, -1, 1, 1)) data_ub = torch.min(data + (eps / loader.std).view(1,-1,1,1), data_max) data_lb = torch.max(data - (eps / loader.std).view(1,-1,1,1), data_min) else: data_ub = data_lb = data if list(model.parameters())[0].is_cuda: data, labels, c = data.cuda(), labels.cuda(), c.cuda() data_lb, data_ub = data_lb.cuda(), data_ub.cuda() # Specify Lp norm perturbation. # When using Linf perturbation, we manually set element-wise bound x_L and x_U. eps is not used for Linf norm. if norm > 0: ptb = PerturbationLpNorm(norm=norm, eps=eps, x_L=data_lb, x_U=data_ub) elif norm == 0: ptb = PerturbationL0Norm(eps = eps_scheduler.get_max_eps(), ratio = eps_scheduler.get_eps()/eps_scheduler.get_max_eps()) x = BoundedTensor(data, ptb) output = model(x) regular_ce = CrossEntropyLoss()(output, labels) # regular CrossEntropyLoss used for warming up meter.update('CE', regular_ce.item(), x.size(0)) meter.update('Err', torch.sum(torch.argmax(output, dim=1) != labels).cpu().detach().numpy() / x.size(0), x.size(0)) if batch_method == "robust": if bound_type == "IBP": lb, ub = model.compute_bounds(IBP=True, C=c, method=None) elif bound_type == "CROWN": lb, ub = model.compute_bounds(IBP=False, C=c, method="backward", bound_upper=False) elif bound_type == "CROWN-IBP": # lb, ub = model.compute_bounds(ptb=ptb, IBP=True, x=data, C=c, method="backward") # pure IBP bound # we use a mixed IBP and CROWN-IBP bounds, leading to better performance (Zhang et al., ICLR 2020) factor = (eps_scheduler.get_max_eps() - eps) / eps_scheduler.get_max_eps() ilb, iub = model.compute_bounds(IBP=True, C=c, method=None) if factor < 1e-5: lb = ilb else: clb, cub = model.compute_bounds(IBP=False, C=c, method="backward", bound_upper=False) lb = clb * factor + ilb * (1 - factor) elif bound_type == "CROWN-FAST": # Similar to CROWN-IBP but no mix between IBP and CROWN bounds. lb, ub = model.compute_bounds(IBP=True, C=c, method=None) lb, ub = model.compute_bounds(IBP=False, C=c, method="backward", bound_upper=False) # Pad zero at the beginning for each example, and use fake label "0" for all examples lb_padded = torch.cat((torch.zeros(size=(lb.size(0),1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) robust_ce = CrossEntropyLoss()(-lb_padded, fake_labels) if batch_method == "robust": loss = robust_ce elif batch_method == "natural": loss = regular_ce if train: loss.backward() eps_scheduler.update_loss(loss.item() - regular_ce.item()) opt.step() meter.update('Loss', loss.item(), data.size(0)) if batch_method != "natural": meter.update('Robust_CE', robust_ce.item(), data.size(0)) # For an example, if lower bounds of margins is >0 for all classes, the output is verifiably correct. 
# If any margin is < 0 this example is counted as an error meter.update('Verified_Err', torch.sum((lb < 0).any(dim=1)).item() / data.size(0), data.size(0)) meter.update('Time', time.time() - start) if i % 50 == 0 and train: print('[{:2d}:{:4d}]: eps={:.8f} {}'.format(t, i, eps, meter)) print('[{:2d}:{:4d}]: eps={:.8f} {}'.format(t, i, eps, meter)) def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Step 1: Initial original model as usual, see model details in models/example_feedforward.py and models/example_resnet.py if args.data == 'MNIST': model_ori = models.Models[args.model](in_ch=1, in_dim=28) else: model_ori = models.Models[args.model](in_ch=3, in_dim=32) if args.load: state_dict = torch.load(args.load)['state_dict'] model_ori.load_state_dict(state_dict) ## Step 2: Prepare dataset as usual if args.data == 'MNIST': dummy_input = torch.randn(2, 1, 28, 28) train_data = datasets.MNIST("./data", train=True, download=True, transform=transforms.ToTensor()) test_data = datasets.MNIST("./data", train=False, download=True, transform=transforms.ToTensor()) elif args.data == 'CIFAR': dummy_input = torch.randn(2, 3, 32, 32) normalize = transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = [0.2023, 0.1994, 0.2010]) train_data = datasets.CIFAR10("./data", train=True, download=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, 4), transforms.ToTensor(), normalize])) test_data = datasets.CIFAR10("./data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor(), normalize])) train_data = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, pin_memory=True, num_workers=min(multiprocessing.cpu_count(),4)) if args.data == 'MNIST': train_data.mean = test_data.mean = torch.tensor([0.0]) train_data.std = test_data.std = torch.tensor([1.0]) elif args.data == 'CIFAR': train_data.mean = test_data.mean = torch.tensor([0.4914, 0.4822, 0.4465]) train_data.std = test_data.std = torch.tensor([0.2023, 0.1994, 0.2010]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
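# Its values are not important: the dummy input is passed through the network once so that
# auto_LiRPA can record the graph, and the actual bounds are later computed from the real
# BoundedTensor inputs built in Train() (see simple_verification.py, which uses
# torch.empty_like(image) for the same purpose).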
model = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts, 'conv_mode': args.conv_mode}, device=args.device) ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler opt = optim.Adam(model.parameters(), lr=args.lr) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.5) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) print("Model structure: \n", str(model_ori)) ## Step 5: start training if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, args.bound_type) else: timer = 0.0 for t in range(1, args.num_epochs+1): if eps_scheduler.reached_max_eps(): # Only decay learning rate after reaching the maximum eps lr_scheduler.step() print("Epoch {}, learning rate {}".format(t, lr_scheduler.get_lr())) start_time = time.time() Train(model, t, train_data, eps_scheduler, norm, True, opt, args.bound_type) epoch_time = time.time() - start_time timer += epoch_time print('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer)) print("Evaluating...") with torch.no_grad(): Train(model, t, test_data, eps_scheduler, norm, False, None, args.bound_type) torch.save({'state_dict': model_ori.state_dict(), 'epoch': t}, args.save_model if args.save_model != "" else args.model) if __name__ == "__main__": main(args) ================================================ FILE: examples/vision/simple_verification.py ================================================ """ A simple example for bounding neural network outputs under input perturbations. This example serves as a skeleton for robustness verification of neural networks. """ import os from collections import defaultdict import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import Flatten ## Step 1: Define computational graph by implementing forward() # This simple model comes from https://github.com/locuslab/convex_adversarial def mnist_model(): model = nn.Sequential( nn.Conv2d(1, 16, 4, stride=2, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 4, stride=2, padding=1), nn.ReLU(), Flatten(), nn.Linear(32*7*7,100), nn.ReLU(), nn.Linear(100, 10) ) return model model = mnist_model() # Optionally, load the pretrained weights. checkpoint = torch.load( os.path.join(os.path.dirname(__file__), 'pretrained/mnist_a_adv.pth'), map_location=torch.device('cpu')) model.load_state_dict(checkpoint) ## Step 2: Prepare dataset as usual test_data = torchvision.datasets.MNIST( './data', train=False, download=True, transform=torchvision.transforms.ToTensor()) # For illustration we only use 2 image from dataset N = 2 n_classes = 10 image = test_data.data[:N].view(N,1,28,28) true_label = test_data.targets[:N] # Convert to float image = image.to(torch.float32) / 255.0 if torch.cuda.is_available(): image = image.cuda() model = model.cuda() ## Step 3: wrap model with auto_LiRPA # The second parameter is for constructing the trace of the computational graph, # and its content is not important. 
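# The remainder of this script runs three demonstrations on the wrapped model:
#   1. computing output bounds with IBP, CROWN-IBP, CROWN and alpha-CROWN and comparing them;
#   2. extracting the linear coefficients (A matrices and bias terms) of the CROWN bounds and
#      concretizing them by hand to reproduce lb and ub;
#   3. bounding class margins directly by passing a specification matrix C to compute_bounds().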
lirpa_model = BoundedModule(model, torch.empty_like(image), device=image.device) print('Running on', image.device) # Visualize the lirpa_model # Visualization file is saved as "bounded_mnist_model.png" or "bounded_mnist_model.dot" lirpa_model.visualize("bounded_mnist_model") print() ## Step 4: Compute bounds using LiRPA given a perturbation eps = 0.3 norm = float("inf") ptb = PerturbationLpNorm(norm = norm, eps = eps) image = BoundedTensor(image, ptb) # Get model prediction as usual pred = lirpa_model(image) label = torch.argmax(pred, dim=1).cpu().detach().numpy() print('Demonstration 1: Bound computation and comparisons of different methods.\n') ## Step 5: Compute bounds for final output for method in [ 'IBP', 'IBP+backward (CROWN-IBP)', 'backward (CROWN)', 'CROWN-Optimized (alpha-CROWN)']: print('Bounding method:', method) if 'Optimized' in method: # For optimized bound, you can change the number of iterations, learning rate, etc here. Also you can increase verbosity to see per-iteration loss values. lirpa_model.set_bound_opts({'optimize_bound_args': {'iteration': 20, 'lr_alpha': 0.1}}) lb, ub = lirpa_model.compute_bounds(x=(image,), method=method.split()[0]) for i in range(N): print(f'Image {i} top-1 prediction {label[i]} ground-truth {true_label[i]}') for j in range(n_classes): indicator = '(ground-truth)' if j == true_label[i] else '' print('f_{j}(x_0): {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f} {ind}'.format( j=j, l=lb[i][j].item(), u=ub[i][j].item(), ind=indicator)) print() print('Demonstration 2: Obtaining linear coefficients of the lower and upper bounds.\n') # There are many bound coefficients during CROWN bound calculation; here we are interested in the linear bounds # of the output layer, with respect to the input layer (the image). required_A = defaultdict(set) required_A[lirpa_model.output_name[0]].add(lirpa_model.input_name[0]) # Helper functions to concretize the linear bounds def concretize_bound(A, bias, xL, xU, upper: bool): """ Concretize linear bound. If upper is True: use A_pos * xU + A_neg * xL + bias If upper is False: use A_pos * xL + A_neg * xU + bias """ A_pos = torch.clamp(A, min=0.0) A_neg = torch.clamp(A, max=0.0) if upper: return ( torch.einsum("boijk,boijk->bo", A_pos, xU) + torch.einsum("boijk,boijk->bo", A_neg, xL) + bias ) else: return ( torch.einsum("boijk,boijk->bo", A_pos, xL) + torch.einsum("boijk,boijk->bo", A_neg, xU) + bias ) # Prepare input bounds x_L = (image - eps).unsqueeze(1) x_U = (image + eps).unsqueeze(1) for method in [ 'IBP+backward (CROWN-IBP)', 'backward (CROWN)', 'CROWN', 'CROWN-Optimized (alpha-CROWN)']: print("Bounding method:", method) if 'Optimized' in method: # For optimized bound, you can change the number of iterations, learning rate, etc here. Also you can increase verbosity to see per-iteration loss values. 
lirpa_model.set_bound_opts({'optimize_bound_args': {'iteration': 30, 'lr_alpha': 0.1}}) lb, ub, A_dict = lirpa_model.compute_bounds(x=(image,), method=method.split()[0], return_A=True, needed_A_dict=required_A) lower_A, lower_bias = A_dict[lirpa_model.output_name[0]][lirpa_model.input_name[0]]['lA'], A_dict[lirpa_model.output_name[0]][lirpa_model.input_name[0]]['lbias'] upper_A, upper_bias = A_dict[lirpa_model.output_name[0]][lirpa_model.input_name[0]]['uA'], A_dict[lirpa_model.output_name[0]][lirpa_model.input_name[0]]['ubias'] print(f'lower bound linear coefficients size (batch, output_dim, *input_dims): {list(lower_A.size())}') print(f'lower bound linear coefficients norm (smaller is better): {lower_A.norm()}') print(f'lower bound bias term size (batch, output_dim): {list(lower_bias.size())}') print(f'lower bound bias term sum (larger is better): {lower_bias.sum()}') print(f'upper bound linear coefficients size (batch, output_dim, *input_dims): {list(upper_A.size())}') print(f'upper bound linear coefficients norm (smaller is better): {upper_A.norm()}') print(f'upper bound bias term size (batch, output_dim): {list(upper_bias.size())}') print(f'upper bound bias term sum (smaller is better): {upper_bias.sum()}') print(f'These linear lower and upper bounds are valid everywhere within the perturbation radii.\n') # Validate the concretization of the linear bounds concretized_lb = concretize_bound(lower_A, lower_bias, x_L, x_U, upper=False) concretized_ub = concretize_bound(upper_A, upper_bias, x_L, x_U, upper=True) assert torch.allclose( concretized_lb, lb, rtol=1e-4, atol=1e-5), "Lower bound mismatch! Error: {}".format((concretized_lb - lb).abs().max()) assert torch.allclose( concretized_ub, ub, rtol=1e-4, atol=1e-5), "Upper bound mismatch! Error: {}".format((concretized_ub - ub).abs().max()) ## An example for computing margin bounds. # In compute_bounds() function you can pass in a specification matrix C, which is a final linear matrix applied to the last layer NN output. # For example, if you are interested in the margin between the groundtruth class and another class, you can use C to specify the margin. # This generally yields tighter bounds. # Here we compute the margin between groundtruth class and groundtruth class + 1. # If you have more than 1 specifications per batch element, you can expand the second dimension of C (it is 1 here for demonstration). lirpa_model = BoundedModule(model, torch.empty_like(image), device=image.device) C = torch.zeros(size=(N, 1, n_classes), device=image.device) groundtruth = true_label.to(device=image.device).unsqueeze(1).unsqueeze(1) target_label = (groundtruth + 1) % n_classes C.scatter_(dim=2, index=groundtruth, value=1.0) C.scatter_(dim=2, index=target_label, value=-1.0) print('Demonstration 3: Computing bounds with a specification matrix.\n') print('Specification matrix:\n', C) for method in ['IBP', 'IBP+backward (CROWN-IBP)', 'backward (CROWN)', 'CROWN-Optimized (alpha-CROWN)']: print('Bounding method:', method) if 'Optimized' in method: # For optimized bound, you can change the number of iterations, learning rate, etc here. Also you can increase verbosity to see per-iteration loss values. 
lirpa_model.set_bound_opts({'optimize_bound_args': {'iteration': 20, 'lr_alpha': 0.1, }}) lb, ub = lirpa_model.compute_bounds(x=(image,), method=method.split()[0], C=C) for i in range(N): print('Image {} top-1 prediction {} ground-truth {}'.format(i, label[i], true_label[i])) print('margin bounds: {l:8.3f} <= f_{j}(x_0+delta) - f_{target}(x_0+delta) <= {u:8.3f}'.format( j=true_label[i], target=(true_label[i] + 1) % n_classes, l=lb[i][0].item(), u=ub[i][0].item())) print() ================================================ FILE: examples/vision/tinyimagenet_training.py ================================================ import os import random import time import argparse import multiprocessing import logging import torch.optim as optim from torch.nn import CrossEntropyLoss from auto_LiRPA import BoundedModule, BoundedTensor, BoundDataParallel, CrossEntropyWrapper from auto_LiRPA.bound_ops import BoundExp from auto_LiRPA.perturbations import * from auto_LiRPA.utils import MultiAverageMeter, logger, get_spec_matrix, sync_params import models import torchvision.datasets as datasets import torchvision.transforms as transforms from auto_LiRPA.eps_scheduler import * def get_exp_module(bounded_module): for _, node in bounded_module.named_modules(): # Find the Exp neuron in computational graph if isinstance(node, BoundExp): return node return None parser = argparse.ArgumentParser() parser.add_argument("--verify", action="store_true", help='verification mode, do not train') parser.add_argument("--load", type=str, default="", help='Load pretrained model') parser.add_argument("--device", type=str, default="cuda", choices=["cpu", "cuda"], help='use cpu or cuda') parser.add_argument("--data_dir", type=str, default="data/tinyImageNet/tiny-imagenet-200", help='dir of dataset') parser.add_argument("--seed", type=int, default=100, help='random seed') parser.add_argument("--eps", type=float, default=1. 
/ 255, help='Target training epsilon') parser.add_argument("--norm", type=float, default='inf', help='p norm for epsilon perturbation') parser.add_argument("--bound_type", type=str, default="CROWN-IBP", choices=["IBP", "CROWN-IBP", "CROWN"], help='method of bound analysis') parser.add_argument("--model", type=str, default="wide_resnet_imagenet64", help='model name (cnn_7layer_bn_imagenet, ResNeXt_imagenet64, ResNeXt_imagenet64)') parser.add_argument("--num_epochs", type=int, default=600, help='number of total epochs') parser.add_argument("--batch_size", type=int, default=128, help='batch size') parser.add_argument("--lr", type=float, default=5e-4, help='learning rate') parser.add_argument("--lr_decay_milestones", nargs='+', type=int, default=[600, 700], help='learning rate dacay milestones') parser.add_argument("--scheduler_name", type=str, default="SmoothedScheduler", choices=["LinearScheduler", "AdaptiveScheduler", "SmoothedScheduler"], help='epsilon scheduler') parser.add_argument("--scheduler_opts", type=str, default="start=100,length=400,mid=0.4", help='options for epsilon scheduler') parser.add_argument("--bound_opts", type=str, default=None, choices=["same-slope", "zero-lb", "one-lb"], help='bound options') parser.add_argument('--clip_grad_norm', type=float, default=8.0) parser.add_argument('--in_planes', type=int, default=16) parser.add_argument('--widen_factor', type=int, default=10) args = parser.parse_args() exp_name = args.model + '_b' + str(args.batch_size) + '_' + str(args.bound_type) + '_epoch' + str( args.num_epochs) + '_' + args.scheduler_opts + '_ImageNet_' + str(args.eps)[:6] os.makedirs('saved_models/', exist_ok=True) log_file = f'saved_models/{exp_name}{"_test" if args.verify else ""}.log' file_handler = logging.FileHandler(log_file) logger.addHandler(file_handler) def Train(model, t, loader, eps_scheduler, norm, train, opt, bound_type, method='robust', loss_fusion=True, final_node_name=None): num_class = 200 meter = MultiAverageMeter() if train: model.train() eps_scheduler.train() eps_scheduler.step_epoch() eps_scheduler.set_epoch_length(int((len(loader.dataset) + loader.batch_size - 1) / loader.batch_size)) else: model.eval() eps_scheduler.eval() exp_module = get_exp_module(model) def get_bound_loss(x=None, c=None): if loss_fusion: bound_lower, bound_upper = False, True else: bound_lower, bound_upper = True, False if bound_type == 'IBP': lb, ub = model(method_opt="compute_bounds", x=x, IBP=True, C=c, method=None, final_node_name=final_node_name, no_replicas=True) elif bound_type == 'CROWN': lb, ub = model(method_opt="compute_bounds", x=x, IBP=False, C=c, method='backward', bound_lower=bound_lower, bound_upper=bound_upper) elif bound_type == 'CROWN-IBP': # lb, ub = model.compute_bounds(ptb=ptb, IBP=True, x=data, C=c, method='backward') # pure IBP bound # we use a mixed IBP and CROWN-IBP bounds, leading to better performance (Zhang et al., ICLR 2020) factor = (eps_scheduler.get_max_eps() - eps_scheduler.get_eps()) / eps_scheduler.get_max_eps() ilb, iub = model(method_opt="compute_bounds", x=x, IBP=True, C=c, method=None, final_node_name=final_node_name, no_replicas=True) if factor < 1e-50: lb, ub = ilb, iub else: clb, cub = model(method_opt="compute_bounds", IBP=False, C=c, method='backward', bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, no_replicas=True) if loss_fusion: ub = cub * factor + iub * (1 - factor) else: lb = clb * factor + ilb * (1 - factor) if loss_fusion: if isinstance(model, BoundDataParallel): max_input = 
model(get_property=True, node_class=BoundExp, att_name='max_input') else: max_input = exp_module.max_input return None, torch.mean(torch.log(ub) + max_input) else: # Pad zero at the beginning for each example, and use fake label '0' for all examples lb_padded = torch.cat((torch.zeros(size=(lb.size(0), 1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) robust_ce = CrossEntropyLoss()(-lb_padded, fake_labels) return lb, robust_ce for i, (data, labels) in enumerate(loader): start = time.time() eps_scheduler.step_batch() eps = eps_scheduler.get_eps() # For small eps just use natural training, no need to compute LiRPA bounds batch_method = method if eps < 1e-50: batch_method = "natural" if train: opt.zero_grad() # bound input for Linf norm used only if norm == np.inf: data_max = torch.reshape((1. - loader.mean) / loader.std, (1, -1, 1, 1)) data_min = torch.reshape((0. - loader.mean) / loader.std, (1, -1, 1, 1)) data_ub = torch.min(data + (eps / loader.std).view(1, -1, 1, 1), data_max) data_lb = torch.max(data - (eps / loader.std).view(1, -1, 1, 1), data_min) else: data_ub = data_lb = data if list(model.parameters())[0].is_cuda: data, labels = data.cuda(), labels.cuda() data_lb, data_ub = data_lb.cuda(), data_ub.cuda() ptb = PerturbationLpNorm(norm=norm, eps=eps, x_L=data_lb, x_U=data_ub) x = BoundedTensor(data, ptb) if loss_fusion: if batch_method == 'natural' or not train: output = model(x, labels) regular_ce = torch.mean(torch.log(output)) else: model(x, labels) regular_ce = torch.tensor(0., device=data.device) meter.update('CE', regular_ce.item(), x.size(0)) x = (x, labels) c = None else: c = get_spec_matrix(data, labels, num_class) x = (x, labels) output = model(x, final_node_name=final_node_name) regular_ce = CrossEntropyLoss()(output, labels) # regular CrossEntropyLoss used for warming up meter.update('CE', regular_ce.item(), x[0].size(0)) meter.update('Err', torch.sum(torch.argmax(output, dim=1) != labels).item() / x[0].size(0), x[0].size(0)) if batch_method == 'robust': # print(data.sum()) lb, robust_ce = get_bound_loss(x=x, c=c) loss = robust_ce elif batch_method == 'natural': loss = regular_ce if train: loss.backward() if args.clip_grad_norm: grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad_norm) meter.update('grad_norm', grad_norm) if isinstance(eps_scheduler, AdaptiveScheduler): eps_scheduler.update_loss(loss.item() - regular_ce.item()) opt.step() meter.update('Loss', loss.item(), data.size(0)) if batch_method != 'natural': meter.update('Robust_CE', robust_ce.item(), data.size(0)) if not loss_fusion: # For an example, if lower bounds of margins is >0 for all classes, the output is verifiably correct. 
# If any margin is < 0 this example is counted as an error meter.update('Verified_Err', torch.sum((lb < 0).any(dim=1)).item() / data.size(0), data.size(0)) meter.update('Time', time.time() - start) if (i + 1) % 250 == 0 and train: logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) return meter def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Step 1: Initial original model as usual, see model details in models/example_feedforward.py and models/example_resnet.py model_ori = models.Models[args.model](in_planes=args.in_planes, widen_factor=args.widen_factor) epoch = 0 if args.load: checkpoint = torch.load(args.load) epoch, state_dict, opt_state = checkpoint['epoch'], checkpoint['state_dict'], checkpoint.get('optimizer') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf(v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.info('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual dummy_input = torch.randn(2, 3, 56, 56) normalize = transforms.Normalize(mean=[0.4802, 0.4481, 0.3975], std=[0.2302, 0.2265, 0.2262]) train_data = datasets.ImageFolder(args.data_dir + '/train', transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(56, padding_mode='edge'), transforms.ToTensor(), normalize, ])) test_data = datasets.ImageFolder(args.data_dir + '/val', transform=transforms.Compose([ # transforms.RandomResizedCrop(64, scale=(0.875, 0.875), ratio=(1., 1.)), transforms.CenterCrop(56), transforms.ToTensor(), normalize])) train_data = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=min(multiprocessing.cpu_count(), 4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size // 5, pin_memory=True, num_workers=min(multiprocessing.cpu_count(), 4)) train_data.mean = test_data.mean = torch.tensor([0.4802, 0.4481, 0.3975]) train_data.std = test_data.std = torch.tensor([0.2302, 0.2265, 0.2262]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
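# Two bounded models are constructed below: `model` bounds the logits and is used for
# evaluation, while `model_loss` wraps the network together with its cross-entropy loss
# (CrossEntropyWrapper) and uses the 'loss_fusion' bound option, so that an upper bound
# on the loss itself is optimized directly during training; this is the faster training
# mode referred to in simple_training.py.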
model = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts}, device=args.device) model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), bound_opts= { 'activation_bound_option': args.bound_opts, 'loss_fusion': True }, device=args.device) model_loss = BoundDataParallel(model_loss) ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler opt = optim.Adam(model_loss.parameters(), lr=args.lr) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=args.lr_decay_milestones, gamma=0.1) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.info(str(model_ori)) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.info('resume opt_state') # skip epochs if epoch > 0: epoch_length = int((len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.info('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) ## Step 5: start training if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=None) else: timer = 0.0 best_err = 1e10 for t in range(epoch + 1, args.num_epochs + 1): logger.info("Epoch {}, learning rate {}".format(t, lr_scheduler.get_last_lr())) start_time = time.time() Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=True) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.info('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer)) logger.info("Evaluating...") torch.cuda.empty_cache() state_dict = sync_params(model_ori, model_loss, loss_fusion=True) with torch.no_grad(): if int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']) > t >= int( eps_scheduler.params['start']): m = Train(model_loss, t, test_data, eps_scheduler, norm, False, None, args.bound_type, loss_fusion=True) else: model_ori.load_state_dict(state_dict) model = BoundedModule(model_ori, dummy_input, bound_opts={'activation_bound_option':args.bound_opts}, device=args.device) model = BoundDataParallel(model) m = Train(model, t, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False) del model save_dict = {'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict()} if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']): current_err = m.avg('Verified_Err') if current_err < best_err: best_err = current_err torch.save(save_dict, 'saved_models/' + exp_name + '_best_' + str(best_err)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache() if __name__ == "__main__": logger.info(args) main(args) ================================================ FILE: examples/vision/verify_two_node.py ================================================ """ Example for multi-node perturbation. An input image is splited to two parts where each part is perturbed respectively constained by L-inf norm. It is expected to output the same results as running `simple_verification.py` where the whole image is perturbed constained by L-inf norm. 
""" import os import torch.nn as nn import torch.nn.functional as F import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * ## Step 1: Define computational graph by implementing forward() class cnn_MNIST(nn.Module): def __init__(self): super(cnn_MNIST, self).__init__() self.conv1 = nn.Conv2d(1, 8, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(8, 16, 4, stride=2, padding=1) self.fc1 = nn.Linear(784, 256) self.fc2 = nn.Linear(256, 10) def forward(self, x, y): x = torch.cat([x, y], dim=2) # concat the two parts of input x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) x = x.view(-1, 784) x = F.relu(self.fc1(x)) x = self.fc2(x) return x model = cnn_MNIST() # Load the pretrained weights checkpoint = torch.load(os.path.join(os.path.dirname(__file__),"pretrained/mnist_cnn_small.pth"), map_location=torch.device('cpu')) model.load_state_dict(checkpoint) ## Step 2: Prepare dataset as usual test_data = torchvision.datasets.MNIST( "./data", train=False, download=True, transform=torchvision.transforms.ToTensor()) # For illustration we only use 2 image from dataset N = 2 n_classes = 10 image = test_data.data[:N].view(N,1,28,28) # Convert to float image = image.to(torch.float32) / 255.0 if torch.cuda.is_available(): image = image.cuda() model = model.cuda() ## Step 3: wrap model with auto_LiRPA # The second parameter is for constructing the trace of the computational graph, # and its content is not important. image_1, image_2 = torch.split(torch.empty_like(image), [14, 14], dim=2) model = BoundedModule( model, (image_1, image_2), device=image.device, bound_opts={'conv_mode': 'matrix'} # Patches mode is not supported currently ) ## Step 4: Compute bounds using LiRPA given a perturbation eps = 0.3 norm = np.inf ptb = PerturbationLpNorm(norm=norm, eps=eps) image_1, image_2 = torch.split(image, [14, 14], dim=2) image_1 = BoundedTensor(image_1, ptb) image_2 = BoundedTensor(image_2, ptb) # Get model prediction as usual pred = model(image_1, image_2) label = torch.argmax(pred, dim=1).cpu().numpy() # Compute bounds lb, ub = model.compute_bounds() ## Step 5: Final output pred = pred.detach().cpu().numpy() lb = lb.detach().cpu().numpy() ub = ub.detach().cpu().numpy() for i in range(N): print("Image {} top-1 prediction {}".format(i, label[i])) for j in range(n_classes): print("f_{j}(x_0) = {fx0:8.3f}, {l:8.3f} <= f_{j}(x_0+delta) <= {u:8.3f}".format( j=j, fx0=pred[i][j], l=lb[i][j], u=ub[i][j])) print() ================================================ FILE: examples/vision/weight_perturbation_training.py ================================================ """ A simple example for certified robustness against model weight perturbations. Since our framework works on general computational graphs, where both model weights and model inputs are inputs of the computational graph, our perturbation analysis can naturally be applied to the model weights, allowing analysis for certified model robustness under weight perturbations. This file provides a simple example of certified defense for model weight perturbations. See our paper https://arxiv.org/abs/2002.12920 for more details. 
""" import random import time import os import argparse import logging import torch.optim as optim from torch.nn import CrossEntropyLoss from auto_LiRPA import BoundedModule, CrossEntropyWrapper, BoundDataParallel, BoundedParameter from auto_LiRPA.bound_ops import BoundExp from auto_LiRPA.perturbations import * from auto_LiRPA.utils import MultiAverageMeter, logger, get_spec_matrix from datasets import mnist_loaders import torchvision.datasets as datasets import models from auto_LiRPA.eps_scheduler import LinearScheduler, AdaptiveScheduler, SmoothedScheduler, FixedScheduler def get_exp_module(bounded_module): for _, node in bounded_module.named_modules(): # Find the Exp neuron in computational graph if isinstance(node, BoundExp): return node return None parser = argparse.ArgumentParser() parser.add_argument("--verify", action="store_true", help='verification mode, do not train') parser.add_argument("--load", type=str, default="", help='Load pretrained model') parser.add_argument("--device", type=str, default="cuda", choices=["cpu", "cuda"], help='use cpu or cuda') parser.add_argument("--data", type=str, default="MNIST", choices=["MNIST", "FashionMNIST"], help='dataset') parser.add_argument("--ratio", type=float, default=None, help='percent of training used, None means whole training data') parser.add_argument("--seed", type=int, default=100, help='random seed') parser.add_argument("--eps", type=float, default=0.1, help='Target training epsilon for weight perturbations') parser.add_argument("--norm", type=float, default='inf', help='p norm for epsilon perturbation') parser.add_argument("--bound_type", type=str, default="CROWN-IBP", choices=["IBP", "CROWN-IBP", "CROWN"], help='method of bound analysis') parser.add_argument("--opt", type=str, default='ADAM', choices=["ADAM", "SGD"], help='optimizer') parser.add_argument("--num_epochs", type=int, default=150, help='number of total epochs') parser.add_argument("--batch_size", type=int, default=256, help='batch size') parser.add_argument("--lr", type=float, default=0.001, help='learning rate') parser.add_argument("--lr_decay_milestones", nargs='+', type=int, default=[120, 140], help='learning rate dacay milestones') parser.add_argument("--scheduler_name", type=str, default="LinearScheduler", choices=["LinearScheduler", "AdaptiveScheduler", "SmoothedScheduler"], help='epsilon scheduler') parser.add_argument("--scheduler_opts", type=str, default="start=10,length=100", help='options for epsilon scheduler') parser.add_argument("--bound_opts", type=str, default=None, choices=["same-slope", "zero-lb", "one-lb"], help='bound options') parser.add_argument('--clip_grad_norm', type=float, default=8.0) parser.add_argument('--truncate_data', type=int, help='Truncate the training/test batches in unit test') parser.add_argument('--multigpu', action='store_true', help='MultiGPU training') num_class = 10 args = parser.parse_args() exp_name = 'mlp_MNIST'+'_b'+str(args.batch_size)+'_'+str(args.bound_type)+'_epoch'+str(args.num_epochs)+'_'+args.scheduler_opts+'_'+str(args.eps)[:6] log_file = f'{exp_name}{"_test" if args.verify else ""}.log' file_handler = logging.FileHandler(log_file) logger.addHandler(file_handler) ## Training one epoch. 
def Train(model, t, loader, eps_scheduler, norm, train, opt, bound_type, method='robust', loss_fusion=True, final_node_name=None): meter = MultiAverageMeter() if train: model.train() eps_scheduler.train() eps_scheduler.step_epoch(verbose=False) eps_scheduler.set_epoch_length(int((len(loader.dataset) + loader.batch_size - 1) / loader.batch_size)) else: model.eval() eps_scheduler.eval() # Used for loss-fusion. Get the exp operation in computational graph. exp_module = get_exp_module(model) def get_bound_loss(x=None, c=None): if loss_fusion: # When loss fusion is used, we need the upper bound for the final loss function. bound_lower, bound_upper = False, True else: # When loss fusion is not used, we need the lower bound for the logit layer. bound_lower, bound_upper = True, False if bound_type == 'IBP': lb, ub = model(method_opt="compute_bounds", x=x, C=c, method="IBP", final_node_name=final_node_name, no_replicas=True) elif bound_type == 'CROWN': lb, ub = model(method_opt="compute_bounds", x=x, C=c, method="backward", bound_lower=bound_lower, bound_upper=bound_upper) elif bound_type == 'CROWN-IBP': # we use a mixed IBP and CROWN-IBP bounds, leading to better performance (Zhang et al., ICLR 2020) # factor = (eps_scheduler.get_max_eps() - eps_scheduler.get_eps()) / eps_scheduler.get_max_eps() ilb, iub = model(method_opt="compute_bounds", x=x, C=c, method="IBP", final_node_name=final_node_name, no_replicas=True) lb, ub = model(method_opt="compute_bounds", C=c, method="CROWN-IBP", bound_lower=bound_lower, bound_upper=bound_upper, final_node_name=final_node_name, average_A=True, no_replicas=True) if loss_fusion: # When loss fusion is enabled, we need to get the common factor before softmax. if isinstance(model, BoundDataParallel): max_input = model(get_property=True, node_class=BoundExp, att_name='max_input') else: max_input = exp_module.max_input return None, torch.mean(torch.log(ub) + max_input) else: # Pad zero at the beginning for each example, and use fake label '0' for all examples lb_padded = torch.cat((torch.zeros(size=(lb.size(0), 1), dtype=lb.dtype, device=lb.device), lb), dim=1) fake_labels = torch.zeros(size=(lb.size(0),), dtype=torch.int64, device=lb.device) robust_ce = CrossEntropyLoss()(-lb_padded, fake_labels) return lb, robust_ce for i, (data, labels) in enumerate(loader): # For unit test. We only use a small number of batches if args.truncate_data: if i >= args.truncate_data: break start = time.time() eps_scheduler.step_batch() eps = eps_scheduler.get_eps() # For small eps just use natural training, no need to compute LiRPA bounds batch_method = method if eps < 1e-50: batch_method = "natural" if train: opt.zero_grad() if list(model.parameters())[0].is_cuda: data, labels = data.cuda(), labels.cuda() model.ptb.eps = eps x = data if loss_fusion: if batch_method == 'natural' or not train: output = model(x, labels) # , disable_multi_gpu=True regular_ce = torch.mean(torch.log(output)) else: model(x, labels) regular_ce = torch.tensor(0., device=data.device) meter.update('CE', regular_ce.item(), x.size(0)) x = (x, labels) c = None else: # Generate speicification matrix (when loss fusion is not used). 
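# Standalone illustration of the specification matrix and the robust cross-entropy trick used in
# get_bound_loss above (values are made up; it assumes get_spec_matrix builds the usual
# one-vs-rest margin specification, one row per wrong class with +1 on the true label and -1 on
# that class). With such a C, lb contains lower bounds on the margins f_label - f_j; prepending a
# zero column and taking cross-entropy of -lb against the fake label 0 gives a loss that is small
# exactly when all margin lower bounds are large:
import torch
from torch.nn import CrossEntropyLoss
lb_example = torch.tensor([[2.0, 0.5]])            # margin lower bounds for one example
lb_padded_example = torch.cat(
    (torch.zeros(1, 1), lb_example), dim=1)        # [[0.0, 2.0, 0.5]]
robust_ce_example = CrossEntropyLoss()(
    -lb_padded_example, torch.zeros(1, dtype=torch.long))
# robust_ce_example equals log(1 + exp(-2.0) + exp(-0.5)), the verified cross-entropy surrogate
# used when loss fusion is disabled.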
c = get_spec_matrix(data, labels, num_class) x = (x, labels) output = model(x, final_node_name=final_node_name) regular_ce = CrossEntropyLoss()(output, labels) # regular CrossEntropyLoss used for warming up meter.update('CE', regular_ce.item(), x[0].size(0)) meter.update('Err', torch.sum(torch.argmax(output, dim=1) != labels).item() / x[0].size(0), x[0].size(0)) if batch_method == 'robust': lb, robust_ce = get_bound_loss(x=x, c=c) loss = robust_ce elif batch_method == 'natural': loss = regular_ce if train: loss.backward() if args.clip_grad_norm: grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad_norm) meter.update('grad_norm', grad_norm) if isinstance(eps_scheduler, AdaptiveScheduler): eps_scheduler.update_loss(loss.item() - regular_ce.item()) opt.step() meter.update('Loss', loss.item(), data.size(0)) if batch_method != 'natural': meter.update('Robust_CE', robust_ce.item(), data.size(0)) if not loss_fusion: # For an example, if lower bounds of margins is >0 for all classes, the output is verifiably correct. # If any margin is < 0 this example is counted as an error meter.update('Verified_Err', torch.sum((lb < 0).any(dim=1)).item() / data.size(0), data.size(0)) meter.update('Time', time.time() - start) if (i + 1) % 50 == 0 and train: logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) logger.info('[{:2d}:{:4d}]: eps={:.12f} {}'.format(t, i + 1, eps, meter)) return meter def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Load the model with BoundedParameter for weight perturbation. model_ori = models.Models['mlp_3layer_weight_perturb']() epoch = 0 ## Load a checkpoint, if requested. if args.load: checkpoint = torch.load(args.load) epoch, state_dict = checkpoint['epoch'], checkpoint['state_dict'] opt_state = None try: opt_state = checkpoint['optimizer'] except KeyError: print('no opt_state found') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf(v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.info('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual dummy_input = torch.randn(2, 1, 28, 28) train_data, test_data = mnist_loaders(datasets.MNIST, batch_size=args.batch_size, ratio=args.ratio) train_data.mean = test_data.mean = torch.tensor([0.0]) train_data.std = test_data.std = torch.tensor([1.0]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
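# Standalone illustration (hypothetical logits) of the identity behind loss fusion, which the
# CrossEntropyWrapper model built below relies on: since CE(z, y) = log(sum_j exp(z_j - z_y)), an
# upper bound on the fused network's sum-of-exponentials output directly upper-bounds the training
# loss. The max_input term added in get_bound_loss above appears to compensate for the numerical
# stability shift applied inside the exponential node.
import torch
import torch.nn.functional as F
z_example = torch.tensor([[1.0, 2.0, 0.5]])
y_example = torch.tensor([1])
ce = F.cross_entropy(z_example, y_example)
fused = torch.log(torch.exp(z_example - z_example[0, y_example]).sum(dim=1))
assert torch.allclose(ce, fused)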
model = BoundedModule(model_ori, dummy_input, device=args.device, bound_opts={ 'activation_bound_option':args.bound_opts, 'sparse_intermediate_bounds': False, 'sparse_conv_intermediate_bounds': False, 'sparse_intermediate_bounds_with_ibp': False}) final_name1 = model.final_name model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), device=args.device, bound_opts= {'activation_bound_option': args.bound_opts, 'loss_fusion': True, 'sparse_intermediate_bounds': False, 'sparse_conv_intermediate_bounds': False, 'sparse_intermediate_bounds_with_ibp': False}) # after CrossEntropyWrapper, the final name will change because of one more input node in CrossEntropyWrapper final_name2 = model_loss._modules[final_name1].output_name[0] assert type(model._modules[final_name1]) == type(model_loss._modules[final_name2]) if args.multigpu: model_loss = BoundDataParallel(model_loss) model_loss.ptb = model.ptb = model_ori.ptb # Perturbation on the parameters ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler if args.opt == 'ADAM': opt = optim.Adam(model_loss.parameters(), lr=args.lr, weight_decay=0.01) elif args.opt == 'SGD': opt = optim.SGD(model_loss.parameters(), lr=args.lr, weight_decay=0.01) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=args.lr_decay_milestones, gamma=0.1) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.info(str(model_ori)) # Skip epochs if we continue training from a checkpoint. if epoch > 0: epoch_length = int((len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.info('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.info('resume opt_state') ## Step 5: start training. if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model_loss, 1, test_data, eps_scheduler, norm, False, None, args.bound_type, loss_fusion=False, final_node_name=final_name2) else: timer = 0.0 best_loss = 1e10 # Main training loop for t in range(epoch + 1, args.num_epochs+1): logger.info("Epoch {}, learning rate {}".format(t, lr_scheduler.get_last_lr())) start_time = time.time() # Training one epoch Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=True) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.info('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer)) logger.info("Evaluating...") torch.cuda.empty_cache() state_dict = model_loss.state_dict() # Test one epoch. with torch.no_grad(): m = Train(model, t, test_data, eps_scheduler, norm, False, None, args.bound_type, loss_fusion=False, final_node_name=final_name1) # Save checkpoints. 
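# Note on the checkpoint logic below (based on the scheduler parameters set via --scheduler_opts,
# e.g. the default "start=10,length=100"): epochs before `start` effectively train with eps = 0,
# so those checkpoints are saved with a "natural_" prefix; while eps ramps up over `length`
# epochs, only the latest checkpoint is kept; once eps has reached its target, the best model by
# test loss is additionally saved under a separate name.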
save_dict = {'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict()} if not os.path.exists('saved_models'): os.mkdir('saved_models') if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']): current_loss = m.avg('Loss') if current_loss < best_loss: best_loss = current_loss torch.save(save_dict, 'saved_models/' + exp_name + '_best_' + str(best_loss)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache() if __name__ == "__main__": main(args) ================================================ FILE: setup.py ================================================ from setuptools import setup, find_packages from pathlib import Path # Check PyTorch version pytorch_version_l = '2.0.0' pytorch_version_u = '2.9.0' # excluded torchvision_version_l = '0.12.0' torchvision_version_u = '0.24.0' # excluded msg_install_pytorch = (f'It is recommended to manually install PyTorch ' f'(>={pytorch_version_l},<{pytorch_version_u}) suitable ' 'for your system ahead: https://pytorch.org/get-started.\n') try: import torch if torch.__version__ < pytorch_version_l: print(f'PyTorch version {torch.__version__} is too low. ' + msg_install_pytorch) if torch.__version__ >= pytorch_version_u: print(f'PyTorch version {torch.__version__} is too high. ' + msg_install_pytorch) except ModuleNotFoundError: print(f'PyTorch is not installed. {msg_install_pytorch}') with open('auto_LiRPA/__init__.py') as file: for line in file.readlines(): if '__version__' in line: version = eval(line.strip().split()[-1]) this_directory = Path(__file__).parent long_description = (this_directory / 'README.md').read_text() print(f'Installing auto_LiRPA {version}') setup( name='auto_LiRPA', version=version, description='A library for Automatic Linear Relaxation based Perturbation Analysis (LiRPA) on general computational graphs, with a focus on adversarial robustness verification and certification of deep neural networks.', long_description=long_description, long_description_content_type='text/markdown', url='https://github.com/Verified-Intelligence/auto_LiRPA', author='α,β-CROWN Team', author_email='huan@huan-zhang.com, xiangru4@illinois.edu', packages=find_packages(), install_requires=[ f'torch>={pytorch_version_l},<{pytorch_version_u}', f'torchvision>={torchvision_version_l},<{torchvision_version_u}', 'numpy>=1.20', 'packaging>=20.0', 'pytest==8.1.1', 'pylint>=2.15', 'pytest-order>=1.0.0', 'pytest-mock>=3.14', 'appdirs>=1.4', 'pyyaml>=5.0', 'ninja>=1.10', 'tqdm>=4.64', 'graphviz>=0.20.3' ], platforms=['any'], license='BSD', ) ================================================ FILE: tests/.gitignore ================================================ .cache ================================================ FILE: tests/data/.gitignore ================================================ cifar-10-python.tar.gz cifar-10-batches-py MNIST ================================================ FILE: tests/test_1d_activation.py ================================================ """Test one dimensional activation functions (e.g., ReLU, tanh, exp, sin, etc)""" import functools import pytest import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from auto_LiRPA.utils import logger from auto_LiRPA.operators.s_shaped import TanhGradOp, SigmoidGradOp from testcase import TestCase, DEFAULT_DEVICE, 
DEFAULT_DTYPE # Wrap the computation with a nn.Module class test_model(nn.Module): def __init__(self, act_func): super().__init__() self.act_func = act_func def forward(self, x): return self.act_func(x) def pow_2(x): return torch.pow(x, 2) def pow_3(x): return torch.pow(x, 3) class GELUOp(torch.autograd.Function): @staticmethod def symbolic(g, x): return g.op('custom::Gelu', x) @staticmethod def forward(ctx, x): return torch.nn.functional.gelu(x) def GELU(x): return GELUOp.apply(x) def gen_hardtanh(min_val, max_val): return functools.partial(torch.nn.functional.hardtanh, min_val=min_val, max_val=max_val) # The original tanhgrad and sigmoidgrad also take in the gradient from the following layer # and multiply it. Here we only implement the part that computes the local gradient. def tanhgrad(x): return TanhGradOp.apply(x) def sigmoidgrad(x): return SigmoidGradOp.apply(x) class Test1DActivation(TestCase): def __init__(self, methodName='runTest', device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def create_test(self, act_func, low, high, ntests=1000, nsamples=1000, method='IBP', activation_bound_option='adaptive', input_lb=None, input_ub=None): print(f'Testing activation {act_func} (method {method}, activation_bound_option {activation_bound_option})') model = test_model(act_func) image = torch.zeros(1, ntests) bounded_model = BoundedModule( model, image, bound_opts={ 'optimize_bound_args': {'iteration': 2}, 'activation_bound_option': activation_bound_option }, device=self.default_device) if input_lb is None or input_ub is None: # Generate randomly bounded inputs. p = torch.rand(1, ntests) * (high - low) + low q = torch.rand(1, ntests) * (high - low) + low input_lb = torch.min(p, q) input_ub = torch.max(p, q) else: low, high = torch.min(input_lb), torch.max(input_ub) input_center = (input_lb + input_ub) / 2.0 ptb = PerturbationLpNorm(norm=float("inf"), eps=None, x_L=input_lb, x_U=input_ub) ptb_data = BoundedTensor(input_center, ptb) # Generate reference results. table = act_func(torch.linspace(start=low, end=high, steps=nsamples+1)) def lookup(l, u): assert torch.all(u <= high) assert torch.all(l >= low) shape = l.size() l = l.squeeze() u = u.squeeze() # select all sample points between l and u. low_index = torch.ceil((l - low) / (high - low) * nsamples).int() # Make sure we do not have index 0. high_index = torch.floor((u - low) / (high - low) * nsamples).int() real_lb = torch.empty_like(l) real_ub = torch.empty_like(u) for i, (li, hi) in enumerate(zip(low_index, high_index)): if li == hi + 1: # Not enough precision. l and u are too close so we cannot tell. real_lb[i] = float("inf") real_ub[i] = float("-inf") else: selected = table[li : hi+1] real_lb[i] = torch.min(selected) real_ub[i] = torch.max(selected) real_lb = real_lb.view(*shape) real_ub = real_ub.view(*shape) return real_lb, real_ub # These are reference results. IBP results should be very close to these. # Linear bound results can be looser than these. ref_forward = model(input_center) ref_output_lb, ref_output_ub = lookup(input_lb, input_ub) # Get bounding results. forward = bounded_model(ptb_data) output_lb, output_ub = bounded_model.compute_bounds( x=(ptb_data,), method=method) bounded_model.set_bound_opts({ 'optimize_bound_args': {'iteration': 2, 'init_alpha': True}, }) # Compare. 
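# A worked instance of the lookup-table indexing above (assumed values): with low = -10,
# high = 10, nsamples = 1000, an interval [l, u] = [-0.25, 0.31] gives
# low_index = ceil((l + 10) / 20 * 1000) = 488 and high_index = floor((u + 10) / 20 * 1000) = 515,
# so table[488:516] samples the activation on a grid lying strictly inside [l, u]; its min/max are
# therefore valid (possibly slightly loose) reference bounds. The comparison below then checks
# soundness: the computed bounds must enclose this reference range, i.e.
# output_lb <= ref_output_lb and output_ub >= ref_output_ub up to a 1e-5 tolerance.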
assert torch.allclose(forward, ref_forward) for i in range(ntests): show = False if output_ub[0,i] < ref_output_ub[0,i] - 1e-5: logger.warning(f'upper bound is wrong {ref_output_ub[0,i] - output_ub[0,i]}') show = True if output_lb[0,i] > ref_output_lb[0,i] + 1e-5: logger.warning(f'lower bound is wrong {output_lb[0,i] - ref_output_lb[0,i]}') show = True if show: logger.warning(f'input_lb={input_lb[0,i]:8.3f}, input_ub={input_ub[0,i]:8.3f}, lb={output_lb[0,i]:8.3f}, ref_lb={ref_output_lb[0,i]:8.3f}, ub={output_ub[0,i]:8.3f}, ref_ub={ref_output_ub[0,i]:8.3f}') assert torch.all(output_ub + 1e-5 >= ref_output_ub) assert torch.all(output_lb - 1e-5 <= ref_output_lb) @pytest.mark.skip(reason="Known issue: https://github.com/Verified-Intelligence/Verifier_Development/issues/164") def test_tan(self): # Test tan(x) in different periods. for i in range(-5, 5): self.create_test( act_func=torch.tan, low=-0.5*torch.pi + i*torch.pi + 1e-20, high=0.5*torch.pi + i*torch.pi - 1e-20, method='IBP') self.create_test( act_func=torch.tan, low=-0.5*torch.pi + i*torch.pi + 1e-20, high=0.5*torch.pi + i*torch.pi - 1e-20, method='CROWN') def test_acts(self): for act_func in [torch.nn.functional.relu, torch.sin, torch.cos, torch.tanh, torch.sigmoid, torch.arctan, torch.exp, pow_2, pow_3, torch.sign, GELU, gen_hardtanh(-1,1),gen_hardtanh(-0.25,0.25),gen_hardtanh(1,10),gen_hardtanh(-5,2), tanhgrad, sigmoidgrad]: low, high = -10, 10 if act_func == torch.reciprocal: # So far only positive values are supported. low = 0.01 self.create_test(act_func=act_func, low=low, high=high, method='IBP') self.create_test(act_func=act_func, low=low, high=high, method='CROWN') if act_func not in [torch.exp, torch.sign, torch.sin, torch.cos, tanhgrad, sigmoidgrad]: # Use optimized bounds self.create_test(act_func=act_func, low=low, high=high, method='CROWN-Optimized') if act_func in [torch.sin, torch.cos]: test_samples = 10 for _ in range(test_samples): self.create_test(act_func=act_func, low=low, high=high, method='CROWN-Optimized') if act_func in [torch.nn.functional.relu]: self.create_test(act_func=act_func, low=low, high=high, method='Dynamic-Forward') if act_func in [torch.nn.functional.relu, torch.tanh]: self.create_test(act_func=act_func, low=low, high=high, method='CROWN', activation_bound_option='same-slope') print('Testing activations with large input range') for act_func in [torch.sin, torch.tanh, pow_3, GELU]: low, high = -600, 600 self.create_test(act_func=act_func, low=low, high=high, method='CROWN') if __name__ == '__main__': testcase = Test1DActivation() testcase.test_acts() ================================================ FILE: tests/test_2d_activation.py ================================================ """Test two dimensional activation functions (e.g., min, max, etc)""" import tqdm import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from auto_LiRPA.utils import logger from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE # Wrap the computation with a nn.Module class test_model(nn.Module): def __init__(self, act_func): super().__init__() self.act_func = act_func def forward(self, x, y): return self.act_func(x, y) def mul(x, y): return x * y class Test2DActivation(TestCase): def __init__(self, methodName='runTest', device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def create_test(self, act_func, low_x, high_x, low_y, high_y, ntests=10000, nsamples=1000, method='IBP'): print(f'Testing 
activation {act_func}') model = test_model(act_func) image = torch.zeros(2, ntests) bounded_model = BoundedModule(model, (image[0], image[1]), device=self.default_device) # Generate randomly bounded inputs. p_x = torch.rand(1, ntests) * (high_x - low_x) + low_x q_x = torch.rand(1, ntests) * (high_x - low_x) + low_x input_lb_x = torch.min(p_x, q_x) input_ub_x = torch.max(p_x, q_x) input_center_x = (input_lb_x + input_ub_x) / 2.0 ptb_x = PerturbationLpNorm(x_L=input_lb_x, x_U=input_ub_x) ptb_data_x = BoundedTensor(input_center_x, ptb_x) p_y = torch.rand(1, ntests) * (high_y - low_y) + low_y q_y = torch.rand(1, ntests) * (high_y - low_y) + low_y input_lb_y = torch.min(p_y, q_y) input_ub_y = torch.max(p_y, q_y) input_center_y = (input_lb_y + input_ub_y) / 2.0 ptb_y = PerturbationLpNorm(x_L=input_lb_y, x_U=input_ub_y) ptb_data_y = BoundedTensor(input_center_y, ptb_y) # Generate reference results. range_xy = torch.linspace(start=low_x, end=high_x, steps=nsamples+1) table = torch.empty([range_xy.shape[0], range_xy.shape[0]]) for i in range(range_xy.shape[0]): x = range_xy[i] table_y = act_func(x, torch.linspace(start=low_y, end=high_y, steps=nsamples+1)) table[i] = table_y def lookup(l_x, u_x, l_y, u_y): assert torch.all(u_x <= high_x) assert torch.all(l_x >= low_x) assert torch.all(u_y <= high_y) assert torch.all(l_y >= low_y) shape = l_x.size() l_x = l_x.squeeze() u_x = u_x.squeeze() l_y = l_y.squeeze() u_y = u_y.squeeze() # select all sample points between l and u. low_index_x = torch.ceil((l_x - low_x) / (high_x - low_x) * nsamples).int() # Make sure we do not have index 0. high_index_x = torch.floor((u_x - low_x) / (high_x - low_x) * nsamples).int() low_index_y = torch.ceil((l_y - low_y) / (high_y - low_y) * nsamples).int() # Make sure we do not have index 0. high_index_y = torch.floor((u_y - low_y) / (high_y - low_y) * nsamples).int() real_lb = torch.empty_like(l_x) real_ub = torch.empty_like(u_x) for i, (li_x, hi_x) in enumerate(zip(low_index_x, high_index_x)): li_y = low_index_y[i] hi_y = high_index_y[i] if li_x == hi_x + 1 or li_y == hi_y + 1: # Not enough precision. l and u are too close so we cannot tell. real_lb[i] = float("inf") real_ub[i] = float("-inf") else: selected = table[li_x : hi_x+1, li_y : hi_y+1].reshape(-1) real_lb[i] = torch.min(selected) real_ub[i] = torch.max(selected) real_lb = real_lb.view(*shape) real_ub = real_ub.view(*shape) return real_lb, real_ub # These are reference results. IBP results should be very close to these. Linear bound results can be looser than these. ref_forward = model(input_center_x, input_center_y) ref_output_lb, ref_output_ub = lookup(input_lb_x, input_ub_x, input_lb_y, input_ub_y) # Get bounding results. forward = bounded_model(ptb_data_x, ptb_data_y) output_lb, output_ub = bounded_model.compute_bounds(x=(ptb_data_x, ptb_data_y), method = method) # Compare. 
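# Aside on what a tight reference range looks like for the bivariate ops tested here: for mul with
# x in [-2, 3] and y in [-1, 4], the extrema of x*y over the box are attained at corners, so the
# four corner products {2, -8, -3, 12} bracket the exact range [-8, 12]. The sampled 2D table
# above approximates this kind of range, and the asserts below check that the computed bounds
# enclose it.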
assert torch.allclose(forward, ref_forward) for i in tqdm.tqdm(range(ntests)): show = False if output_ub[0,i] < ref_output_ub[0,i] - 1e-5: logger.warning(f'upper bound is wrong {ref_output_ub[0,i] - output_ub[0,i]}') show = True if output_lb[0,i] > ref_output_lb[0,i] + 1e-5: logger.warning(f'lower bound is wrong {output_lb[0,i] - ref_output_lb[0,i]}') show = True if show: logger.warning(f'input_lb_x={input_lb_x[0,i]:8.3f}, input_ub_x={input_ub_x[0,i]:8.3f},input_lb_y={input_lb_y[0,i]:8.3f}, input_ub_y={input_ub_y[0,i]:8.3f}, lb={output_lb[0,i]:8.3f}, ref_lb={ref_output_lb[0,i]:8.3f}, ub={output_ub[0,i]:8.3f}, ref_ub={ref_output_ub[0,i]:8.3f}') assert torch.all(output_ub + 1e-5 >= ref_output_ub) assert torch.all(output_lb - 1e-5 <= ref_output_lb) def test_max(self): self.create_test(act_func=torch.max, low_x=-10, high_x=5, low_y=-1, high_y=10, method='IBP') self.create_test(act_func=torch.max, low_x=-10, high_x=5, low_y=-1, high_y=10, method='CROWN') def test_min(self): self.create_test(act_func=torch.min, low_x=-10, high_x=5, low_y=-1, high_y=10, method='IBP') self.create_test(act_func=torch.min, low_x=-10, high_x=5, low_y=-1, high_y=10, method='CROWN') def test_mul(self): self.create_test(act_func=mul, low_x=-10, high_x=5, low_y=-1, high_y=10, method='IBP') self.create_test(act_func=mul, low_x=-10, high_x=5, low_y=-1, high_y=10, method='CROWN') if __name__ == '__main__': testcase = Test2DActivation() testcase.test_max() testcase.test_min() testcase.test_mul() ================================================ FILE: tests/test_avgpool.py ================================================ import torch import torch.nn as nn import numpy as np from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE def ff(num_conv=2, num_mlp_only=None, pooling=False, activation="ReLU", hidden_size=256, input_ch=1, input_dim=28, num_classes=10, pool_kernel=3, pool_stride=1, pool_padding=1): activation = eval(f"nn.{activation}()") layers = [] if num_conv: layers.append(nn.Conv2d(input_ch, 4, 3, stride=1, padding=1)) layers.append(activation) num_channels = 4 if pooling: layers.append(nn.AvgPool2d(kernel_size=pool_kernel, stride=pool_stride, padding=pool_padding)) if num_conv >= 2: layers.append(nn.Conv2d(4, 8, 3, stride=1, padding=1)) layers.append(nn.ReLU()) if pooling: layers.append(nn.AvgPool2d(kernel_size=pool_kernel, stride=pool_stride, padding=pool_padding)) num_channels = 8 for _ in range(num_conv - 2): layers.append(nn.Conv2d(8, 8, 3, stride=1, padding=1)) layers.append(nn.ReLU()) if pooling: layers.append(nn.AvgPool2d(kernel_size=pool_kernel, stride=pool_stride, padding=pool_padding)) layers.append(nn.Flatten(1)) # Calculate output size after pooling operations if pooling and num_conv > 0: pooled_dim = input_dim for _ in range(num_conv): pooled_dim = (pooled_dim + 2 * pool_padding - pool_kernel) // pool_stride + 1 linear_input_size = num_channels * (pooled_dim ** 2) else: linear_input_size = num_channels * (input_dim ** 2) layers.append(nn.Linear(linear_input_size, hidden_size)) layers.append(nn.ReLU()) layers.append(nn.Linear(hidden_size, num_classes)) else: layers.append(nn.Flatten(1)) cur = input_ch * (input_dim ** 2) for _ in range(num_mlp_only - 1): layers.append(nn.Linear(cur, hidden_size)) layers.append(activation) cur = hidden_size layers.append(nn.Linear(hidden_size, num_classes)) return nn.Sequential(*layers) def synthetic_net(input_ch, input_dim, **kwargs): return ff(input_ch=input_ch, 
input_dim=input_dim, num_classes=2, **kwargs) def synthetic_4c2f_pool(input_ch, input_dim, **kwargs): return synthetic_net(input_ch, input_dim, num_conv=4, pooling=True, **kwargs) class TestAvgPool(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1234, ref_name='avgpool_test_data', generate=generate, device=device, dtype=dtype) def test(self): test_configs = [ {'input_ch': 1, 'input_dim': 5, 'hidden_size': 8, 'pool_kernel': 3, 'pool_stride': 1, 'pool_padding': 1}, {'input_ch': 1, 'input_dim': 32, 'hidden_size': 16, 'pool_kernel': 2, 'pool_stride': 2, 'pool_padding': 0} ] self.result = [] for config in test_configs: print(f"Testing config: {config}") model_ori = synthetic_4c2f_pool(**config) model_ori = model_ori.eval().to(self.default_device).to(self.default_dtype) x = torch.randn(8, config['input_ch'], config['input_dim'], config['input_dim']) ptb = PerturbationLpNorm(norm=np.inf, eps=100) x_bounded = BoundedTensor(x, ptb) print(f" Testing with default conv_mode (patches)") model = BoundedModule(model_ori, x, device=self.default_device) lb_patches, ub_patches = model.compute_bounds(x=(x_bounded,), method='backward') print(f" Patches mode - LB: {lb_patches}") print(f" Patches mode - UB: {ub_patches}") self.result += [lb_patches, ub_patches] print(f" Testing with conv_mode='matrix'") model_matrix = BoundedModule(model_ori, x, bound_opts={'conv_mode': 'matrix'}) lb_matrix, ub_matrix = model_matrix.compute_bounds(x=(x_bounded,), method='backward') print(f" Matrix mode - LB: {lb_matrix}") print(f" Matrix mode - UB: {ub_matrix}") self.result += [lb_matrix, ub_matrix] lb_diff = torch.abs(lb_patches - lb_matrix).max().item() ub_diff = torch.abs(ub_patches - ub_matrix).max().item() print(f" Max difference in LB between patches and matrix: {lb_diff}") print(f" Max difference in UB between patches and matrix: {ub_diff}") assert torch.allclose(lb_patches, lb_matrix, atol=1e-6), f"Lower bounds not equivalent between patches and matrix modes" assert torch.allclose(ub_patches, ub_matrix, atol=1e-6), f"Upper bounds not equivalent between patches and matrix modes" print(f" Matrix and patches modes produce equivalent results") print() self.check() if __name__ == '__main__': testcase = TestAvgPool(generate=False) testcase.test() ================================================ FILE: tests/test_bound_ops.py ================================================ """Test classes for bound operators""" import torch from auto_LiRPA.bound_ops import * from auto_LiRPA.linear_bound import LinearBound from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class Dummy: """Dummy node for testing""" def __init__(self, lower, upper=None, perturbed=False): self.lower = lower self.upper = upper if upper is not None else lower self.perturbed = perturbed self.output_shape = lower.shape class TestBoundOp(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name='bound_ops_data', generate=generate, device=device, dtype=dtype) def test(self): device = self.default_device dtype = self.default_dtype batch_size = 5 dim_final = 7 dim_output = 9 dim_input = 11 # multiplication of [batch_size, dim_input] and [dim_output, dim_input]^T weight = torch.randn(dim_output, dim_input, device=device) bias = torch.randn(dim_output, device=device) data_in = torch.randn(batch_size, dim_input, device=device) data_in_delta = torch.randn(batch_size, 
dim_input, device=device) dummy_in = Dummy( data_in - torch.abs(data_in_delta), data_in + torch.abs(data_in_delta), True) dummy_weight = Dummy(weight) dummy_bias = Dummy(bias) op = BoundLinear( attr={'transB': 1}, inputs=[dummy_in, dummy_weight, dummy_bias], output_index=0, options={}) op.batch_dim = 0 # test `forward` data_out = op(data_in, weight, bias) self.assertEqual(data_out, data_in.matmul(weight.t()) + bias) # test `bound_backward` # The `transpose` here to make the randomization consistent with the previous reference. # It can be removed once a new reference is generated. last_lA = torch.randn(batch_size, dim_final, dim_output, device=device).transpose(0, 1) last_uA = torch.randn(batch_size, dim_final, dim_output, device=device).transpose(0, 1) A, lbias, ubias = op.bound_backward(last_lA, last_uA, *op.inputs) self.assertEqual(A[0][0], last_lA.matmul(weight)) self.assertEqual(A[0][1], last_uA.matmul(weight)) self.assertEqual(lbias, last_lA.matmul(bias)) self.assertEqual(ubias, last_uA.matmul(bias)) # test `bound_forward` # note that the upper bound may be actually smaller than the lower bound # in these dummy linear bounds bound_in = LinearBound( lw=torch.randn(batch_size, dim_final, dim_input, device=device), lb=torch.randn(batch_size, dim_input, device=device), uw=torch.randn(batch_size, dim_final, dim_input, device=device), ub=torch.randn(batch_size, dim_input, device=device), lower=None, upper=None) bound_weight = LinearBound(None, None, None, None, dummy_weight.lower, dummy_weight.upper) bound_bias = LinearBound(None, None, None, None, dummy_bias.lower, dummy_bias.upper) bound_out = op.bound_forward(dim_final, bound_in, bound_weight, bound_bias) self.assertEqual( bound_out.lw, bound_in.lw.matmul(weight.t().clamp(min=0)) + bound_in.uw.matmul(weight.t().clamp(max=0))) self.assertEqual( bound_out.uw, bound_in.uw.matmul(weight.t().clamp(min=0)) + bound_in.lw.matmul(weight.t().clamp(max=0))) self.assertEqual( bound_out.lb, bound_in.lb.matmul(weight.t().clamp(min=0)) + bound_in.ub.matmul(weight.t().clamp(max=0)) + bias) self.assertEqual( bound_out.ub, bound_in.ub.matmul(weight.t().clamp(min=0)) + bound_in.lb.matmul(weight.t().clamp(max=0)) + bias) # test `interval_propagate` bound_in = ( torch.randn(*data_in.shape, device=device), torch.randn(*data_in.shape, device=device)) bound_weight = (bound_weight.lower, bound_weight.upper) bound_bias = (bound_bias.lower, bound_bias.upper) bound_out = op.interval_propagate(bound_in, bound_weight, bound_bias) self.assertEqual(bound_out[0], bound_in[0].matmul(weight.t().clamp(min=0)) + bound_in[1].matmul(weight.t().clamp(max=0)) + bias) self.assertEqual(bound_out[1], bound_in[1].matmul(weight.t().clamp(min=0)) + bound_in[0].matmul(weight.t().clamp(max=0)) + bias) # test weight perturbation # `bound_backward` ptb_weight = torch.randn(weight.shape) op.inputs[1].upper += ptb_weight op.inputs[1].perturbed = True op.inputs[2].perturbation = None # no perturbation on bias A, lbias, ubias = op.bound_backward(last_lA, last_uA, *op.inputs) # `interval_propagate` bound_weight = (op.inputs[1].lower, op.inputs[1].upper) bound_out = op.interval_propagate(bound_in, bound_weight, bound_bias) self.result = (A, lbias, ubias, bound_out) if self.generate: self.save() self.reference = self.result A_ref, lbias_ref, ubias_ref, bound_out_ref = self.reference for i in range(3): for j in range(2): if A_ref[i][j] is not None: ref = A_ref[i][j].to(device=device, dtype=dtype) self.assertEqual(A[i][j], ref) lbias_ref = lbias_ref.to(device=device, dtype=dtype) ubias_ref = 
ubias_ref.to(device=device, dtype=dtype) bound_out_ref = ( bound_out_ref[0].to(device=device, dtype=dtype), bound_out_ref[1].to(device=device, dtype=dtype) ) self.assertEqual(lbias, lbias_ref) self.assertEqual(ubias, ubias_ref) self.assertEqual(bound_out[0], bound_out_ref[0]) self.assertEqual(bound_out[1], bound_out_ref[1]) if __name__ == '__main__': # Change to generate=True when genearting reference results testcase = TestBoundOp(generate=False) testcase.setUp() testcase.test() ================================================ FILE: tests/test_branching_heuristics.py ================================================ import sys import torch from types import SimpleNamespace sys.path.insert(0, '../complete_verifier') from heuristics.base import RandomNeuronBranching from testcase import DEFAULT_DEVICE, DEFAULT_DTYPE, set_default_dtype_device def test_branching_heuristics(): device = DEFAULT_DEVICE dtype = DEFAULT_DTYPE set_default_dtype_device(dtype, device) import random import numpy as np seed = 123 torch.manual_seed(seed) random.seed(seed) np.random.seed(seed) net = SimpleNamespace() branching_heuristic = RandomNeuronBranching(net) for _ in range(10000): batch_size = random.randint(1, 5) # Number of layers, and we will split the total_layers into this # many of layers. n_layers = random.randint(1, 5) total_len = random.randint(n_layers, 100) net.split_nodes = [] net.split_activations = {} for i in range(n_layers): layer = SimpleNamespace() layer.name = i activation = SimpleNamespace() activation.name = f'{i}_activation' net.split_nodes.append(layer) net.split_activations[layer.name] = [(activation, 0)] # Total number of neurons in all layers. topk = random.randint(1, total_len) # Generate random and unique scores. # scores = torch.argsort(torch.rand(batch_size, total_len)) + 1 scores = torch.rand(batch_size, total_len) + 1e-8 # Generate random mask. Mask = 1 means this neuron can be split. masks = (torch.rand(batch_size, total_len) > 0.75).float() # Generate random split locations. split_position = torch.randint( low=0, high=total_len, size=(n_layers - 1,)).sort().values print(f'testing batch={batch_size}, n_layers={n_layers}, ' f'total_len={total_len}, topk={topk}, split={split_position}') segment_lengths = (torch.cat( [split_position, torch.full(size=(1,), fill_value=total_len, device=split_position.device)]) - torch.cat([torch.zeros((1,), device=split_position.device), split_position])) segment_lengths = segment_lengths.int().tolist() # Cap to the minimum number of valid neurons in each batch. min_k = int(masks.sum(dim=1).min().item()) # Find the topk scores and indices across all layers. topk_scores, topk_indices = (scores * masks).topk(k=min(min_k, topk)) # Map the indices to groundtruth layer number. topk_layers = torch.searchsorted( split_position, topk_indices, right=True) # Map the indices to groundtruth neuron number. topk_neurons = topk_indices - torch.cat( [torch.zeros(1, device=split_position.device, dtype=torch.int64), split_position] ).view(1, -1).repeat(batch_size, 1).gather( dim=1, index=topk_layers) # Split into a list of scores for testing. 
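# A small worked mapping for the ground-truth construction above (assumed values): with
# split_position = [3, 7] and total_len = 10, the three layers own the global index ranges
# [0, 3), [3, 7) and [7, 10). A top-k global index of 5 is mapped by searchsorted(right=True) to
# layer 1, and subtracting that layer's offset (3) gives neuron 2 within the layer; this is what
# find_topk_scores is expected to reproduce from the per-layer score dictionaries built below.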
all_layer_scores = scores.split(segment_lengths, dim=1) all_layer_masks = masks.split(segment_lengths, dim=1) all_layer_scores = {i: item for i, item in enumerate(all_layer_scores)} all_layer_masks = {i: item for i, item in enumerate(all_layer_masks)} branching_heuristic.update_batch_size_and_device(all_layer_scores) (calculated_layers, calculated_neurons, calculated_scores) = branching_heuristic.find_topk_scores( all_layer_scores, all_layer_masks, k=topk, return_scores=True) torch.testing.assert_close(calculated_layers, topk_layers) torch.testing.assert_close(calculated_neurons, topk_neurons) torch.testing.assert_close(calculated_scores, topk_scores) if __name__ == "__main__": test_branching_heuristics() ================================================ FILE: tests/test_clip_domains.py ================================================ """ Tests clip_domains To run tests: py.test test_clip_domains.py or: python -m pytest test_clip_domains.py Verbose (-v): py.test -v test_clip_domains.py or: python -m pytest -v test_clip_domains.py """ import torch from torch import Tensor from random import randint from typing import Union, Tuple import sys sys.path.append('../complete_verifier') # importing clip_domains from CROWN from input_split.clip import clip_domains from testcase import DEFAULT_DEVICE, DEFAULT_DTYPE, set_default_dtype_device batches = 2 # Do not use large batch sizes when running on CI device = DEFAULT_DEVICE # CI is not equipped with CUDA dtype = DEFAULT_DTYPE set_default_dtype_device(dtype, device) atol = 1e-4 # my references are defined at this level of tolerance def setup_module(module): """ Displays global information about the test run @param module: @return: """ print() print("setup_module module:%s" % module.__name__) print(f"Using device: {device}") print(f"Using dtype: {dtype}") print(f"Using atol: {atol}") print(f"Using number of batches (batch copies): {batches}") print() def setup_function(function): """ Adds spacing between tests @param function: @return: """ print(f"\nRunning test case: {function.__name__}") def _tensor(x): return torch.tensor(x, device=device, dtype=dtype) def test_case_one_one(): print() # Define the base 2D tensors A_bar_base = _tensor([[4 / 5, -7 / 20], [3 / 10, -3 / 7]]) x_L_base = _tensor([-3, -2]) x_U_base = _tensor([3, 2]) c_bar_base = _tensor([[1 / 10], [3 / 10]]) target_base = _tensor([[0], [0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) # In this suite, we have a reference for x_L/U ref_x_L = _tensor([-3., -1.4]).unsqueeze(0).expand(batches, -1) ref_x_U = _tensor([0.75, 2.0000]).unsqueeze(0).expand(batches, -1) old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" # check the returned x_L/U matches the expected x_L/U values x_L_eq = torch.allclose(new_x_L, ref_x_L, atol=atol) x_U_eq = torch.allclose(new_x_U, ref_x_U, atol=atol) assert x_L_eq, "x_L is not correct" assert x_U_eq, "x_U is not correct" def test_case_one_two(): print() # Define the base 2D tensors A_bar_base = _tensor([[3 / 10, -3 / 7]]) x_L_base = _tensor([-3, -2]) x_U_base = _tensor([3, 2]) c_bar_base = _tensor([[3 / 10]]) target_base = _tensor([[0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = 
setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) # In this suite, we have a reference for x_L/U ref_x_L = _tensor([-3., -1.4]).unsqueeze(0).expand(batches, -1) ref_x_U = _tensor([1.8571, 2.0000]).unsqueeze(0).expand(batches, -1) old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" # check the returned x_L/U matches the expected x_L/U values x_L_eq = torch.allclose(new_x_L, ref_x_L, atol=atol) x_U_eq = torch.allclose(new_x_U, ref_x_U, atol=atol) assert x_L_eq, "x_L is not correct" assert x_U_eq, "x_U is not correct" def test_case_one_three(): print() # Define the base 2D tensors A_bar_base = _tensor([[3 / 10, -3 / 7], [3 / 10, -3 / 7]]) x_L_base = _tensor([-3, -2]) x_U_base = _tensor([3, 2]) c_bar_base = _tensor([[3 / 10], [3 / 10]]) target_base = _tensor([[0], [0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) # In this suite, we have a reference for x_L/U ref_x_L = _tensor([-3., -1.4]).unsqueeze(0).expand(batches, -1) ref_x_U = _tensor([1.8571, 2.0000]).unsqueeze(0).expand(batches, -1) old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" # check the returned x_L/U matches the expected x_L/U values x_L_eq = torch.allclose(new_x_L, ref_x_L, atol=atol) x_U_eq = torch.allclose(new_x_U, ref_x_U, atol=atol) assert x_L_eq, "x_L is not correct" assert x_U_eq, "x_U is not correct" def test_case_one_four(): print() # Define the base 2D tensors A_bar_base = _tensor([[4 / 5, -7 / 20, 0.1], [3 / 10, -3 / 7, 0.1]]) x_L_base = _tensor([-3, -2, -1]) x_U_base = _tensor([3, 2, 1]) c_bar_base = _tensor([[1 / 10], [3 / 10]]) target_base = _tensor([[0], [0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" def test_case_two_one(): """ Visualize this test case at https://www.desmos.com/3d/fz6e11ovm3 @return: """ print() # Define the base 2D tensors A_bar_base = _tensor([[5/5, 1/5], [2/5, 1/5], [10/35, 1/5]]) x_L_base = _tensor([0, 0]) x_U_base = _tensor([1, 1]) c_bar_base = _tensor([[-1/5], [-1/5], [-1/5]]) target_base = _tensor([[0], [0], [0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) # In this suite, we have a reference for x_L/U ref_x_L = _tensor([0., 0.]).unsqueeze(0).expand(batches, -1) ref_x_U = _tensor([0.2000, 1.0000]).unsqueeze(0).expand(batches, -1) old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" # check the returned x_L/U matches the expected 
x_L/U values x_L_eq = torch.allclose(new_x_L, ref_x_L, atol=atol) x_U_eq = torch.allclose(new_x_U, ref_x_U, atol=atol) assert x_L_eq, "x_L is not correct" assert x_U_eq, "x_U is not correct" def test_case_two_two(): """ Visualize this test case at https://www.desmos.com/3d/ruty3i54wu @return: """ print() # Define the base 2D tensors A_bar_base = -1. * _tensor([[5 / 5, 1 / 5], [2 / 5, 1 / 5], [10 / 35, 1 / 5]]) x_L_base = _tensor([0, 0]) x_U_base = _tensor([1, 1]) c_bar_base = -1. * _tensor([[-1 / 5], [-1 / 5], [-1 / 5]]) target_base = _tensor([[0], [0], [0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) # In this suite, we have a reference for x_L/U ref_x_L = x_L.clone() ref_x_U = x_U.clone() old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" # check the returned x_L/U matches the expected x_L/U values x_L_eq = torch.allclose(new_x_L, ref_x_L, atol=atol) x_U_eq = torch.allclose(new_x_U, ref_x_U, atol=atol) assert x_L_eq, "x_L is not correct" assert x_U_eq, "x_U is not correct" def test_case_two_three(): """ Visualize this test case at https://www.desmos.com/3d/vogsjthmav @return: """ print() # Define the base 2D tensors A_bar_base = _tensor([[-5 / 5, -1 / 5], [2 / 5, 1 / 5], [10 / 35, 1 / 5]]) x_L_base = _tensor([0, 0]) x_U_base = _tensor([1, 1]) c_bar_base = _tensor([[1 / 5], [-1 / 5], [-1 / 5]]) target_base = _tensor([[0], [0], [0]]) # Expand the base tensors along the batch dimension lA, x_L, x_U, c_bar, thresholds, dm_lb = setup_test_matrices(A_bar_base, x_L_base, x_U_base, c_bar_base, target_base, batches) # In this suite, we have a reference for x_L/U ref_x_L = x_L.clone() ref_x_U = torch.zeros_like(x_U) ref_x_U[:] = _tensor([0.5, 1.0]) old_x_L = x_L.clone() old_x_U = x_U.clone() ret = clip_domains(x_L, x_U, thresholds, lA, None, dm_lb) new_x_L, new_x_U = ret assert (new_x_L.shape == old_x_L.shape) and (new_x_U.shape == old_x_U.shape), "x_L(U) should have the same shape as before" # check the returned x_L/U matches the expected x_L/U values x_L_eq = torch.allclose(new_x_L, ref_x_L, atol=atol) x_U_eq = torch.allclose(new_x_U, ref_x_U, atol=atol) assert x_L_eq, "x_L is not correct" assert x_U_eq, "x_U is not correct" # Rest of file are helper functions def concretize_bounds( x_hat: torch.Tensor, x_eps: torch.Tensor, lA: torch.Tensor, lbias: Union[torch.Tensor, int], C: Union[torch.Tensor, None] = None, lower: bool = True): """ Takes batches and concretizes them @param x_hat: shape (batch, input_dim) The origin position of the input domain @param x_eps: shape (batch, input_dim) The epsilon disturbance from the origin of the input domain @param lA: shape (batch, spec_dim/lA rows, input_dim) The lA matrix calculated by CROWN; When C is None, we refer to the second dimension as spec_dim. 
When C is given, this is denoted as lA rows @param lbias: shape (batch, spec_dim) The bias vector calculated by CROWN @param lower: Whether the lower or upper bound should be concretized @param C: shape (batch, spec_dim, lA rows) When not None, is transposed and distributed to lA and lbias to produce the specification of interest @return: The lower/upper bound of the batches """ lA = lA.view(lA.shape[0], lA.shape[1], -1) batches, spec_dim, input_dim = lA.shape if isinstance(lbias, int): lbias = _tensor([lbias]).expand(batches, spec_dim) lbias = lbias.unsqueeze(-1) # change lbiases to be column vectors if C is not None: # Let C act like the new last linear layer of the network and distribute it to lA and lbias # Update shapes C = C.reshape(batches, spec_dim, -1) C = C.transpose(1, 2) lA = C.bmm(lA) lbias = C.bmm(lbias) batches, spec_dim, input_dim = lA.shape # lA shape: (batch, spec_dim, # inputs) # dom_lb shape: (batch, spec_dim) # thresholds shape: (batch, spec_dim) # lbias shape: (batch, spec_dim, 1) sign = -1 if lower else 1 x_hat = x_hat.unsqueeze(-1) x_eps = x_eps.unsqueeze(-1) ret = lA.bmm(x_hat) + sign * lA.abs().bmm(x_eps) + lbias return ret.squeeze(2) def setup_test_matrices( A_bar_base: Tensor, x_L_base: Tensor, x_U_base: Tensor, l_bias_base: Tensor, target_base: Tensor, batches: int ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: """ Creates batch copies of base Tensors and formats them in the same format that they would be in CROWN. @param A_bar_base: shape (spec_dim, input_dim) The lA matrix of the instance @param x_L_base: shape (input_dim,) The lower bound on the input domain @param x_U_base: shape (input_dim,) The upper bound on the input domain @param l_bias_base: shape (spec_dim,) The bias vector of the instance @param target_base: shape (spec_dim,) The threshold/specification to verify @param batches: The number of batch copies to produce of the instance @return: Returns same instance in batch form """ # create the copies lA, x_L, x_U, c_bar, thresholds = create_batch_copies(A_bar_base, x_L_base, x_U_base, l_bias_base, target_base, batches) # This is how x_L, x_U, lbias will be received in CROWN # x_L/U shape: (batch, # inputs) # lA shape: (batch, spec_dim, # inputs) # dom_lb shape: (batch, spec_dim) # thresholds shape: (batch, spec_dim) x_L = x_L.flatten(1) x_U = x_U.flatten(1) c_bar = c_bar.squeeze(-1) thresholds = thresholds.squeeze(-1) # get the global lb x_hat = (x_U + x_L) / 2 x_eps = (x_U - x_L) / 2 dm_lb = concretize_bounds(x_hat, x_eps, lA, c_bar) return lA, x_L, x_U, c_bar, thresholds, dm_lb def create_batch_copies( A_bar_base: Tensor, x_L_base: Tensor, x_U_base: Tensor, l_bias_base: Tensor, target_base: Tensor, batches: int ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """ Takes a problem not in batch form and turns them into batches. If batches = 1, we only solve the initial problem in batch form, and if batches > 1, we are solving the same problem but in multiple batches. 
@param A_bar_base: @param x_L_base: @param x_U_base: @param l_bias_base: @param target_base: @param batches: @return: """ A_bar = A_bar_base.unsqueeze(0).repeat(batches, 1, 1) x_L = x_L_base.unsqueeze(0).repeat(batches, 1) x_U = x_U_base.unsqueeze(0).repeat(batches, 1) l_bias = l_bias_base.unsqueeze(0).repeat(batches, 1, 1) target = target_base.unsqueeze(0).repeat(batches, 1, 1) return A_bar, x_L, x_U, l_bias, target def random_setup_generator( randint_range=(1, 10), ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: """ Creates random problem set-ups to test out if our new heuristic is compatible with various dimensions @param randint_range: A range where batches, spec_dim, and input_dim will exist in @return: """ batches, spec_dim, input_dim = randint(*randint_range), randint(*randint_range), randint(*randint_range) lA = torch.rand((batches, spec_dim, input_dim)) lbias = torch.rand((batches, spec_dim, 1)) thresholds = torch.rand((batches, spec_dim, 1)) parameters = { "batches": batches, "spec_dim": spec_dim, "input_dim": input_dim } return lA, lbias, thresholds, parameters ================================================ FILE: tests/test_constant.py ================================================ """Test BoundConstant""" import torch import os import torch.nn as nn import torch.nn.functional as F import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class cnn_MNIST(nn.Module): def __init__(self): super(cnn_MNIST, self).__init__() self.conv1 = nn.Conv2d(1, 8, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(8, 16, 4, stride=2, padding=1) self.fc1 = nn.Linear(784, 256) self.fc2 = nn.Linear(256, 10) def forward(self, x): x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) x = x.view(-1, 784) x = 2.0 * x x = F.relu(self.fc1(x)) x = self.fc2(x) return 0.5 * x class TestConstant(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name='constant_test_data', generate=generate, device=device, dtype=dtype) def test(self): model = cnn_MNIST() checkpoint = torch.load("../examples/vision/pretrained/mnist_cnn_small.pth", map_location=self.default_device) model.load_state_dict(checkpoint) N = 2 n_classes = 10 image = torch.randn(N, 1, 28, 28) image = image.to(device=self.default_device, dtype=self.default_dtype) / 255.0 model = BoundedModule(model, torch.empty_like(image), device=self.default_device) eps = 0.3 norm = np.inf ptb = PerturbationLpNorm(norm=norm, eps=eps) image = BoundedTensor(image, ptb) pred = model(image) lb, ub = model.compute_bounds() assert lb.shape == ub.shape == torch.Size((2, 10)) self.result = (lb, ub) if self.reference: self.reference = ( self.reference[0].to( device=self.default_device, dtype=self.default_dtype), self.reference[1].to( device=self.default_device, dtype=self.default_dtype) ) self.rtol = 5e-4 self.check() if __name__ == '__main__': # Change to generate=True when genearting reference results testcase = TestConstant(generate=False) testcase.setUp() testcase.test() ================================================ FILE: tests/test_constrained_concretize.py ================================================ """Test optimized bounds in simple_verification.""" import torch import numpy as np from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from testcase import TestCase, DEFAULT_DEVICE, 
DEFAULT_DTYPE class ConstrainedConcretizeModel(torch.nn.Module): def __init__(self): super().__init__() self.w1 = torch.tensor([[1., -1.], [2., -1.]]) self.w2 = torch.tensor([[1., -1.]]) def forward(self, x): z1 = x.matmul(self.w1.t()) hz1 = torch.nn.functional.relu(z1) z2 = hz1.matmul(self.w2.t()) return z2 class TestConstrainedConcretize(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, 1, "test_constrained_concretize", generate, device=device, dtype=dtype) def test(self): model = ConstrainedConcretizeModel().to(self.default_device).to(self.default_dtype) # Input x. x = torch.tensor([[1., 1.]], dtype=self.default_dtype, device=self.default_device) # Lower and upper bounds of x. lower = torch.tensor([[-1., -2.]], dtype=self.default_dtype, device=self.default_device) upper = torch.tensor([[2., 1.]], dtype=self.default_dtype, device=self.default_device) # Wrap model with auto_LiRPA for bound computation. # The second parameter is for constructing the trace of the computational graph, # and its content is not important. lirpa_model = BoundedModule(model, torch.empty_like(x)) pred = lirpa_model(x) print(f'Model prediction: {pred.item()}') # Compute bounds using LiRPA using the given lower and upper bounds. norm = float("inf") ptb = PerturbationLpNorm(norm = norm, x_L=lower, x_U=upper) bounded_x = BoundedTensor(x, ptb) # Compute bounds. lb, ub = lirpa_model.compute_bounds(x=(bounded_x,), method='CROWN') print(f'CROWN bounds: lower={lb.item()}, upper={ub.item()}') # Add a new constraint of : # 1*x_0 + 1*x_1 + 2 <= 0 constraint_a = torch.tensor([[[1.0, 1.0]]], dtype=self.default_dtype, device=self.default_device) constraint_b = torch.tensor([[2.0]], dtype=self.default_dtype, device=self.default_device) constraints = (constraint_a, constraint_b) norm = float("inf") ptb = PerturbationLpNorm(norm = norm, x_L=lower, x_U=upper, constraints=constraints) bounded_x = BoundedTensor(x, ptb) # Compute bounds. 
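# Hand check of the toy model above: with w1 = [[1, -1], [2, -1]] and w2 = [[1, -1]], the network
# computes f(x) = relu(x0 - x1) - relu(2*x0 - x1), so at x = (1, 1) the prediction is
# relu(0) - relu(1) = -1. The added input constraint x0 + x1 + 2 <= 0 only shrinks the feasible
# region inside the box [-1, 2] x [-2, 1], so the constrained CROWN bounds computed below should
# be no looser than the unconstrained bounds obtained above.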
constrained_lb, constrained_ub = lirpa_model.compute_bounds(x=(bounded_x,), method='CROWN') print(f'CROWN bounds (with constraints): lower={constrained_lb.item()}, upper={constrained_ub.item()}') self.result = (lb, ub, constrained_lb, constrained_ub) self.check() if __name__ == '__main__': testcase = TestConstrainedConcretize(generate=True) testcase.setUp() testcase.test() ================================================ FILE: tests/test_conv.py ================================================ import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class cnn_model(nn.Module): def __init__(self, layers, padding, stride, linear): super(cnn_model, self).__init__() self.module_list = [] channel = 1 length = 28 for i in range(layers): self.module_list.append(nn.Conv2d(channel, 3, 4, stride = stride, padding = padding)) channel = 3 length = (length + 2 * padding - 4)//stride + 1 assert length > 0 self.module_list.append(nn.ReLU()) self.module_list.append(nn.Flatten()) if linear: self.module_list.append(nn.Linear(3 * length * length, 256)) self.module_list.append(nn.Linear(256, 10)) self.model = nn.Sequential(*self.module_list) def forward(self, x): x = self.model(x) return x class TestConv(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) def test(self): models = [1, 2, 3] paddings = [1, 2] strides = [1, 3] N = 2 n_classes = 10 image = torch.randn(N, 1, 28, 28, dtype=self.default_dtype, device=self.default_device) image = image / 255.0 for layer_num in models: for padding in paddings: for stride in strides: for linear in [True, False]: model_ori = cnn_model(layer_num, padding, stride, linear) print('Model:', model_ori) model_ori = model_ori.to( device=self.default_device, dtype=self.default_dtype) model = BoundedModule(model_ori, image, device=self.default_device, bound_opts={"conv_mode": "patches"}) eps = 0.3 ptb = PerturbationLpNorm(x_L=image-eps, x_U=image+eps) image = BoundedTensor(image, ptb) pred = model(image) lb, ub = model.compute_bounds() model = BoundedModule(model_ori, image, device=self.default_device, bound_opts={"conv_mode": "matrix"}) pred = model(image) lb_ref, ub_ref = model.compute_bounds() if linear: assert lb.shape == ub.shape == torch.Size((N, n_classes)) self.assertEqual(lb, lb_ref) self.assertEqual(ub, ub_ref) if not linear and layer_num == 1: pred = model(image) lb_forward, ub_forward = model.compute_bounds(method='forward') self.assertEqual(lb, lb_forward) self.assertEqual(ub, ub_forward) pred = model(image) lb_forward, ub_forward = model.compute_bounds(method='dynamic-forward+backward') self.assertEqual(lb, lb_forward) self.assertEqual(ub, ub_forward) if __name__ == '__main__': testcase = TestConv() testcase.test() ================================================ FILE: tests/test_conv1d.py ================================================ """Test Conv1d.""" import torch import torch.nn as nn import torch.nn.functional as F from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class Model(nn.Module): def __init__(self, kernel_size=2, stride=1, padding=0, in_features=1,out_features=1): super(Model, self).__init__() self.n_n_conv1d_1 = nn.Conv1d(**{'groups': 1, 'dilation': 1, 
'out_channels': 1, 'padding': padding, 'kernel_size': kernel_size, 'stride': stride, 'in_channels': 1, 'bias': True}) self.n_n_conv1d_2 = nn.Conv1d(**{'groups': 1, 'dilation': 1, 'out_channels': 1, 'padding': padding, 'kernel_size': kernel_size, 'stride': stride, 'in_channels': 1, 'bias': True}) self.relu_2 = nn.ReLU() self.n_n_conv1d_3 = nn.Conv1d(**{'groups': 1, 'dilation': 1, 'out_channels': 1, 'padding': padding, 'kernel_size': kernel_size, 'stride': stride, 'in_channels': 1, 'bias': True}) self.relu_3 = nn.ReLU() self.n_n_activation_Flatten = nn.Flatten(**{'start_dim': 1}) L_in,dialation = in_features,1 L_out_1 = math.floor((L_in+2*padding-dialation*(kernel_size-1)-1)/stride+1) L_out_2 = math.floor((L_out_1+2*padding-dialation*(kernel_size-1)-1)/stride+1) L_out_3 = math.floor((L_out_2+2*padding-dialation*(kernel_size-1)-1)/stride+1) self.n_n_linear = nn.Linear(**{'in_features':L_out_3, 'out_features':out_features,'bias':True}) def forward(self, *inputs,debug=False): t_ImageInputLayer, = inputs t_conv1d_1 = self.n_n_conv1d_1(t_ImageInputLayer) if debug: print("t_ImageInputLayer",t_ImageInputLayer.shape) if debug: print("t_conv1d_1",t_conv1d_1.shape) t_conv1d_relu_1 = F.relu(t_conv1d_1) t_conv1d_2 = self.n_n_conv1d_2(t_conv1d_relu_1) if debug: print("t_conv1d_2",t_conv1d_2.shape) t_conv1d_relu_2 = F.relu(t_conv1d_2) t_conv1d_3 = self.n_n_conv1d_3(t_conv1d_relu_2) if debug: print("t_conv1d_3",t_conv1d_3.shape) t_conv1d_relu_3 = F.relu(t_conv1d_3) t_flatten = self.n_n_activation_Flatten(t_conv1d_relu_3) if debug: print("t_flatten",t_flatten.shape) t_linear = self.n_n_linear(t_flatten) if debug: print("t_linear",t_linear.shape) return t_linear class TestConv1D(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) def test(self): if self.default_dtype == torch.float64: data_path = 'data_64/' else: data_path = 'data/' N = 3 C = 1 M = 173 n_classes = 2 for kernel_size in [3,4]: for padding in [0,1]: for stride in [2,3]: print(kernel_size, padding, stride) model_ori = Model(kernel_size=kernel_size, padding=padding, stride=stride, in_features=M,out_features=n_classes) model_ori = model_ori.to(dtype=self.default_dtype, device=self.default_device) if not self.generate: data = torch.load(data_path + 'conv1d_test_data_{}-{}-{}'.format(kernel_size, padding, stride), weights_only=False) image = data['input'].to(dtype=self.default_dtype, device=self.default_device) model_ori(image) model_ori.load_state_dict(data['model']) else: image = torch.rand([N, C, M], dtype=self.default_dtype, device=self.default_device) model_ori(image) conv_mode = "matrix" model = BoundedModule(model_ori, image, device=self.default_device, bound_opts={"conv_mode": conv_mode}) eps = 0.3 norm = np.inf ptb = PerturbationLpNorm(norm=norm, eps=eps) image = BoundedTensor(image, ptb) lb, ub, A = model.compute_bounds((image,), return_A=True, needed_A_dict={model.output_name[0]:model.input_name[0]},) ''' # 1. testing if lb == ub == pred when eps = 0 assert (lb == ub).all() and torch.allclose(lb,pred,rtol=1e-5) and torch.allclose(ub,pred,rtol=1e-5) # 2. 
test if A matrix equals to gradient of the input # get output's grad with respect to the input without iterating through torch.autograd.grad: # https://stackoverflow.com/questions/64988010/getting-the-outputs-grad-with-respect-to-the-input uA = A[model.output_name[0]][model.input_name[0]]['uA'] lA = A[model.output_name[0]][model.input_name[0]]['lA'] assert (uA==lA).all() assert (torch.autograd.functional.jacobian(model_ori,image_clean).sum(dim=2)==uA).all() assert (torch.autograd.functional.jacobian(model_ori,image_clean).sum(dim=2)==lA).all() # double check input_grads = torch.zeros(uA.shape) for i in range(N): for j in range(n_classes): input_grads[i][j]=torch.autograd.grad(outputs=output_clean[i,j], inputs=image_clean, retain_graph=True)[0].sum(dim=0) assert (input_grads==uA).all() assert (input_grads==lA).all() ''' # 3. test when eps = 0.3 (uncommented) if self.generate: torch.save( {'model': model_ori.state_dict(), 'input': image, 'lb': lb, 'ub': ub}, data_path + '/conv1d_test_data_{}-{}-{}'.format(kernel_size, padding, stride) ) if not self.generate: lb_ref = data['lb'] ub_ref = data['ub'] assert torch.allclose(lb, lb_ref, 1e-3) assert torch.allclose(ub, ub_ref, 1e-3) if __name__ == '__main__': testcase = TestConv1D(generate=False) testcase.test() ================================================ FILE: tests/test_distinct_patches.py ================================================ import torch import random import numpy as np import torch.nn as nn import torch.nn.functional as F import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm import sys sys.path.append('../examples/vision') from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE def reset_seed(seed=1234): torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) random.seed(seed) np.random.seed(seed) class cnn_4layer_b(nn.Module): def __init__(self, paddingA, paddingB): super().__init__() self.padA = nn.ZeroPad2d(paddingA) self.padB = nn.ZeroPad2d(paddingB) self.conv1 = nn.Conv2d(3, 32, (5,5), stride=2, padding=0) self.conv2 = nn.Conv2d(32, 128, (4,4), stride=2, padding=1) self.linear = None self.fc = nn.Linear(250, 10) def forward(self, x): x = self.padA(x) x = self.conv1(x) x = self.conv2(self.padB(F.relu(x))) x = F.relu(x) x = x.view(x.size(0), -1) if self.linear is None: self.linear = nn.Linear(x.size(1), 250) x = self.linear(x) return self.fc(F.relu(x)) class TestDistinctPatches(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1234, ref_name='distinct_patches_test_data', generate=generate, device=device, dtype=dtype) self.cases = [(2,1,2,1), (0,0,0,0), (1,3,3,1), (2,2,3,1)] normalize = torchvision.transforms.Normalize( mean = [0.4914, 0.4822, 0.4465], std = [0.2023, 0.1994, 0.2010] ) test_data = torchvision.datasets.CIFAR10( "./data", train=False, download=True, transform=torchvision.transforms.Compose([ torchvision.transforms.ToTensor(), normalize ]) ) imgs = torch.from_numpy(test_data.data[:1]).reshape(1,3,32,32).float() / 255.0 self.single_img = imgs.to(dtype=self.default_dtype, device=self.default_device) def run_conv_mode(self, model, img, conv_mode): model(img) # dummy run to initialize shapes model_lirpa = BoundedModule( model, img, device=self.default_device, bound_opts={"conv_mode": conv_mode} ) ptb = PerturbationLpNorm(norm = np.inf, eps = 0.03) img_perturbed = BoundedTensor(img, ptb) lb, ub = model_lirpa.compute_bounds( x=(img_perturbed,), 
IBP=False, C=None, method='backward' ) return lb, ub def test(self): self.result = [] for paddingA in self.cases: for paddingB in self.cases: print("Testing", paddingA, paddingB) reset_seed() model_ori = cnn_4layer_b(paddingA, paddingB).to( device=self.default_device, dtype=self.default_dtype ) lb_patch, ub_patch = self.run_conv_mode( model_ori, self.single_img, conv_mode='patches' ) self.result.append((lb_patch, ub_patch)) if self.generate: # We only compare with matrix mode when generating reference results lb_matrix, ub_matrix = self.run_conv_mode( model_ori, self.single_img, conv_mode='matrix' ) # Check equality assert torch.allclose(lb_patch, lb_matrix), "Lower bounds differ!" assert torch.allclose(ub_patch, ub_matrix), "Upper bounds differ!" self.check() if __name__ == '__main__': # Change to generate=True when genearting reference results testcase = TestDistinctPatches(generate=False) testcase.test() ================================================ FILE: tests/test_examples.py ================================================ """Test all the examples before release. This script is expected be manually run and is not used in automatic tests.""" import pytest import subprocess import os import sys import shlex pytest_skip = pytest.mark.skip( reason="It should be tested on a GPU server and excluded from CI") if not 'CACHE_DIR' in os.environ: cache_dir = os.path.join(os.getcwd(), '.cache') else: cache_dir = os.environ['CACHE_DIR'] if not os.path.exists(cache_dir): os.makedirs(cache_dir) def download_data_language(): url = "http://download.huan-zhang.com/datasets/language/data_language.tar.gz" if not os.path.exists('../examples/language/data/sst'): subprocess.run(shlex.split(f"wget {url}"), cwd="../examples/language") subprocess.run(shlex.split(f"tar xvf data_language.tar.gz"), cwd="../examples/language") @pytest_skip def test_transformer(): cmd = f"""python train.py --dir {cache_dir} --robust --method IBP+backward_train --train --num_epochs 2 --num_epochs_all_nodes 2 --eps_start 2 --eps_length 1 --eps 0.1""" print(cmd, file=sys.stderr) download_data_language() subprocess.run(shlex.split(cmd), cwd='../examples/language') @pytest_skip def test_lstm(): cmd = f"""python train.py --dir {cache_dir} --model lstm --lr 1e-3 --dropout 0.5 --robust --method IBP+backward_train --train --num_epochs 2 --num_epochs_all_nodes 2 --eps_start 2 --eps_length 1 --eps 0.1 --hidden_size 2 --embedding_size 2 --intermediate_size 2 --max_sent_length 4""" print(cmd, file=sys.stderr) download_data_language() subprocess.run(shlex.split(cmd), cwd='../examples/language') @pytest_skip def test_lstm_seq(): cmd = f"""python train.py --dir {cache_dir} --hidden_size 2 --num_epochs 2 --num_slices 4""" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/sequence') @pytest_skip def test_simple_verification(): cmd = "python simple_verification.py" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_custom_op(): cmd = "python custom_op.py" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_efficient_convolution(): cmd = "python efficient_convolution.py" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_two_node(): cmd = "python verify_two_node.py" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_simple_training(): cmd = """python simple_training.py --num_epochs 5 
--scheduler_opts start=2,length=2""" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_cifar_training(): cmd = """python cifar_training.py --batch_size 64 --model ResNeXt_cifar --num_epochs 5 --scheduler_opts start=2,length=2""" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_weight_perturbation(): cmd = """python weight_perturbation_training.py --norm 2 --bound_type CROWN-IBP --num_epochs 3 --scheduler_opts start=2,length=1 --eps 0.01""" print(cmd, file=sys.stderr) subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_tinyimagenet(): cmd = f"""python tinyimagenet_training.py --batch_size 32 --model wide_resnet_imagenet64 --num_epochs 3 --scheduler_opts start=2,length=1 --eps {0.1/255} --in_planes 2 --widen_factor 2""" print(cmd, file=sys.stderr) if not os.path.exists('../examples/vision/data/tinyImageNet/tiny-imagenet-200'): subprocess.run(shlex.split("bash tinyimagenet_download.sh"), cwd="../examples/vision/data/tinyImageNet") subprocess.run(shlex.split(cmd), cwd='../examples/vision') @pytest_skip def test_imagenet(): cmd = f"""python imagenet_training.py --batch_size 32 --model wide_resnet_imagenet64_1000class --num_epochs 3 --scheduler_opts start=2,length=1 --eps {0.1/255} --in_planes 2 --widen_factor 2""" print(cmd) if (not os.path.exists('../examples/vision/data/ImageNet64/train') or not os.path.exists('../examples/vision/data/ImageNet64/test')): print('Error: ImageNet64 dataset is not ready.') return -1 subprocess.run(shlex.split(cmd), cwd='../examples/vision') def test_release(): """Run all tests that don't require a GPU server.""" test_simple_verification() test_custom_op() test_efficient_convolution() test_two_node() if __name__ == '__main__': test_release() ================================================ FILE: tests/test_examples_ci.py ================================================ import subprocess import traceback import test_examples original_subprocess_run = subprocess.run def custom_run(*args, **kwargs): kwargs.setdefault('check', True) return original_subprocess_run(*args, **kwargs) subprocess.run = custom_run def run_tests(): # get all func start with test in test_examples other than 'test_release' # and 'test_cifar_training'(cannot run on GPU with memory lower than 32GB) test_functions = [ getattr(test_examples, func) for func in dir(test_examples) if callable(getattr(test_examples, func)) and func.startswith('test') and func not in ['test_release'] ] try: for test_func in test_functions: test_func() print(f"{test_func.__name__} executed successfully.") except Exception as e: print(f"Exception in {test_func.__name__}: {e}") traceback.print_exc() # Print detailed exception information print("Examples Test Result:") print("\nFailed tests:") print(test_func.__name__) raise print("Examples Test Result:") print("\nAll tests passed successfully.") if __name__ == '__main__': run_tests() ================================================ FILE: tests/test_general_nonlinear.py ================================================ import sys import pytest import torch.nn as nn sys.path.insert(0, '../complete_verifier') import arguments from beta_CROWN_solver import LiRPANet from bab import general_bab from auto_LiRPA import BoundedTensor from auto_LiRPA.perturbations import * from testcase import DEFAULT_DEVICE, DEFAULT_DTYPE class Sin(nn.Module): def forward(self, x): return torch.sin(x) def cifar_model_wide(): # cifar wide model = 
nn.Sequential( nn.Conv2d(3, 16, 4, stride=2, padding=1), Sin(), nn.Conv2d(16, 32, 4, stride=2, padding=1), Sin(), nn.Flatten(), nn.Linear(32 * 8 * 8, 100), Sin(), nn.Linear(100, 10) ) return model def bab(model_ori, data, target, norm, eps, data_max=None, data_min=None, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): data = data.to(device=device, dtype=dtype) eps = eps.to(device=device, dtype=dtype) if norm == np.inf: if data_max is None: data_ub = data + eps data_lb = data - eps else: data_max = data_max.to(device=device, dtype=dtype) data_min = data_min.to(device=device, dtype=dtype) data_ub = torch.min(data + eps, data_max) data_lb = torch.max(data - eps, data_min) else: data_ub = data_lb = data pred = torch.argmax(model_ori(data), dim=1) c = torch.zeros((1, 1, 10), device=device, dtype=dtype) # we only support c with shape of (1, 1, n) c[0, 0, pred] = 1 c[0, 0, target] = -1 rhs = torch.tensor(arguments.Config["bab"]["decision_thresh"], dtype=dtype, device=device).view(c.shape[:2]) arguments.Config.parse_config(args={}) arguments.Config['general']['device'] = 'cpu' arguments.Config["solver"]["batch_size"] = 200 arguments.Config["bab"]["decision_thresh"] = np.float64(10) # naive float obj has no max() function, np.inf will lead infeasible domain arguments.Config["solver"]["beta-crown"]["iteration"] = 20 arguments.Config["bab"]["timeout"] = 60 #300 arguments.Config["solver"]["alpha-crown"]["lr_alpha"] = 0.1 arguments.Config["solver"]["beta-crown"]["lr_beta"] = 0.1 arguments.Config["bab"]["branching"]["method"] = 'nonlinear' arguments.Config["bab"]["branching"]["candidates"] = 2 arguments.Config["general"]["enable_incomplete_verification"] = False arguments.Config["data"]["dataset"] = 'cifar' # LiRPA wrapper model = LiRPANet(model_ori, device=device, in_size=(1, 3, 32, 32)) ptb = PerturbationLpNorm(norm=norm, eps=eps, x_L=data_lb, x_U=data_ub) x = BoundedTensor(data, ptb) forward = model_ori(x) min_lb = general_bab(model, x, c, rhs)[0] if isinstance(min_lb, torch.Tensor): min_lb = min_lb.item() min_lb += arguments.Config["bab"]["decision_thresh"] print(min_lb) assert min_lb < torch.min(forward) # This test takes long time so it is set as the last test case. 
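# For reference: the specification matrix `c` built in bab() has one row per example
# with +1 at the predicted class and -1 at the attacked class, so C @ f(x) is the
# margin f_pred(x) - f_target(x); general_bab() then computes a verified lower bound
# on this margin. A minimal sketch of that construction (hypothetical helper, not
# used by this test):
def _margin_spec(pred, target, n_classes=10, device='cpu', dtype=torch.float32):
    # One output row: +1 on the predicted class, -1 on the attacked class.
    c = torch.zeros((1, 1, n_classes), device=device, dtype=dtype)
    c[0, 0, pred] = 1.
    c[0, 0, target] = -1.
    return c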
@pytest.mark.skip(reason="The test is failing now after removing index clamping.") # @pytest.mark.order(-1) def test(device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): model_ori = cifar_model_wide() data = torch.load('data/beta_crown_test_data') model_ori.load_state_dict(data['state_dict']) model_ori = model_ori.to(device=device, dtype=dtype) x = data['x'] pidx = data['pidx'] eps_temp = data['eps_temp'] data_max = data['data_max'] data_min = data['data_min'] bab(model_ori, x, pidx, float('inf'), eps_temp, data_max=data_max, data_min=data_min, device=device, dtype=dtype) if __name__ == "__main__": test() ================================================ FILE: tests/test_general_shape.py ================================================ """ Test inputs of general shapes (especially for matmul)""" import torch import torch.nn as nn import numpy as np from auto_LiRPA import BoundedModule, BoundedTensor, PerturbationLpNorm from auto_LiRPA.operators import BoundMatMul from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE BATCH_SIZE = 2 class GeneralShapeModel(nn.Module): def __init__(self): super().__init__() self.weight_1 = nn.Parameter(torch.randn(3, 4)) self.weight_2 = nn.Parameter(torch.randn(4, 3)) self.weight_3 = nn.Parameter(torch.randn(3, 4)) self.weight_4 = nn.Parameter(torch.randn(4, 4, 3)) self.weight_5 = nn.Parameter(torch.randn(6, 3, 4)) self.weight_6 = nn.Parameter(torch.randn(3, 5)) self.relu = nn.ReLU() def forward(self, x, w): # Basic MatMul (B, 3) @ (3, 4) -> (B, 4) y1 = x.matmul(self.weight_1) # BoundUnsqueeze and BoundTile y2 = self.relu(y1) y2 = y2.unsqueeze(1).repeat(1, 5, 1) # (B, 5, 4) y2 = y2.matmul(self.weight_2) # (B, 5, 4) @ (4, 3) -> (B, 5, 3) # More dimensions on x y3 = self.relu(y2) y3 = y3.unsqueeze(1).repeat(1, 4, 1, 1) # (B, 4, 5, 3) y3 = y3.matmul(self.weight_3) # (B, 4, 5, 3) @ (3, 4) -> (B, 4, 5, 4) # More dimensions on weight y4 = self.relu(y3) y4 = y4.matmul(self.weight_4) # (B, 4, 5, 4) @ (4, 4, 3) -> (B, 4, 5, 3) # Automatically broadcast x y5 = self.relu(y4) y5 = y5.unsqueeze(2) # (B, 4, 1, 5, 3) y5 = y5.matmul(self.weight_5) # (B, 4, 1, 5, 3) @ (6, 3, 4) -> (B, 4, 6, 5, 4) # Multiply with a weight with batch dimension y6 = self.relu(y5) y6 = y6.matmul(w) # (B, 4, 6, 5, 4) @ (B, 4, 6, 4, 3) -> (B, 4, 6, 5, 3) # Swap x and weight y7 = self.relu(y6) y7 = self.weight_6.matmul(y7) # (3, 5) @ (B, 4, 6, 5, 3) -> (B, 4, 6, 3, 3) return y7 class TestGeneralShape(TestCase): def __init__(self, methodName='runTest', seed=1, generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed, 'test_general_shape_data', generate, device=device, dtype=dtype) self.rtol = 1e-4 def test(self): model = GeneralShapeModel().to(device=self.default_device, dtype=self.default_dtype) input = torch.randn( (BATCH_SIZE, 3), device=self.default_device, dtype=self.default_dtype) eps = 100 ptb = PerturbationLpNorm(norm=np.inf, eps=eps) x = BoundedTensor(input, ptb) # w is an unperturbed input, but still have batch dimension w = torch.randn((BATCH_SIZE, 4, 6, 4, 3), device=self.default_device, dtype=self.default_dtype) lirpa_model = BoundedModule(model, (x, w), device=self.default_device) lb, ub = lirpa_model.compute_bounds((x, w), method="backward") # # Test by sampling # sample_ptb = torch.rand(BATCH_SIZE, *input.shape[1:]) * 2 * eps - eps # sample_inputs = input[0] + sample_ptb # sample_output = model(sample_inputs, w) # assert (sample_output <= ub).all() # assert (sample_output >= lb).all() self.result = [] for node in lirpa_model.nodes(): if type(node) 
== BoundMatMul: self.result.append((node.lower, node.upper)) self.result.append((lb, ub)) self.check() if __name__ == '__main__': testcase = TestGeneralShape(generate=False) testcase.setUp() testcase.test() ================================================ FILE: tests/test_identity.py ================================================ """Test a model with an nn.Identity layer only""" import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class TestIdentity(TestCase): def __init__(self, methodName='runTest', device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def test(self): model = nn.Sequential(nn.Identity()) x = torch.randn(2, 10, device=self.default_device, dtype=self.default_dtype) y = model(x) eps = 0.1 ptb = PerturbationLpNorm(norm=np.inf, eps=eps) x = BoundedTensor(x, ptb) model = BoundedModule(model, x, device=self.default_device) y_l, y_u = model.compute_bounds() self.assertEqual(torch.Tensor(x), y) self.assertEqual(y_l, x - eps) self.assertEqual(y_u, x + eps) if __name__ == '__main__': testcase = TestIdentity() testcase.test() ================================================ FILE: tests/test_invprop.py ================================================ """Test INVPROP.""" import sys sys.path.append('../complete_verifier') from complete_verifier.load_model import unzip_and_optimize_onnx import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class SimpleExampleModel(nn.Module): def __init__(self): super().__init__() # Weights of linear layers. self.w1 = torch.tensor([[1., -1.], [2., -1.]]) self.w2 = torch.tensor([[1., -1.]]) def forward(self, x): # Linear layer. z1 = x.matmul(self.w1.t()) # Relu layer. hz1 = torch.nn.functional.relu(z1) # Linear layer. 
z2 = hz1.matmul(self.w2.t()) return z2 class TestInvpropSimpleExample(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) def test(self): np.random.seed(123) model_ori = SimpleExampleModel().to( device=self.default_device, dtype=self.default_dtype) apply_output_constraints_to = ['BoundMatMul', 'BoundInput'] x = torch.tensor([[1., 1.]], device=self.default_device, dtype=self.default_dtype) model = BoundedModule(model_ori, torch.empty_like(x), bound_opts={ 'optimize_bound_args': { 'apply_output_constraints_to': apply_output_constraints_to, 'tighten_input_bounds': True, 'best_of_oc_and_no_oc': False, 'directly_optimize': [], 'oc_lr': 0.1, 'share_gammas': False, 'iteration': 1000, } }, device=self.default_device ) model.constraints = torch.ones( 1, 1, 1, device=self.default_device, dtype=self.default_dtype) model.thresholds = torch.tensor( [-1.], device=self.default_device, dtype=self.default_dtype) norm = float("inf") lower = torch.tensor( [[-1., -2.]], device=self.default_device, dtype=self.default_dtype) upper = torch.tensor( [[2., 1.]], device=self.default_device, dtype=self.default_dtype) ptb = PerturbationLpNorm(norm = norm, x_L=lower, x_U=upper) bounded_x = BoundedTensor(x, ptb) lb, ub = model.compute_bounds(x=(bounded_x,), method='alpha-CROWN') if '/0' in model._modules: tightened_ptb = model['/0'].perturbation else: tightened_ptb = model['/x'].perturbation if self.default_dtype == torch.float64: data_path = 'data_64/' else: data_path = 'data/' if self.generate: torch.save({ 'lb': lb, 'ub': ub, 'x_L': tightened_ptb.x_L, 'x_U': tightened_ptb.x_U }, data_path + 'invprop/simple_reference') else: data = torch.load(data_path + 'invprop/simple_reference') lb_ref = data['lb'] ub_ref = data['ub'] x_L_ref = data['x_L'] x_U_ref = data['x_U'] assert torch.allclose(lb, lb_ref, 1e-4) assert torch.allclose(ub, ub_ref, 1e-4) assert torch.allclose(tightened_ptb.x_L, x_L_ref, 1e-4) assert torch.allclose(tightened_ptb.x_U, x_U_ref, 1e-4) class TestInvpropOODExample(TestCase): # Based on https://github.com/kothasuhas/verify-input/tree/main/examples/ood def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) def test(self): np.random.seed(123) import onnx2pytorch model_ori = onnx2pytorch.ConvertModel(unzip_and_optimize_onnx('data/invprop/ood.onnx')).eval() model_ori = model_ori.to( device=self.default_device, dtype=self.default_dtype) x = torch.tensor([[1., 1.]], device=self.default_device, dtype=self.default_dtype) model = BoundedModule(model_ori, torch.empty_like(x), bound_opts={ 'optimize_bound_args': { 'apply_output_constraints_to': ['BoundInput', "/input", "/input-3", "/21"], 'tighten_input_bounds': True, 'best_of_oc_and_no_oc': True, 'directly_optimize': ['/input'], 'oc_lr': 0.01, 'iteration': 1000, 'share_gammas': False, 'lr_decay': 0.99, 'early_stop_patience': 1000, 'init_alpha': False, 'lr_alpha': 0.4, 'start_save_best': -1, } }, device=self.default_device ) model.constraints = torch.tensor( [[[-1., 0., 1.]], [[0., -1., 1.]]], device=self.default_device, dtype=self.default_dtype) model.thresholds = torch.tensor( [0., 0.], device=self.default_device, dtype=self.default_dtype) norm = float("inf") lower = torch.tensor( [[-2., -2.], [-2., -2.]], device=self.default_device, dtype=self.default_dtype) 
upper = torch.tensor( [[0., 0.], [0., 0.]], device=self.default_device, dtype=self.default_dtype) ptb = PerturbationLpNorm(norm = norm, x_L=lower, x_U=upper) x_expand = BoundedTensor(torch.tensor( [[-1., -1.], [-1., -1.]], device=self.default_device, dtype=self.default_dtype), ptb) c = torch.tensor([[[-1., 0., 1.]], [[0., -1., 1.]]], device=self.default_device, dtype=self.default_dtype) # Init manually, to set bound_upper=False model.init_alpha( (x_expand,), share_alphas=False, c=c, bound_upper=False) model.compute_bounds(x=(x_expand,), C=c, method='CROWN-Optimized') if self.default_dtype == torch.float64: data_path = 'data_64/' else: data_path = 'data/' if self.generate: torch.save({ 'lower': model['/input'].lower, 'upper': model['/input'].upper, }, data_path + 'invprop/ood_reference') else: data = torch.load(data_path + 'invprop/ood_reference') lower_ref = data['lower'] upper_ref = data['upper'] lower_diff = model['/input'].lower[0] - lower_ref[0] assert torch.allclose(model['/input'].lower[0], lower_ref[0], atol=1e-3), (lower_diff, lower_diff.abs().max()) assert torch.all(torch.isposinf(lower_ref[1])) assert torch.all(torch.isposinf(model['/input'].lower[1])) upper_diff = model['/input'].upper[0] - upper_ref[0] assert torch.allclose(model['/input'].upper[0], upper_ref[0], atol=1e-3), (upper_diff, upper_diff.abs().max()) assert torch.all(torch.isneginf(upper_ref[1])) assert torch.all(torch.isneginf(model['/input'].upper[1])) if __name__ == '__main__': testcase = TestInvpropSimpleExample(generate=False) testcase.test() testcase = TestInvpropOODExample(generate=False) testcase.test() ================================================ FILE: tests/test_jacobian.py ================================================ # pylint: disable=wrong-import-position """Test Jacobian bounds.""" import sys import torch import torch.nn as nn sys.path.append('../examples/vision') from jacobian import compute_jacobians from auto_LiRPA import BoundedModule from auto_LiRPA.utils import Flatten from auto_LiRPA.jacobian import JacobianOP from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class TestJacobian(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__( methodName, seed=1, ref_name='jacobian_test_data', generate=generate, device=device, dtype=dtype) def test(self): in_dim, linear_size = 8, 100 model = nn.Sequential( Flatten(), nn.Linear(3*in_dim**2, linear_size), nn.ReLU(), nn.Linear(linear_size, linear_size), nn.Tanh(), nn.Linear(linear_size, linear_size), nn.Sigmoid(), nn.Linear(linear_size, 10), ) model = model.to(device=self.default_device, dtype=self.default_dtype) x0 = torch.randn(1, 3, in_dim, in_dim, device=self.default_device, dtype=self.default_dtype) self.result = compute_jacobians(model, x0) self.check() def test_concat_jacobian(self): ''' Test JacobianOP with Concat operation. This needs some special handling in auto_LiRPA to make it work properly. (See parse_graph.py for details.) 
''' class ConcatModule(nn.Module): def forward(self, x): return JacobianOP.apply(torch.cat([x, x], dim=1), x) concatmodel = ConcatModule().to(device=self.default_device, dtype=self.default_dtype) x0 = torch.randn(1, 5, device=self.default_device, dtype=self.default_dtype) BoundedModule(concatmodel, x0) print('Concat JacobianOP test passed.') if __name__ == '__main__': # Change to generate=True when genearting reference results testcase = TestJacobian(generate=False) testcase.setUp() testcase.test() ================================================ FILE: tests/test_language_models.py ================================================ """Test classes for Transformer and LSTM on language tasks""" import os import argparse import pickle import torch from auto_LiRPA.utils import logger parser = argparse.ArgumentParser() parser.add_argument('--gen_ref', action='store_true', help='generate reference results') parser.add_argument('--train', action='store_true', help='pre-train the models') parser.add_argument('--keep_results', action='store_true', help='keep intermediate results.') parser.add_argument('--load_results', action='store_true', help='load intermediate results without reruning.') args, unknown = parser.parse_known_args() def prepare_data(): os.system('cd ../examples/language;\ wget http://download.huan-zhang.com/datasets/language/data_language.tar.gz;\ tar xvf data_language.tar.gz') cmd_transformer_train = 'cd ../examples/language; \ DIR=model_transformer_test; \ python train.py --hidden_size=16 --embedding_size=16 --intermediate_size=16 --max_sent_length=16 \ --dir=$DIR --robust --method=IBP+backward_train \ --num_epochs=2 --num_epochs_all_nodes=1 --eps_start=2 --train' cmd_transformer_test = 'cd ../examples/language; \ python train.py --hidden_size=16 --embedding_size=16 --intermediate_size=16 --max_sent_length=16 \ --robust --method=IBP+backward --budget=1 --auto_test --eps=0.2 --load=../../tests/data/ckpt_transformer \ --device=cpu' cmd_lstm_train = 'cd ../examples/language; \ DIR=model_lstm_test; \ python train.py --hidden_size=16 --embedding_size=16 --max_sent_length=16 \ --dir=$DIR --model=lstm --lr=1e-3 --robust --method=IBP+backward_train --dropout=0.5 \ --num_epochs=2 --num_epochs_all_nodes=1 --eps_start=2 --train' cmd_lstm_test = 'cd ../examples/language; \ python train.py --model=lstm --hidden_size=16 --embedding_size=16 --max_sent_length=16 \ --robust --method=IBP+backward --budget=1 --auto_test --eps=0.2 --load=../../tests/data/ckpt_lstm \ --device=cpu' res_path = '../examples/language/res_test.pkl' """Pre-train a simple Transformer and LSTM respectively""" def train(): if os.path.exists("../examples/language/model_transformer_test"): os.system("rm -rf ../examples/language/model_transformer_test") if os.path.exists("../examples/language/model_lstm_test"): os.system("rm -rf ../examples/language/model_lstm_test") logger.info("\nTraining a Transformer") print(cmd_transformer_train) print() os.system(cmd_transformer_train) os.system("cp ../examples/language/model_transformer_test/ckpt_2 data/ckpt_transformer") logger.info("\nTraining an LSTM") print(cmd_lstm_train) print() os.system(cmd_lstm_train) os.system("cp ../examples/language/model_lstm_test/ckpt_2 data/ckpt_lstm") def read_res(): with open(res_path, 'rb') as file: return pickle.load(file) def evaluate(): if args.load_results: print("loading intermediate results...") with open("./tmp_language_results.pkl", "rb") as file: return pickle.load(file) logger.info('\nEvaluating the trained LSTM') print(cmd_lstm_test) print() 
os.system(cmd_lstm_test) res_lstm = read_res() logger.info('\nEvaluating the trained Transformer') print(cmd_transformer_test) print() os.system(cmd_transformer_test) res_transformer = read_res() os.system("rm {}".format(res_path)) if args.keep_results: with open("./tmp_language_results.pkl", "wb") as file: pickle.dump((res_transformer, res_lstm), file) print("intermediate results saved.") return res_transformer, res_lstm def gen_ref(): if args.train: train() res_transformer, res_lstm = evaluate() with open('data/language_test_data', 'wb') as file: pickle.dump((res_transformer, res_lstm), file) logger.info('Reference results saved') def check(): with open('data/language_test_data', 'rb') as file: res_transformer_ref, res_lstm_ref = pickle.load(file) res_transformer, res_lstm = evaluate() for res, res_ref in zip([res_transformer, res_lstm], [res_transformer_ref, res_lstm_ref]): for a, b in zip(res, res_ref): ta, tb = torch.tensor(a), torch.tensor(b) diff = torch.max(torch.abs(ta - tb)) assert diff < 1e-5, diff assert (torch.tensor(a) - torch.tensor(b)).pow(2).sum() < 1e-9 def test(): if not os.path.exists('../examples/language/data'): prepare_data() if args.gen_ref: gen_ref() else: check() logger.info("test_Language done") if __name__ == '__main__': test() ================================================ FILE: tests/test_linear_cnn_model.py ================================================ """Test bounds on a 1 layer CNN network.""" import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from test_linear_model import TestLinearModel from testcase import DEFAULT_DEVICE, DEFAULT_DTYPE input_dim = 8 out_channel = 2 N = 10 class LinearCNNModel(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv2d(1, out_channel, 3, stride=2, padding=1) def forward(self, x): x = self.conv(x) x = x.view(-1, input_dim //2 * input_dim // 2 * out_channel) return x class TestLinearCNNModel(TestLinearModel): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) self.original_model = LinearCNNModel().to(device=device, dtype=dtype) def compute_and_compare_bounds(self, eps, norm, IBP, method): input_data = torch.randn((N, 1, input_dim, input_dim)) model = BoundedModule(self.original_model, torch.empty_like(input_data), device=self.default_device) ptb = PerturbationLpNorm(norm=norm, eps=eps) ptb_data = BoundedTensor(input_data, ptb) pred = model(ptb_data) label = torch.argmax(pred, dim=1).cpu().detach().numpy() # Compute bounds. lb, ub = model.compute_bounds(IBP=IBP, method=method) # Compute reference. conv_weight, conv_bias = list(model.parameters()) conv_bias = conv_bias.view(1, out_channel, 1, 1) matrix_eye = torch.eye(input_dim * input_dim).view(input_dim * input_dim, 1, input_dim, input_dim) # Obtain equivalent weight and bias for convolution. weight = self.original_model.conv(matrix_eye) - conv_bias # Output is (batch, channel, weight, height). weight = weight.view(input_dim * input_dim, -1) # Dimension is (flattened_input, flattened_output). bias = conv_bias.repeat(1, 1, input_dim //2, input_dim //2).view(-1) flattend_data = input_data.view(N, -1) # Compute dual norm. if norm == 1: q = np.inf elif norm == np.inf: q = 1.0 else: q = 1.0 / (1.0 - (1.0 / norm)) # Manually compute bounds. 
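        # For a purely linear map f(x) = W x + b and an Lp ball of radius eps around x,
        # Hölder's inequality gives max_{||d||_p <= eps} w_i . d = eps * ||w_i||_q with
        # 1/p + 1/q = 1, so the exact bounds are f(x)_i +/- eps * ||w_i||_q. The code
        # below evaluates this closed form, with the convolution rewritten as an
        # equivalent dense weight matrix.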
norm = weight.t().norm(p=q, dim=1) expected_pred = flattend_data.matmul(weight) + bias expected_ub = eps * norm + expected_pred expected_lb = -eps * norm + expected_pred # Check equivalence. if method == 'backward' or method == 'forward': self.rtol = 1e-4 self.assertEqual(expected_pred, pred) self.assertEqual(expected_ub, ub) self.assertEqual(expected_lb, lb) ================================================ FILE: tests/test_linear_model.py ================================================ """Test bounds on a 1 layer linear network.""" import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE n_classes = 3 N = 10 class LinearModel(nn.Module): def __init__(self): super().__init__() self.fc = nn.Linear(256, n_classes) def forward(self, x): x = self.fc(x) return x class TestLinearModel(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=0, device=device, dtype=dtype) self.original_model = LinearModel().to(device=device, dtype=dtype) def compute_and_compare_bounds(self, eps, norm, IBP, method): input_data = torch.randn( (N, 256), device=self.default_device, dtype=self.default_dtype) model = BoundedModule(self.original_model, torch.empty_like(input_data), device=self.default_device) ptb = PerturbationLpNorm(norm=norm, eps=eps) ptb_data = BoundedTensor(input_data, ptb) pred = model(ptb_data) label = torch.argmax(pred, dim=1).cpu().detach().numpy() # Compute bounds. lb, ub = model.compute_bounds(IBP=IBP, method=method) # Compute dual norm. if norm == 1: q = np.inf elif norm == np.inf: q = 1.0 else: q = 1.0 / (1.0 - (1.0 / norm)) # Compute reference manually. weight, bias = list(model.parameters()) norm = weight.norm(p=q, dim=1) expected_pred = input_data.matmul(weight.t()) + bias expected_ub = eps * norm + expected_pred expected_lb = -eps * norm + expected_pred # Check equivalence. 
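        # For a single linear layer, the CROWN (backward), forward, and IBP bounds should
        # all coincide with the exact dual-norm bound computed above, which is why the
        # comparison below only allows a small relative tolerance.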
self.rtol = 1e-4 self.assertEqual(expected_pred, pred) self.assertEqual(expected_ub, ub) self.assertEqual(expected_lb, lb) def test_Linf_forward(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=0.3, norm=np.inf, IBP=False, method='forward') def test_Linf_backward(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=0.3, norm=np.inf, IBP=False, method='backward') def test_Linf_IBP(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=0.3, norm=np.inf, IBP=True, method=None) def test_Linf_backward_IBP(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=0.3, norm=np.inf, IBP=True, method='backward') def test_L2_forward(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=1.0, norm=2, IBP=False, method='forward') def test_L2_backward(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=1.0, norm=2, IBP=False, method='backward') def test_L2_IBP(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=1.0, norm=2, IBP=True, method=None) def test_L2_backward_IBP(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=1.0, norm=2, IBP=True, method='backward') def test_L1_forward(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=3.0, norm=1, IBP=False, method='forward') def test_L1_backward(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=3.0, norm=1, IBP=False, method='backward') def test_L1_IBP(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=3.0, norm=1, IBP=True, method=None) def test_L1_backward_IBP(self): with np.errstate(divide='ignore'): self.compute_and_compare_bounds(eps=3.0, norm=1, IBP=True, method='backward') ================================================ FILE: tests/test_maxpool.py ================================================ """Test max pooling.""" import torch import torch.nn as nn import torch.nn.functional as F from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class Model(nn.Module): def __init__(self, kernel_size=4, stride=4, padding=0, conv_padding=0): super(Model, self).__init__() self.n_n_conv2d = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 1, 'padding': conv_padding, 'kernel_size': (2, 2), 'stride': [1, 1], 'in_channels': 1, 'bias': True}) self.n_n_maxpool = nn.MaxPool2d(**{'kernel_size': [kernel_size, kernel_size], 'ceil_mode': False, 'stride': [stride, stride], 'padding': [padding, padding]}) self.n_n_conv2d_2 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 1, 'padding': [conv_padding, conv_padding], 'kernel_size': (2, 2), 'stride': [1, 1], 'in_channels': 1, 'bias': True}) self.n_n_maxpool_2 = nn.MaxPool2d(**{'kernel_size': [kernel_size, kernel_size], 'ceil_mode': False, 'stride': [stride, stride], 'padding': [padding, padding]}) self.n_n_flatten_Flatten = nn.Flatten(**{'start_dim': 1}) self.n_n_dense = None self.n_n_activation_Flatten = nn.Flatten(**{'start_dim': 1}) def forward(self, *inputs): t_ImageInputLayer, = inputs t_conv2d = self.n_n_conv2d(t_ImageInputLayer) t_conv2d_relu = F.relu(t_conv2d) t_maxpool = self.n_n_maxpool(t_conv2d_relu)[:, :, :, :] t_conv2d_max = self.n_n_conv2d_2(t_maxpool) t_conv2d_max = F.relu(t_conv2d_max) # t_maxpool_2 = self.n_n_maxpool_2(t_conv2d_max) t_flatten_Transpose = t_conv2d_max.permute(*[0, 2, 3, 1]) t_flatten_Flatten 
= self.n_n_flatten_Flatten(t_flatten_Transpose) t_flatten_Unsqueeze = torch.unsqueeze(t_flatten_Flatten, 2) t_flatten_Unsqueeze = torch.unsqueeze(t_flatten_Unsqueeze, 3) if self.n_n_dense is None: self.n_n_dense = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 2, 'padding': [0, 0], 'kernel_size': (1, 1), 'stride': [1, 1], 'in_channels': t_flatten_Unsqueeze.shape[1], 'bias': True}) t_dense = self.n_n_dense(t_flatten_Unsqueeze) t_activation_Flatten = self.n_n_activation_Flatten(t_dense) return t_activation_Flatten class TestMaxPool(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) def test(self): if self.default_dtype == torch.float64: data_path = 'data_64/' else: data_path = 'data/' N = 2 for kernel_size in [3,4]: for padding in [0,1]: for conv_padding in [0,1]: print(kernel_size, padding, kernel_size, conv_padding) model_ori = Model(kernel_size=kernel_size, padding=padding, stride=kernel_size, conv_padding=conv_padding).to( device=self.default_device, dtype=self.default_dtype) if not self.generate: data = torch.load(data_path + 'maxpool_test_data_{}-{}-{}-{}'.format(kernel_size, padding, kernel_size, conv_padding), weights_only=False) image = data['input'] model_ori(image) model_ori.load_state_dict(data['model']) else: image = torch.rand([N, 1, 28, 28]) model_ori(image) if self.generate: conv_mode = "matrix" else: conv_mode = "patches" model = BoundedModule(model_ori, image, device=self.default_device, bound_opts={"conv_mode": conv_mode}) eps = 0.3 norm = np.inf ptb = PerturbationLpNorm(norm=norm, eps=eps) image = BoundedTensor(image, ptb) lb, ub = model.compute_bounds((image,)) if self.generate: torch.save( {'model': model_ori.state_dict(), 'input': image, 'lb': lb, 'ub': ub}, data_path + 'maxpool_test_data_{}-{}-{}-{}'.format(kernel_size, padding, kernel_size, conv_padding) ) if not self.generate: lb_ref = data['lb'] ub_ref = data['ub'] assert torch.allclose(lb, lb_ref, 1e-4) assert torch.allclose(ub, ub_ref, 1e-4) if __name__ == '__main__': testcase = TestMaxPool(generate=False) testcase.test() ================================================ FILE: tests/test_min_max.py ================================================ import os import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class Test_Model(nn.Module): def __init__(self): super(Test_Model, self).__init__() self.seq1 = nn.Sequential( nn.Conv2d(1, 16, 4, stride=2, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 4, stride=2, padding=1) ) self.seq2 = nn.Sequential( nn.Conv2d(1, 16, 4, stride=2, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 4, stride=2, padding=1) ) self.seq3 = nn.Sequential( nn.Conv2d(32, 8, 2, stride=2, padding=1), nn.ReLU(), Flatten(), nn.Linear(8*4*4,100), nn.ReLU(), nn.Linear(100, 10) ) def forward(self, x): return self.seq3(torch.max(self.seq1(x), self.seq2(x))) class TestMinMax(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name='min_max_test_data', generate=generate, device=device, dtype=dtype) def test(self): self.result = [] for conv_mode in ['patches', 'matrix']: for use_shared_alpha in [True, False]: model = 
Test_Model().to(device=self.default_device, dtype=self.default_dtype) checkpoint = torch.load( os.path.join(os.path.dirname(__file__), '../examples/vision/pretrained/test_min_max.pth'), map_location=self.default_device) model.load_state_dict(checkpoint) test_data = torchvision.datasets.MNIST( './data', train=False, download=True, transform=torchvision.transforms.ToTensor()) N = 2 image = test_data.data[:N].view(N,1,28,28) image = image.to(device=self.default_device, dtype=self.default_dtype) / 255.0 lirpa_model = BoundedModule(model, torch.empty_like(image), device=image.device, bound_opts={"conv_mode": conv_mode}) eps = 0.3 ptb = PerturbationLpNorm(eps = eps) image = BoundedTensor(image, ptb) lirpa_model.set_bound_opts({ 'optimize_bound_args': { 'iteration': 5, 'lr_alpha': 0.1, 'use_shared_alpha': use_shared_alpha, } }) lb, ub = lirpa_model.compute_bounds(x=(image,), method='CROWN-Optimized') print(lb, ub) self.result.append((lb, ub)) self.setUp() self.rtol = 1e-4 self.check() if __name__ == "__main__": testcase = TestMinMax(generate=False) testcase.test() ================================================ FILE: tests/test_perturbation.py ================================================ """ Test different Perturbation classes""" import torch import torch.nn as nn import numpy as np from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm, PerturbationLinear from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE BATCH = 2 IN_DIM = 3 OUT_DIM = 4 class ToyModel(nn.Module): """Small model with two MatMuls and ReLU.""" def __init__(self): super().__init__() self.fc1 = nn.Linear(OUT_DIM, 8) self.fc2 = nn.Linear(8, OUT_DIM) self.relu = nn.ReLU() def forward(self, x): x = self.fc1(x) x = self.relu(x) x = self.fc2(x) return x class TestPerturbation(TestCase): """ Tests for: - PerturbationLinear - PerturbationLpNorm """ def __init__(self, methodName='runTest', seed=1, generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed, 'test_perturbation_data', generate, device=device, dtype=dtype) def test(self): device = self.default_device dtype = self.default_dtype model = ToyModel().to(device=device, dtype=dtype) # Prepare base input interval input_lb = torch.rand(BATCH, IN_DIM, device=device, dtype=dtype) input_ub = input_lb + torch.rand_like(input_lb) # ensure ub > lb self.result = [] # ================================================================= # Test PerturbationLinear # ================================================================= # Build A matrices lower_A = torch.randn(BATCH, OUT_DIM, IN_DIM, device=device, dtype=dtype) upper_A = lower_A + torch.rand_like(lower_A) # biases lower_b = torch.randn(BATCH, OUT_DIM, device=device, dtype=dtype) upper_b = lower_b + torch.rand_like(lower_b) # Manual concretization mid = ((input_lb + input_ub) / 2.0).unsqueeze(-1) # (B, IN_DIM, 1) diff = ((input_ub - input_lb) / 2.0).unsqueeze(-1) # (B, IN_DIM, 1) manual_L = (lower_A @ mid - torch.abs(lower_A) @ diff).squeeze(-1) + lower_b manual_U = (upper_A @ mid + torch.abs(upper_A) @ diff).squeeze(-1) + upper_b assert (manual_L < manual_U).all(), "Invalid manual bounds construction." 
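        # The manual concretization above uses the standard interval form: for linear
        # relaxations A_l x + b_l <= f(x) <= A_u x + b_u with x in [input_lb, input_ub],
        #   min_x A_l x + b_l = A_l @ mid - |A_l| @ diff + b_l,
        #   max_x A_u x + b_u = A_u @ mid + |A_u| @ diff + b_u,
        # where mid = (input_lb + input_ub) / 2 and diff = (input_ub - input_lb) / 2.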
ptb_linear = PerturbationLinear( lower_A=lower_A, upper_A=upper_A, lower_b=lower_b, upper_b=upper_b, input_lb=input_lb, input_ub=input_ub, x_L=manual_L, x_U=manual_U ) bounded_x = BoundedTensor((manual_L + manual_U) / 2, ptb_linear) lirpa_model = BoundedModule(model, bounded_x) lb_linear, ub_linear = lirpa_model.compute_bounds(bounded_x, method='backward') assert (lb_linear <= ub_linear).all(), "Invalid bounds from PerturbationLinear." self.result.append((lb_linear, ub_linear)) # ================================================================= # Test PerturbationLpNorm # ================================================================= # We directly use manual concretization here for testing ptb_linf = PerturbationLpNorm(x_L=manual_L, x_U=manual_U) bounded_x = BoundedTensor((manual_L + manual_U) / 2, ptb_linf) lirpa_model = BoundedModule(model, bounded_x) lb_linf, ub_linf = lirpa_model.compute_bounds(bounded_x, method='backward') assert (lb_linf <= ub_linf).all(), "Invalid bounds from PerturbationLpNorm." self.result.append((lb_linf, ub_linf)) # Notice that with the same x_L and x_U, PerturbationLinear should give # tighter bounds than PerturbationLpNorm. This is because # PerturbationLinear uses additional information (A matrices and biases). assert (lb_linear >= lb_linf).all() and (ub_linear <= ub_linf).all( ), "PerturbationLinear should give tighter bounds than PerturbationLpNorm." self.check() if __name__ == '__main__': testcase = TestPerturbation(generate=False) testcase.test() ================================================ FILE: tests/test_rectangle_patches.py ================================================ import sys import torch import numpy as np import torch.nn as nn import torch.nn.functional as F import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * sys.path.append('../examples/vision') from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class cnn_4layer_resnet(nn.Module): def __init__(self): super(cnn_4layer_resnet, self).__init__() self.conv1 = nn.Conv2d(3, 3, 4, stride=2, padding=1) self.bn = nn.BatchNorm2d(3) self.shortcut = nn.Conv2d(3, 3, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(3, 3, 4, stride=2, padding=1) self.fc1 = nn.Linear(168, 10) def forward(self, x): x_ = x x = F.relu(self.conv1(self.bn(x))) x += self.shortcut(x_) x = F.relu(self.conv2(x)) x = x.view(x.size(0), -1) print(x.size()) x = self.fc1(x) return x class TestResnetPatches(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1234, ref_name='rectangle_patches_test_data', generate=generate, device=device, dtype=dtype) def test(self): model_oris = [ cnn_4layer_resnet(), ] self.result = [] if not self.generate: self.reference = torch.load( self.ref_path, map_location=self.default_device) for model_ori in model_oris: conv_mode = 'patches' # conv_mode can be set as 'matrix' or 'patches' normalize = torchvision.transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = [0.2023, 0.1994, 0.2010]) test_data = torchvision.datasets.CIFAR10("./data", train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor(), normalize])) N = 1 n_classes = 10 image = torch.Tensor(test_data.data[:N]).reshape(N,3,32,32) image = image[:, :, :28, :] image = image.to(device=self.default_device, dtype=self.default_dtype) / 255.0 model_ori = model_ori.to( device=self.default_device, dtype=self.default_dtype) model = 
BoundedModule(model_ori, image, bound_opts={ "conv_mode": conv_mode}, device=self.default_device) ptb = PerturbationLpNorm(norm = np.inf, eps = 0.03) image = BoundedTensor(image, ptb) pred = model(image) lb, ub = model.compute_bounds(IBP=False, C=None, method='backward') self.result += [lb, ub] self.check() if __name__ == '__main__': # Change to generate=True when genearting reference results testcase = TestResnetPatches(generate=False) testcase.test() ================================================ FILE: tests/test_resnet_patches.py ================================================ import sys import torch import numpy as np import torchvision import models from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE sys.path.append('../examples/vision') class TestResnetPatches(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1234, ref_name='resnet_patches_test_data', generate=generate, device=device, dtype=dtype) def test(self): model_oris = [ models.model_resnet(width=1, mult=2), models.ResNet18(in_planes=2) ] self.result = [] for model_ori in model_oris: conv_mode = 'patches' # conv_mode can be set as 'matrix' or 'patches' normalize = torchvision.transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = [0.2023, 0.1994, 0.2010]) test_data = torchvision.datasets.CIFAR10("./data", train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor(), normalize])) N = 1 n_classes = 10 image = torch.Tensor(test_data.data[:N]).reshape(N,3,32,32) image = image.to(device=self.default_device, dtype=self.default_dtype) / 255.0 model_ori = model_ori.to( device=self.default_device, dtype=self.default_dtype) model = BoundedModule(model_ori, image, bound_opts={"conv_mode": conv_mode}, device=self.default_device) ptb = PerturbationLpNorm(norm = np.inf, eps = 0.03) image = BoundedTensor(image, ptb) pred = model(image) lb, ub = model.compute_bounds(IBP=False, C=None, method='backward') self.result += [lb, ub] self.check() if __name__ == '__main__': # Change to generate=True when genearting reference results testcase = TestResnetPatches(generate=False) testcase.test() ================================================ FILE: tests/test_s_shaped.py ================================================ # pylint: disable=wrong-import-position """Test S-shaped activation functions.""" import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class test_model(nn.Module): def __init__(self, act_func): super().__init__() self.act_func = act_func def forward(self, x): return self.act_func(x) def sigmoid(x): return torch.sigmoid(x) def sin(x): return torch.sin(x) def verify_bounds(model, input_lb, input_ub, lb, ub): """ Empirically verify that the model's output bounds are correct given input bounds. Args: model: The neural network model. input_lb: Lower bound of the input. input_ub: Upper bound of the input. lb: Computed lower bound of the output. ub: Computed upper bound of the output. 
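    Note:
        This is a Monte-Carlo check. It samples inputs uniformly from
        [input_lb, input_ub] and asserts that every sampled output lies within
        [lb, ub] up to a small tolerance; it can expose unsound bounds but does
        not by itself prove soundness.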
""" n_samples = 100000 atol = 1e-5 inputs = torch.rand(n_samples, *input_lb.shape[1:]) * (input_ub - input_lb) + input_lb outputs = model(inputs) empirical_lb = outputs.min(dim=0).values empirical_ub = outputs.max(dim=0).values if not (empirical_lb - lb >= -atol).all(): max_violation = (lb - empirical_lb).max().item() raise AssertionError(f"Lower bound violated. Max violation: {max_violation}") if not (empirical_ub - ub <= atol).all(): max_violation = (empirical_ub - ub).max().item() raise AssertionError(f"Upper bound violated. Max violation: {max_violation}") print("Bounds verified successfully.") class TestSShaped(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__( methodName, seed=1, ref_name='s_shape_test_data', generate=generate, device=device, dtype=dtype) def _run_bound_test(self, model, input_lb, input_ub, methods): """Helper to compute and verify bounds.""" model = model.to(device=self.default_device, dtype=self.default_dtype) lirpa_model = BoundedModule(model, torch.empty_like(input_lb), device=self.default_device) ptb = PerturbationLpNorm(x_L=input_lb, x_U=input_ub) ptb_data = BoundedTensor(input_lb, ptb) for method in methods: lb, ub = lirpa_model.compute_bounds(x=(ptb_data,), method=method) verify_bounds(model, input_lb, input_ub, lb, ub) self.result.append((lb, ub)) def test(self): self.result = [] methods = ['CROWN', 'CROWN-OPTIMIZED'] # ----- Test BoundSin ----- model_sin = test_model(sin) start, end = -10, 10 n_intervals = end - start - 1 # Inputs as multiples of pi input_lb = torch.linspace(start, end - 1, n_intervals) * torch.pi input_ub = torch.linspace(start + 1, end, n_intervals) * torch.pi input_lb, input_ub = input_lb.unsqueeze(0), input_ub.unsqueeze(0) self._run_bound_test(model_sin, input_lb, input_ub, methods) # Inputs as multiples of pi / 2 self._run_bound_test(model_sin, input_lb / 2, input_ub / 2, methods) # ----- Test BoundSigmoid ----- model_sigmoid = test_model(sigmoid) input_lb = torch.tensor([[-2., -0.1]], device=self.default_device, dtype=self.default_dtype) input_ub = torch.tensor([[0.1, 2.]], device=self.default_device, dtype=self.default_dtype) self._run_bound_test(model_sigmoid, input_lb, input_ub, methods) # Check reference results self.check() if __name__ == '__main__': # Change to generate=True when generating reference results testcase = TestSShaped(generate=False) testcase.setUp() testcase.test() ================================================ FILE: tests/test_save_intermediate.py ================================================ import torch import torch.nn as nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import _to, TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class test_model(nn.Module): def __init__(self): super(test_model, self).__init__() self.model = nn.Sequential( nn.Flatten(), nn.Linear(3 * 32 * 32, 1000), nn.Sigmoid(), nn.Linear(1000, 500), nn.Linear(500, 200), nn.Linear(200, 100), nn.ReLU(), nn.Linear(100, 10) ) def forward(self, x): x = self.model(x) return x class TestSave(TestCase): def __init__(self, methodName='runTest', device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def test(self, gen_ref=False): image = torch.randn(1, 3, 32, 32) image = image.to(device=self.default_device, dtype=self.default_dtype) / 255.0 model = test_model().to(device=self.default_device, dtype=self.default_dtype) bounded_model = BoundedModule( model, image, bound_opts={ 
'optimize_bound_args': {'iteration': 2}, }, device=self.default_device) ptb = PerturbationLpNorm(eps=3/255) x = BoundedTensor(image, ptb) bounded_model.compute_bounds(x=(x,), method='CROWN-Optimized') if self.default_dtype == torch.float32: data_path = 'data/' elif self.default_dtype == torch.float64: data_path = 'data_64/' data_path += 'test_save_data' save_dict = bounded_model.save_intermediate( save_path=data_path if gen_ref else None) if gen_ref: torch.save(save_dict, data_path) return ref_dict = torch.load(data_path) ref_dict = _to( ref_dict, device=self.default_device, dtype=self.default_dtype) for node in ref_dict.keys(): assert torch.allclose(ref_dict[node][0], save_dict[node][0], atol=1e-5) assert torch.allclose(ref_dict[node][1], save_dict[node][1], atol=1e-5) if __name__ == '__main__': testcase = TestSave() testcase.test() ================================================ FILE: tests/test_simple_verification.py ================================================ """Test optimized bounds in simple_verification.""" import torch import torch.nn as nn import torchvision from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import PerturbationLpNorm from auto_LiRPA.utils import Flatten from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE # This simple model comes from https://github.com/locuslab/convex_adversarial def mnist_model(): model = nn.Sequential( nn.Conv2d(1, 16, 4, stride=2, padding=1), nn.ReLU(), nn.Conv2d(16, 32, 4, stride=2, padding=1), nn.ReLU(), Flatten(), nn.Linear(32*7*7,100), nn.ReLU(), nn.Linear(100, 10) ) return model class TestSimpleVerification(TestCase): def __init__(self, methodName='runTest', device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def test(self): model = mnist_model() checkpoint = torch.load( '../examples/vision/pretrained/mnist_a_adv.pth', map_location=torch.device('cpu')) model.load_state_dict(checkpoint) model = model.to(device=self.default_device, dtype=self.default_dtype) test_data = torchvision.datasets.MNIST( './data', train=False, download=True, transform=torchvision.transforms.ToTensor()) N = 2 image = test_data.data[:N].view(N,1,28,28) image = image.to(device=self.default_device, dtype=self.default_dtype) / 255.0 lirpa_model = BoundedModule(model, torch.empty_like(image), device=self.default_device) ptb = PerturbationLpNorm(0.3) image = BoundedTensor(image, ptb) method = 'CROWN-Optimized (alpha-CROWN)' lirpa_model.set_bound_opts({'optimize_bound_args': {'iteration': 20, 'lr_alpha': 0.1}}) _, ub = lirpa_model.compute_bounds(x=(image,), method=method.split()[0]) self.assertEqual(ub[0][7], torch.tensor(12.5080)) if __name__ == '__main__': testcase = TestSimpleVerification() testcase.test() ================================================ FILE: tests/test_state_dict_name.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from auto_LiRPA import BoundedModule from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class FeatureExtraction(nn.Module): def __init__(self): super().__init__() self.conv1 = nn.Conv2d(1, 8, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(8, 16, 4, stride=2, padding=1) self.fc1 = nn.Linear(784, 256) def forward(self, x): x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) x = x.view(-1, 784) x = F.relu(self.fc1(x)) return x class cnn_MNIST(nn.Module): def __init__(self): super().__init__() self.features = BoundedModule(FeatureExtraction(), torch.empty((1, 1, 28, 
28))) self.fc = nn.Linear(256, 10) def forward(self, x): x = self.features(x) return self.fc(x) class TestStateDictName(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def test(self): model = cnn_MNIST().to(device=self.default_device, dtype=self.default_dtype) state_dict = model.state_dict() dummy = torch.randn((1, 1, 28, 28)) ret1 = model(dummy) # create a second model and load the state_dict to test whether load_state_dict() works properly model = cnn_MNIST().to(device=self.default_device, dtype=self.default_dtype) model.load_state_dict(state_dict, strict=True) ret2 = model(dummy) self.assertEqual(ret1, ret2) if __name__ == '__main__': # Change to generate=True when generating reference results testcase = TestStateDictName(generate=False) testcase.test() ================================================ FILE: tests/test_tensor_storage.py ================================================ import random import torch from complete_verifier.tensor_storage import StackTensorStorage, QueueTensorStorage from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class TestTensorStorage(TestCase): def __init__(self, methodName='runTest', device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, device=device, dtype=dtype) def test_content(self, seed=123): self.set_seed(seed) storage_classes_and_pop_behavior = [ ( StackTensorStorage, lambda tensor_list, num_pop: (tensor_list[-num_pop:], tensor_list[:-num_pop]) ), ( QueueTensorStorage, lambda tensor_list, num_pop: (tensor_list[:num_pop], tensor_list[num_pop:]) ) ] for storage_class, pop_behavior in storage_classes_and_pop_behavior: for concat_dim in [0, 1, 2]: # The call to `.size()` has side effects for `QueueTensorStorage`, because it will # cause a call to `.tensor()` which may change the internal storage. for check_size in [True, False]: stored_tensors = [] shape = [2,3,4] def make_random_tensor(): random_size = random.randint(1, 100) tensors = [] for _ in range(random_size): random_tensor = torch.randn( shape[:concat_dim] + shape[concat_dim+1:], device=self.default_device, dtype=self.default_dtype).unsqueeze(concat_dim) tensors.append(random_tensor) return torch.cat(tensors, dim=concat_dim), tensors s = storage_class(full_shape=shape, initial_size=16, switching_size=65536, concat_dim=concat_dim) for _ in range(1000): random_tensor, tensors = make_random_tensor() s.append(random_tensor) stored_tensors.extend(tensors) if check_size: assert s.size(concat_dim) == len(stored_tensors) num_pop = random.randint(1, 100) popped_tensors, stored_tensors = pop_behavior(stored_tensors, num_pop) popped_tensor = s.pop(num_pop) assert torch.allclose(popped_tensor, torch.cat(popped_tensors, dim=concat_dim)) if check_size: assert s.size(concat_dim) == len(stored_tensors) def test_tensor_call(self, seed=123): # The call to `.tensor()` has side effects for `QueueTensorStorage`, because it will # cause a call to `.size()` which may change the internal storage.
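# A minimal illustration (not executed) of the queue behaviour this test relies on;
# only the constructor arguments and methods already exercised in this file are assumed:
#   s = QueueTensorStorage(full_shape=[2, 3, 4], initial_size=16,
#                          switching_size=65536, concat_dim=0)
#   s.append(torch.randn(5, 3, 4))  # enqueue 5 items along concat_dim
#   s.append(torch.randn(2, 3, 4))  # enqueue 2 more items
#   oldest = s.pop(3)               # a queue pops the 3 oldest items first
#   remaining = s.tensor()          # contiguous view of the 4 remaining items; once the
#                                   # usage window wraps past the end of the backing
#                                   # storage, this call may reorganize the internal buffer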
self.set_seed(seed) pop_behavior = lambda tensor_list, num_pop: (tensor_list[:num_pop], tensor_list[num_pop:]) for concat_dim in [0, 1, 2]: stored_tensors = [] shape = [2,3,4] def make_random_tensor(): random_size = random.randint(1, 100) tensors = [] for _ in range(random_size): random_tensor = torch.randn(shape[:concat_dim] + shape[concat_dim+1:], dtype=self.default_dtype).unsqueeze(concat_dim) tensors.append(random_tensor) return torch.cat(tensors, dim=concat_dim), tensors s = QueueTensorStorage(full_shape=shape, initial_size=16, switching_size=16, concat_dim=concat_dim) for _ in range(1000): random_tensor, tensors = make_random_tensor() s.append(random_tensor) stored_tensors.extend(tensors) num_pop = random.randint(1, 10) _, stored_tensors = pop_behavior(stored_tensors, num_pop) _ = s.pop(num_pop) if s._usage_start + s.num_used > s._storage.size(concat_dim): storage_content = s.tensor() assert torch.allclose(storage_content, torch.cat(stored_tensors, dim=concat_dim)) def test_size_queue(self): for concat_dim in [0, 1, 2]: shape = [1,1,1] shape[concat_dim] = -1 # does no matter. zero_shape = shape.copy() zero_shape[concat_dim] = 0 def make_tensor(x): return torch.arange( 1, x+1, device=self.default_device, dtype=self.default_dtype).view(*shape) s = QueueTensorStorage(full_shape=shape, initial_size=16, switching_size=65536, concat_dim=concat_dim) s.append(make_tensor(1)) assert s.sum() == 1, s.tensor() s.append(make_tensor(3)) assert s.sum() == 1 + 6, s.tensor() s.append(make_tensor(5)) assert s.sum() == 1 + 6 + 15, s.tensor() t = s.pop(5) assert torch.allclose(t.squeeze(), torch.tensor( [1, 1, 2, 3, 1], device=self.default_device, dtype=self.default_dtype)) t = s.pop(0) assert t.shape == torch.Size(zero_shape) t = s.pop(-1) assert t.shape == torch.Size(zero_shape) s.append(make_tensor(100)) expected_sum = 1 + sum(range(1,4)) + sum(range(1,6)) - (1 + 1 + 2 + 3 + 1) + sum(range(1,101)) assert s.sum() == expected_sum, (s.sum(), expected_sum) t = s.pop(5) assert torch.allclose(t.squeeze(), torch.tensor( [2, 3, 4, 5, 1], device=self.default_device, dtype=self.default_dtype)), print(t) assert s.size(concat_dim) == 99, print(s.size()) assert s._storage.size(concat_dim) == 104, print(s._storage.size()) s.append(make_tensor(10)) assert s.size(concat_dim) == 109, print(s.size()) assert s._storage.size(concat_dim) == 208, print(s._storage.size()) s.append(make_tensor(32768)) assert s.size(concat_dim) == 32877, print(s.size()) assert s._storage.size(concat_dim) == 32877, print(s._storage.size()) s.pop(1) s.append(make_tensor(2)) assert s.size(concat_dim) == 32878, print(s.size()) assert s._storage.size(concat_dim) == 32877*2, print(s._storage.size()) s.append(make_tensor(32800)) s.append(make_tensor(100)) assert s._storage.size(concat_dim) == 32877*2+100*32, print(s._storage.size()) s.pop(100000) assert s._storage.size(concat_dim) == 32877*2+100*32, print(s._storage.size()) assert s.size(concat_dim) == 0, print(s.size()) t = s.pop(1) assert t.shape == torch.Size(zero_shape) t = s.pop(0) assert t.shape == torch.Size(zero_shape) t = s.pop(-1) assert t.shape == torch.Size(zero_shape) def test_size_stack(self): for concat_dim in [0, 1, 2]: shape = [1,1,1] shape[concat_dim] = -1 # does no matter. 
zero_shape = shape.copy() zero_shape[concat_dim] = 0 make_tensor = lambda x: torch.arange(1,x+1, dtype=self.default_dtype).view(*shape) s = StackTensorStorage(full_shape=shape, initial_size=16, switching_size=65536, concat_dim=concat_dim) s.append(make_tensor(1)) assert s.sum() == 1, print(s) s.append(make_tensor(3)) assert s.sum() == 1 + 6, print(s) s.append(make_tensor(5)) assert s.sum() == 1 + 6 + 15, print(s) t = s.pop(5) assert torch.allclose(t.squeeze(), torch.tensor( [1, 2, 3, 4, 5], device=self.default_device, dtype=self.default_dtype)), print(t) t = s.pop(0) assert t.shape == torch.Size(zero_shape) t = s.pop(-1) assert t.shape == torch.Size(zero_shape) s.append(make_tensor(100)) assert s.sum() == 1 + 6 + 50*101 t = s.pop(5) assert torch.allclose(t.squeeze(), torch.tensor( [96, 97, 98, 99, 100], device=self.default_device, dtype=self.default_dtype)), print(t) assert s.size(concat_dim) == 99, print(s.size()) assert s._storage.size(concat_dim) == 104, print(s._storage.size()) s.append(make_tensor(10)) assert s.size(concat_dim) == 109, print(s.size()) assert s._storage.size(concat_dim) == 208, print(s._storage.size()) s.append(make_tensor(32768)) assert s.size(concat_dim) == 32877, print(s.size()) assert s._storage.size(concat_dim) == 32877, print(s._storage.size()) s.pop(1) s.append(make_tensor(2)) assert s.size(concat_dim) == 32878, print(s.size()) assert s._storage.size(concat_dim) == 32877*2, print(s._storage.size()) s.append(make_tensor(32800)) s.append(make_tensor(100)) assert s._storage.size(concat_dim) == 32877*2+100*32, print(s._storage.size()) s.pop(100000) assert s._storage.size(concat_dim) == 32877*2+100*32, print(s._storage.size()) assert s.size(concat_dim) == 0, print(s.size()) t = s.pop(1) assert t.shape == torch.Size(zero_shape) t = s.pop(0) assert t.shape == torch.Size(zero_shape) t = s.pop(-1) assert t.shape == torch.Size(zero_shape) if __name__ == "__main__": testcase = TestTensorStorage() testcase.test_tensor_call() testcase.test_size_stack() testcase.test_size_queue() testcase.test_content() ================================================ FILE: tests/test_upsample.py ================================================ from collections import defaultdict from torch import nn from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class Model(nn.Module): def __init__(self, input_dim=5, image_size=4, scale_factor=2, conv_kernel_size=3, stride=1, padding=1, conv_in_channels=16, conv_out_channels=4): super(Model, self).__init__() self.conv_in_channels = conv_in_channels self.input_dim = input_dim self.image_size = image_size self.fc1 = nn.Linear(input_dim, conv_in_channels * image_size * image_size) self.upsample = nn.Upsample(scale_factor=(scale_factor, scale_factor), mode='nearest') # H = W = 4 * scale_factor now self.conv1 = nn.Conv2d(in_channels=conv_in_channels, out_channels=conv_out_channels, kernel_size=(conv_kernel_size, conv_kernel_size), stride=(stride, stride), padding=padding) # H = W = (4 * scale + 2 * pad - ker + s) // s size_after_conv = (4 * scale_factor + 2 * padding - conv_kernel_size + stride) // stride assert size_after_conv > 0, "0 size after convolution, please use more padding, more scale_factor," \ "smaller kernel, or smaller stride" self.relu = nn.ReLU() self.flatten = nn.Flatten() self.fc2 = nn.Linear(size_after_conv * size_after_conv * conv_out_channels, 1) # self.sigmoid = nn.Sigmoid() def forward(self, input_z): f1 = self.fc1(input_z) d1 = 
f1.reshape(-1, self.conv_in_channels, self.image_size, self.image_size) d2 = self.upsample(d1) d3 = self.conv1(d2) d4 = self.relu(d3) f2 = self.flatten(d4) f3 = self.fc2(f2) # out = self.sigmoid(f3) return f3 class ModelReducedCGAN(nn.Module): def __init__(self): """ The network has the same architecture as the merged-BN CGAN upsampling model, except with reduced channel counts. """ super(ModelReducedCGAN, self).__init__() self.fc1 = nn.Linear(5, 32) self.up1 = nn.Upsample(scale_factor=2, mode='nearest') self.conv1 = nn.Conv2d(in_channels=2, out_channels=2, kernel_size=3, stride=1, padding=1) self.relu1 = nn.ReLU() self.up2 = nn.Upsample(scale_factor=2, mode='nearest') self.conv2 = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=3, stride=1, padding=1) self.relu2 = nn.ReLU() self.up3 = nn.Upsample(scale_factor=2, mode='nearest') self.conv3 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1) self.relu3 = nn.ReLU() self.conv4 = nn.Conv2d(in_channels=4, out_channels=2, kernel_size=3, stride=1, padding=1) self.conv5 = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=3, stride=2, padding=1) self.relu4 = nn.ReLU() self.conv6 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, stride=2, padding=1) self.relu5 = nn.ReLU() self.conv7 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=2, padding=1) self.relu6 = nn.ReLU() self.conv8 = nn.Conv2d(in_channels=4, out_channels=4, kernel_size=3, stride=2, padding=1) self.relu7 = nn.ReLU() self.fc2 = nn.Linear(4 * 2 * 2, 1) self.sigmoid = nn.Sigmoid() def forward(self, input_z): f1 = self.fc1(input_z) f2 = f1.reshape(-1, 2, 4, 4) f3 = self.up1(f2) f4 = self.conv1(f3) f5 = self.relu1(f4) f6 = self.up2(f5) f7 = self.conv2(f6) f8 = self.relu2(f7) f9 = self.up3(f8) f10 = self.conv3(f9) f11 = self.relu3(f10) f12 = self.conv4(f11) f13 = self.conv5(f12) f14 = self.relu4(f13) f15 = self.conv6(f14) f16 = self.relu5(f15) f17 = self.conv7(f16) f18 = self.relu6(f17) f19 = self.conv8(f18) f20 = self.relu7(f19) f21 = f20.reshape(f20.shape[0], -1) f22 = self.fc2(f21) # f23 = self.sigmoid(f22) return f22 def recursive_allclose(a, b: dict, verbose=False, prefix=''): """ Recursively check whether all corresponding tensors in two dicts are close. :param a: dict a :param b: dict b :param prefix: path prefix tracked across recursive calls, used for error messages :return: bool: all_close or not """ tot_tensor = 0 tot_dict = 0 for k in a: if isinstance(a[k], torch.Tensor): if k == 'unstable_idx': continue if verbose: print(f'recursive_allclose(): Checking {prefix}{k}') assert k in b and (isinstance(b[k], torch.Tensor) or isinstance(b[k], Patches)), f'recursive_allclose(): Tensor not found in path {prefix}{k}' if isinstance(b[k], torch.Tensor): assert torch.allclose(a[k].reshape(-1), b[k].reshape(-1), 1e-4, 1e-5), f'recursive_allclose(): Inconsistency found in path {prefix}{k}' tot_tensor += 1 elif isinstance(a[k], dict): assert k in b and isinstance(b[k], dict), f'recursive_allclose(): dict not found in path {prefix}{k}' recursive_allclose(a[k], b[k], verbose, prefix + k) tot_dict += 1 tot_b_tensor = sum([1 if (isinstance(v, torch.Tensor) or isinstance(v, Patches)) and k != 'unstable_idx' else 0 for k, v in b.items()]) tot_b_dict = sum([1 if isinstance(v, dict) else 0 for v in b.values()]) assert tot_tensor == tot_b_tensor, f'recursive_allclose(): Extra tensors found in path {prefix}' assert tot_dict == tot_b_dict, f'recursive_allclose(): Extra recursive paths found in path {prefix}' return True class TestUpSample(TestCase): def
__init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) # self.device = device def test(self, seed=123): for kernel_size in [3,5]: for scaling_factor in [2,3,4]: for stride in [1,2]: for padding in [1]: self.test_instance(kernel_size, scaling_factor, stride, padding, seed=seed) def test_instance(self, kernel_size=3, scaling_factor=2, stride=1, padding=1, seed=123): self.set_seed(seed) print(f'kernel_size = {kernel_size}, scaling_factor = {scaling_factor}, stride = {stride}, padding = {padding}') random_input = torch.randn( (1, 5), device=self.default_device, dtype=self.default_dtype) * 1000. eps = 0.3 model_ori = Model(scale_factor=scaling_factor, conv_kernel_size=kernel_size, stride=stride, padding=padding).to(device=self.default_device, dtype=self.default_dtype) ptb = PerturbationLpNorm(norm=np.inf, eps=eps) z1_clean = random_input.detach().clone().requires_grad_(requires_grad=True) z1 = BoundedTensor(random_input, ptb) model_mat = BoundedModule(model_ori, (random_input,), device=self.default_device, bound_opts={"conv_mode": "matrix"}) pred_of_mat = model_mat(z1) lb_m, ub_m, A_m = model_mat.compute_bounds(return_A=True, needed_A_dict={model_mat.output_name[0]: model_mat.input_name[0]}, ) model_pat = BoundedModule(model_ori, (random_input,), device=self.default_device, bound_opts={"conv_mode": "patches"}) pred_of_patch = model_pat(z1) lb_p, ub_p, A_p = model_pat.compute_bounds(return_A=True, needed_A_dict={ model_pat.output_name[0]: model_pat.input_name[0]}, ) assert torch.allclose(pred_of_mat, pred_of_patch, 1e-5) assert torch.allclose(lb_m, lb_p, 1e-5) assert torch.allclose(ub_m, ub_p, 1e-5) assert recursive_allclose(A_m, A_p, verbose=True) class TestReducedCGAN(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1, ref_name=None, generate=generate, device=device, dtype=dtype) # self.device = device def test(self, seed=456): self.set_seed(seed) input = torch.tensor([[0.583, -0.97, -0.97, 0.598, 0.737]]) eps = 0.1 model_ori = ModelReducedCGAN().to( device=self.default_device, dtype=self.default_dtype) ptb = PerturbationLpNorm(norm=np.inf, eps=eps) z1_clean = input.detach().clone().requires_grad_(requires_grad=True) z1 = BoundedTensor(input, ptb) model_mat = BoundedModule(model_ori, (input,), device=self.default_device, bound_opts={"conv_mode": "matrix"}) pred_of_mat = model_mat(z1) needed_A_dict = defaultdict(set) for node in model_mat.nodes(): needed_A_dict[node.name] = set() lb_m, ub_m, A_m = model_mat.compute_bounds((z1,), return_A=True, needed_A_dict=needed_A_dict, method='crown') model_pat = BoundedModule(model_ori, (input,), device=self.default_device, bound_opts={"conv_mode": "patches", "sparse_features_alpha": False}) pred_of_patch = model_pat(z1) lb_p, ub_p, A_p = model_pat.compute_bounds((z1,), return_A=True, needed_A_dict=needed_A_dict, method='crown') # print(pred_of_mat, pred_of_patch) assert torch.allclose(pred_of_mat, pred_of_patch, 1e-5) assert torch.allclose(lb_m, lb_p, 1e-5) assert torch.allclose(ub_m, ub_p, 1e-5) assert recursive_allclose(A_m, A_p, verbose=True) if __name__ == '__main__': # should use device = 'cpu' for GitHub CI testcase = TestUpSample(generate=False) testcase.test(seed=123) # """ # following test is much stronger, but runs within 30s only on GPUs # so commented it out for CI testing now # required GPU memory: 1.5 GiB # 
""" testhardcase = TestReducedCGAN(generate=False) testhardcase.test(seed=456) ================================================ FILE: tests/test_vision_models.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from auto_LiRPA import BoundedModule, BoundedTensor from auto_LiRPA.perturbations import * from testcase import _to, TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE class cnn_4layer_test(nn.Module): def __init__(self): super(cnn_4layer_test, self).__init__() self.conv1 = nn.Conv2d(3, 3, 4, stride=2, padding=1) self.bn = nn.BatchNorm2d(3) self.shortcut = nn.Conv2d(3, 3, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(3, 3, 4, stride=2, padding=1) self.fc1 = nn.Linear(192, 10) def forward(self, x): x_ = x x = F.relu(self.conv1(self.bn(x))) x += self.shortcut(x_) x = F.relu(self.conv2(x)) x = x.view(x.size(0), -1) x = self.fc1(x) return x class TestVisionModels(TestCase): def __init__(self, methodName='runTest', ref_name='vision_test_data', model=cnn_4layer_test(), generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, seed=1234, ref_name=ref_name, generate=generate, device=device, dtype=dtype) self.result = {} self.model = model.to(device=self.default_device, dtype=self.default_dtype) def setUp(self): super().setUp() if self.reference: self.reference = _to(self.reference, self.default_device) self.reference = _to(self.reference, self.default_device) if self.generate: # state_dict from an existing reference is needed self.reference = torch.load(self.ref_path) def verify_bounds(self, model, x, IBP, method, forward_ret, lb_name, ub_name): lb, ub = model(method_opt="compute_bounds", x=(x,), IBP=IBP, method=method) self.result[lb_name] = lb self.result[ub_name] = ub if method != 'CROWN-Optimized': # test gradient backward propagation # only when method is not "CROWN-Optimized" (in that case, lb and ub don't have gradient) loss = (ub - lb).abs().sum() loss.backward() grad = x.grad self.result[lb_name[:-2] + 'grad'] = grad.clone() if not self.generate: if method != 'CROWN-Optimized': assert torch.allclose(lb, self.reference[lb_name], 1e-4, atol=2e-7), (lb - self.reference[lb_name]).abs().max() assert torch.allclose(ub, self.reference[ub_name], 1e-4, atol=2e-7), (ub - self.reference[ub_name]).abs().max() assert ((lb - self.reference[lb_name]).pow(2).sum() < 1.3e-9), (lb - self.reference[lb_name]).pow(2).sum() assert ((ub - self.reference[ub_name]).pow(2).sum() < 1.3e-9), (ub - self.reference[ub_name]).pow(2).sum() if "same-slope" not in lb_name: assert torch.allclose(grad, self.reference[lb_name[:-2] + 'grad'], 1e-4, 1e-6), (grad - self.reference[lb_name[:-2] + 'grad']).abs().max() assert (grad - self.reference[lb_name[:-2] + 'grad']).pow(2).sum() < 1.e-6, (grad - self.reference[lb_name[:-2] + 'grad']).pow(2).sum() else: assert torch.allclose(lb, self.reference[lb_name], 1e-4, atol=5e-6), (lb - self.reference[lb_name]).abs().max() assert torch.allclose(ub, self.reference[ub_name], 1e-4, atol=5e-6), (ub - self.reference[ub_name]).abs().max() assert ((lb - self.reference[lb_name]).pow(2).sum() < 1.3e-9), (lb - self.reference[lb_name]).pow(2).sum() assert ((ub - self.reference[ub_name]).pow(2).sum() < 1.3e-9), (ub - self.reference[ub_name]).pow(2).sum() def test_bounds(self, bound_opts=None, optimize = True): if bound_opts is None: bound_opts = {'activation_bound_option': 'same-slope'} np.random.seed(123) # FIXME inconsistent seeds model_ori = self.model.eval() 
model_ori.load_state_dict(self.reference['model']) dummy_input = self.reference['data'].to(dtype=self.default_dtype, device=self.default_device) inputs = (dummy_input,) model = BoundedModule(model_ori, inputs, device=self.default_device) model.set_bound_opts({'optimize_bound_args': {'lr_alpha': 0.1}}) forward_ret = model(dummy_input) model_ori.eval() assert torch.allclose(model_ori(dummy_input), model(dummy_input), 1e-4, 1e-6) model_same_slope = BoundedModule(model_ori, inputs, device=self.default_device, bound_opts=bound_opts) model_same_slope.set_bound_opts({'optimize_bound_args': {'lr_alpha': 0.1}}) # Linf ptb = PerturbationLpNorm(norm=np.inf, eps=0.01) x = BoundedTensor(dummy_input, ptb) x.requires_grad_() self.verify_bounds(model, x, IBP=True, method=None, forward_ret=forward_ret, lb_name='l_inf_IBP_lb', ub_name='l_inf_IBP_ub') # IBP self.verify_bounds(model, x, IBP=True, method='backward', forward_ret=forward_ret, lb_name='l_inf_CROWN-IBP_lb', ub_name='l_inf_CROWN-IBP_ub') # CROWN-IBP self.verify_bounds(model, x, IBP=False, method='backward', forward_ret=forward_ret, lb_name='l_inf_CROWN_lb', ub_name='l_inf_CROWN_ub') # CROWN self.verify_bounds(model_same_slope, x, IBP=False, method='backward', forward_ret=forward_ret, lb_name='l_inf_CROWN-same-slope_lb', ub_name='l_inf_CROWN-same-slope_ub') # CROWN-same-slope if optimize: self.verify_bounds(model, x, IBP=False, method='CROWN-Optimized', forward_ret=forward_ret, lb_name='l_inf_CROWN-Optimized_lb', ub_name='l_inf_CROWN-Optimized_ub') # CROWN-Optimized self.verify_bounds(model_same_slope, x, IBP=False, method='CROWN-Optimized', forward_ret=forward_ret, lb_name='l_inf_CROWN-Optimized-same-slope_lb', ub_name='l_inf_CROWN-Optimized-same-slope_ub') # Crown-Optimized-same-slope # L2 ptb = PerturbationLpNorm(norm=2, eps=0.01) x = BoundedTensor(dummy_input, ptb) x.requires_grad_() self.verify_bounds(model, x, IBP=True, method=None, forward_ret=forward_ret, lb_name='l_2_IBP_lb', ub_name='l_2_IBP_ub') # IBP self.verify_bounds(model, x, IBP=True, method='backward', forward_ret=forward_ret, lb_name='l_2_CROWN-IBP_lb', ub_name='l_2_CROWN-IBP_ub') # CROWN-IBP self.verify_bounds(model, x, IBP=False, method='backward', forward_ret=forward_ret, lb_name='l_2_CROWN_lb', ub_name='l_2_CROWN_ub') # CROWN self.verify_bounds(model_same_slope, x, IBP=False, method='backward', forward_ret=forward_ret, lb_name='l_2_CROWN-same-slope_lb', ub_name='l_2_CROWN-same-slope_ub') # CROWN-same-slope if optimize: self.verify_bounds(model, x, IBP=False, method='CROWN-Optimized', forward_ret=forward_ret, lb_name='l_2_CROWN-Optimized_lb', ub_name='l_2_CROWN-Optimized_ub') # CROWN-Optimized self.verify_bounds(model_same_slope, x, IBP=False, method='CROWN-Optimized', forward_ret=forward_ret, lb_name='l_2_CROWN-Optimized-same-slope_lb', ub_name='l_2_CROWN-Optimized-same-slope_ub') # Crown-Optimized-same-slope if self.generate: self.result['data'] = self.reference['data'] self.result['model'] = self.reference['model'] self.save() if __name__ =="__main__": t = TestVisionModels(generate=False) # t = TestVisionModels() t.setUp() t.test_bounds() ================================================ FILE: tests/test_vision_models_hardtanh.py ================================================ import torch.nn as nn import torch.nn.functional as F from auto_LiRPA.perturbations import * from test_vision_models import TestVisionModels from testcase import DEFAULT_DEVICE, DEFAULT_DTYPE class cnn_4layer_test_hardtanh(nn.Module): def __init__(self, in_ch, in_dim, width=2, linear_size=256): 
super(cnn_4layer_test_hardtanh, self).__init__() self.conv1 = nn.Conv2d(in_ch, 4 * width, 4, stride=2, padding=1) self.conv2 = nn.Conv2d(4 * width, 8 * width, 4, stride=2, padding=1) self.fc1 = nn.Linear(8 * width * (in_dim // 4) * (in_dim // 4), linear_size) self.fc2 = nn.Linear(linear_size, 10) def forward(self, x): x = F.hardtanh(self.conv1(x)) x = F.hardtanh(self.conv2(x)) x = torch.flatten(x, 1) x = F.hardtanh(self.fc1(x)) x = self.fc2(x) return x class TestCustomVisionModel(TestVisionModels): def __init__(self, methodName='runTest', model=cnn_4layer_test_hardtanh(in_ch=1, in_dim=28), generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName, 'vision_clip_test_data', model, generate, device=device, dtype=dtype) def test_bounds(self, bound_opts=None, optimize=False): if bound_opts is None: bound_opts = {'hardtanh': 'same-slope'} super().test_bounds(bound_opts=bound_opts, optimize=optimize) if __name__ == "__main__": t = TestCustomVisionModel() t.setUp() t.test_bounds() ================================================ FILE: tests/test_weight_perturbation.py ================================================ import copy import subprocess import numpy as np from testcase import TestCase, DEFAULT_DEVICE, DEFAULT_DTYPE import sys sys.path.append('../examples/vision') import models from auto_LiRPA import BoundedModule from auto_LiRPA.perturbations import * class TestWeightPerturbation(TestCase): def __init__(self, methodName='runTest', generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__( methodName, seed=1234, ref_name='weight_perturbation_test_data', generate=generate, device=device, dtype=dtype) self.result = {} def test_training(self): # python weight_perturbation_training.py --device cpu --scheduler_opts start=1,length=100 --num_epochs 1 --truncate_data 5 ret = subprocess.run( ['python', 'weight_perturbation_training.py', '--device', 'cpu', '--scheduler_opts', 'start=1,length=100', '--num_epochs', '1', '--truncate_data', '5'], cwd='../examples/vision', capture_output=True) self.assertEqual(ret.returncode, 0, ret.stderr) res_test = ret.stdout.decode().split('\n')[-2].split(' ') assert abs(float(res_test[-3].split('=')[1]) - 2.246) < 0.01 def verify_bounds(self, model, x, IBP, method, forward_ret, lb_name, ub_name): lb, ub = model(method_opt="compute_bounds", x=(x,), IBP=IBP, method=method) self.result[lb_name] = lb.detach().data.clone() self.result[ub_name] = ub.detach().data.clone() # test gradient backward propagation loss = (ub - lb).abs().sum() loss.backward() # gradient w.r.t input only grad = x.grad self.result[lb_name+'_grad'] = grad.detach().data.clone() if not self.generate: assert torch.allclose(self.reference[lb_name], self.result[lb_name], 1e-4, 1e-6) assert torch.allclose(self.reference[ub_name], self.result[ub_name], 1e-4, 1e-6) assert ((self.reference[lb_name] - self.result[lb_name]).pow(2).sum() < 1e-8) assert ((self.reference[ub_name] - self.result[ub_name]).pow(2).sum() < 1e-8) assert torch.allclose(self.reference[lb_name+'_grad'], self.result[lb_name + '_grad'], 1e-4, 1e-6) assert ((self.reference[lb_name + '_grad'] - self.result[lb_name + '_grad']).pow(2).sum() < 1e-8) def test_perturbation(self): np.random.seed(123) # FIXME This seed is inconsistent with other seeds (1234) model_ori = models.Models['mlp_3layer_weight_perturb'](pert_weight=True, pert_bias=True).eval() self.result['model'] = model_ori.state_dict() self.result['data'] = torch.randn(8, 1, 28, 28) model_ori.load_state_dict(self.result['model']) 
state_dict = copy.deepcopy(model_ori.state_dict()) dummy_input = self.result['data'].requires_grad_() inputs = (dummy_input,) model = BoundedModule(model_ori, inputs, bound_opts={ 'sparse_intermediate_bounds': False, 'sparse_conv_intermediate_bounds': False, 'sparse_intermediate_bounds_with_ibp': False}, device=self.default_device) forward_ret = model(dummy_input) model_ori.eval() assert torch.isclose(model_ori(dummy_input), model_ori(dummy_input), 1e-8).all() def verify_model(pert_weight=True, pert_bias=True, norm=np.inf, lb_name='', ub_name=''): model_ori_ = models.Models['mlp_3layer_weight_perturb'](pert_weight=pert_weight, pert_bias=pert_bias, norm=norm).eval() model_ori_.load_state_dict(state_dict) model_ = BoundedModule(model_ori_, inputs, bound_opts={ 'sparse_intermediate_bounds': False, 'sparse_conv_intermediate_bounds': False, 'sparse_intermediate_bounds_with_ibp': False}) model_.ptb = model_ori.ptb self.verify_bounds(model_, dummy_input, IBP=True, method='backward', forward_ret=forward_ret, lb_name=lb_name + '_CROWN-IBP', ub_name=ub_name + '_CROWN-IBP') # CROWN-IBP self.verify_bounds(model_, dummy_input, IBP=False, method='backward', forward_ret=forward_ret, lb_name=lb_name + '_CROWN', ub_name=ub_name + '_CROWN') # CROWN # Linf verify_model(pert_weight=True, pert_bias=True, norm=np.inf, lb_name='l_inf_weights_bias_lb', ub_name='l_inf_weights_bias_ub') verify_model(pert_weight=True, pert_bias=False, norm=np.inf, lb_name='l_inf_weights_lb', ub_name='l_inf_weights_ub') verify_model(pert_weight=False, pert_bias=True, norm=np.inf, lb_name='l_inf_bias_lb', ub_name='l_inf_bias_ub') # L2 verify_model(pert_weight=True, pert_bias=True, norm=2, lb_name='l_2_weights_bias_lb', ub_name='l_2_weights_bias_ub') verify_model(pert_weight=True, pert_bias=False, norm=2, lb_name='l_2_weights_lb', ub_name='l_2_weights_ub') verify_model(pert_weight=False, pert_bias=True, norm=2, lb_name='l_2_bias_lb', ub_name='l_2_bias_ub') if self.generate: self.save() if __name__ == '__main__': testcase = TestWeightPerturbation(generate=False) testcase.setUp() testcase.reference = testcase._to(testcase.reference, testcase.default_device) testcase.reference = testcase._to(testcase.reference, testcase.default_dtype) testcase.test_perturbation() testcase.test_training() ================================================ FILE: tests/testcase.py ================================================ import unittest import random import torch import numpy as np DEFAULT_DEVICE = 'cpu' DEFAULT_DTYPE = torch.float32 class TestCase(unittest.TestCase): """Superclass for unit test cases in auto_LiRPA.""" def __init__(self, methodName='runTest', seed=1, ref_name=None, generate=False, device=DEFAULT_DEVICE, dtype=DEFAULT_DTYPE): super().__init__(methodName) self.addTypeEqualityFunc(np.ndarray, '_assert_array_equal') self.addTypeEqualityFunc(torch.Tensor, '_assert_tensor_equal') self.rtol = 1e-5 self.atol = 1e-6 self.default_dtype = dtype self.default_device = device set_default_dtype_device(dtype, device) self.set_seed(seed) data_path = 'data_64/' if dtype == torch.float64 else 'data/' self.ref_path = data_path + ref_name if ref_name else None self.generate = generate self.setUp() def set_seed(self, seed): torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) random.seed(seed) np.random.seed(seed) def setUp(self): """Load the reference result if it exists.""" if self.generate: self.reference = None else: self.reference = torch.load(self.ref_path, weights_only=False) if self.ref_path else None def save(self): """Save result for future 
comparison.""" print('Saving result to', self.ref_path) torch.save(self.result, self.ref_path) def check(self): """Save or check the results. This function can be called at the end of each test. If `self.generate == True`, save results for future comparison; otherwise, compare the current results `self.result` with the loaded reference `self.reference`. Results are expected to be a list or tuple of `torch.Tensor` instances. """ if self.generate: self.save() else: self.result = _to( self.result, device=self.default_device, dtype=self.default_dtype) self.reference = _to( self.reference, device=self.default_device, dtype=self.default_dtype) self._assert_equal(self.result, self.reference) def _assert_equal(self, a, b): assert type(a) == type(b) if isinstance(a, (list, tuple)): for a_, b_ in zip(a, b): self._assert_equal(a_, b_) else: self.assertEqual(a, b) def _assert_array_equal(self, a, b, msg=None): if not a.shape == b.shape: if msg is None: msg = f"Shapes are not equal: {a.shape} {b.shape}" raise self.failureException(msg) if not np.allclose(a, b, rtol=self.rtol, atol=self.atol): if msg is None: msg = f"Arrays are not equal:\n{a}\n{b}, max diff: {np.max(np.abs(a - b))}" raise self.failureException(msg) def _assert_tensor_equal(self, a, b, msg=None): if not a.shape == b.shape: if msg is None: msg = f"Shapes are not equal: {a.shape} {b.shape}" raise self.failureException(msg) if not torch.allclose(a, b, rtol=self.rtol, atol=self.atol): if msg is None: msg = f"Tensors are not equal:\n{a}\n{b}, max diff: {torch.max(torch.abs(a - b))}" raise self.failureException(msg) def _to(obj, device=None, dtype=None, inplace=False): """ Move all tensors in the object to a specified dest (device or dtype). The inplace=True option is available for dict.""" if obj is None: return obj elif isinstance(obj, torch.Tensor): return obj.to(device=device if device is not None else obj.device, dtype=dtype if dtype is not None else obj.dtype) elif isinstance(obj, tuple): return tuple([_to(item, device=device, dtype=dtype) for item in obj]) elif isinstance(obj, list): return [_to(item, device=device, dtype=dtype) for item in obj] elif isinstance(obj, dict): if inplace: for k, v in obj.items(): obj[k] = _to(v, device=device, dtype=dtype, inplace=True) return obj else: return {k: _to(v, device=device, dtype=dtype) for k, v in obj.items()} else: raise NotImplementedError(f"Unsupported type: {type(obj)}") def set_default_dtype_device(dtype=DEFAULT_DTYPE, device=DEFAULT_DEVICE): """Utility function to set default dtype and device.""" torch.set_default_dtype(dtype) torch.set_default_device(torch.device(device)) __all__ = ['TestCase', 'DEFAULT_DEVICE', 'DEFAULT_DTYPE', '_to', 'set_default_dtype_device']