Repository: a1600012888/YOPO-You-Only-Propagate-Once Branch: master Commit: b8ae668be829 Files: 74 Total size: 156.5 KB Directory structure: gitextract_trzjd1fv/ ├── .gitignore ├── README.md ├── experiments/ │ ├── CIFAR10/ │ │ ├── pre-res18.pgd10/ │ │ │ ├── config.py │ │ │ ├── dataset.py │ │ │ ├── eval.py │ │ │ ├── network.py │ │ │ └── train.py │ │ ├── pre-res18.yopo-5-3/ │ │ │ ├── config.py │ │ │ ├── dataset.py │ │ │ ├── eval.py │ │ │ ├── loss.py │ │ │ ├── network.py │ │ │ ├── train.py │ │ │ └── training_function.py │ │ ├── wide34.natural/ │ │ │ ├── config.py │ │ │ ├── dataset.py │ │ │ ├── eval.py │ │ │ ├── network.py │ │ │ └── train.py │ │ ├── wide34.pgd10/ │ │ │ ├── config.py │ │ │ ├── dataset.py │ │ │ ├── eval.py │ │ │ ├── network.py │ │ │ └── train.py │ │ └── wide34.yopo-5-3/ │ │ ├── config.py │ │ ├── dataset.py │ │ ├── eval.py │ │ ├── loss.py │ │ ├── network.py │ │ ├── train.py │ │ └── training_function.py │ ├── CIFAR10-TRADES/ │ │ ├── baseline.res-pre18.TRADES.10step/ │ │ │ ├── config.py │ │ │ ├── network.py │ │ │ ├── trades.py │ │ │ └── train_trades_cifar10.py │ │ ├── pre-res18.TRADES-YOPO-2-5/ │ │ │ ├── config.py │ │ │ ├── dataset.py │ │ │ ├── loss.py │ │ │ ├── network.py │ │ │ ├── train.py │ │ │ └── training_function.py │ │ └── pre-res18.TRADES-YOPO-3-4/ │ │ ├── config.py │ │ ├── dataset.py │ │ ├── loss.py │ │ ├── network.py │ │ ├── train.py │ │ └── training_function.py │ └── MNIST/ │ ├── YOPO-5-10/ │ │ ├── config.py │ │ ├── dataset.py │ │ ├── eval.py │ │ ├── loss.py │ │ ├── network.py │ │ ├── train.py │ │ └── training_function.py │ └── pgd40/ │ ├── config.py │ ├── dataset.py │ ├── eval.py │ ├── network.py │ └── train.py └── lib/ ├── __init__.py ├── attack/ │ ├── __init__.py │ ├── attack_base.py │ └── pgd.py ├── base_model/ │ ├── __init__.py │ ├── cifar_resnet18.py │ ├── network.py │ ├── preact_resnet.py │ ├── small_cnn.py │ └── wide_resnet.py ├── training/ │ ├── __init__.py │ ├── config.py │ └── train.py └── utils/ ├── __init__.py └── misc.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.checkpoint* **data/ *__pycache__* *mig* **ttt*/ **/*mig*/ **log/** **/log/** *events* *.txt *.idea/ ================================================ FILE: README.md ================================================ # YOPO (You Only Propagate Once: Accelerating Adversarial Training via Maximal Principle) Code for our [paper](https://arxiv.org/abs/1905.00877): "You Only Propagate Once: Accelerating Adversarial Training via Maximal Principle" by [Dinghuai Zhang](https://zdhnarsil.github.io), [Tianyuan Zhang](http://tianyuanzhang.com), [Yiping Lu](https://web.stanford.edu/~yplu/), [Zhanxing Zhu](https://sites.google.com/view/zhanxingzhu/), [Bin Dong](http://bicmr.pku.edu.cn/~dongbin/). Our paper has been accepted by **NeurIPS 2019**. ![The Pipeline of YOPO](/images/pipeline.jpg) ## Prerequisites * Pytorch==1.0.1, torchvision * Python 3.5 * tensorboardX * easydict * tqdm ## Install ```bash git clone https://github.com/a1600012888/YOPO-You-Only-Propagate-Once.git cd YOPO-You-Only-Propagate-Once pip3 install -r requirements.txt --user ``` ## How to run our code ### Natural training and PGD training * normal training: `experiments/CIFAR10/wide34.natural` * PGD adversarial training: `experiments/CIFAR10/wide34.pgd10` Run `python train.py -d <gpu_id>`. You can change all the hyper-parameters in `config.py`.
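For example, to train the PGD-10 WideResNet baseline on GPU 0 and then evaluate the saved model against the PGD-20 attack (the GPU id here is illustrative, and the checkpoint path is the default that `eval.py` expects):

```bash
cd experiments/CIFAR10/wide34.pgd10
python train.py -d 0
python eval.py -d 0 --resume log/models/last.checkpoint
```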
And you can change the network in `network.py`. The code in the directories mentioned above is very **flexible** and can easily be modified, so it can be used as a **template**. ### YOPO training Go to the directory `experiments/CIFAR10/wide34.yopo-5-3` and run `python train.py -d <gpu_id>`. You can change all the hyper-parameters in `config.py` and the network in `network.py`. Running this code for the first time will download the dataset into `./experiments/CIFAR10/data/`; you can modify the path in `dataset.py`. ## Miscellaneous A C++ implementation by [Nitin Shyamkumar](https://scholar.google.com/citations?user=lF0ZyBQAAAAJ&hl=en) is provided [here](https://github.com/nitinshyamk/yopo-inference)! Thank you, Nitin, for your work! The main body of `experiments/CIFAR10-TRADES/baseline.res-pre18.TRADES.10step` is written according to the [TRADES official repo](https://github.com/yaodongyu/TRADES). A TensorFlow implementation by [Runtian Zhai](http://www.runtianz.cn/) is provided [here](https://colab.research.google.com/drive/1hglbkT4Tzf8BOkvX185jFmAND9M67zoZ#scrollTo=OMyffsWl1b4y); it also includes an implementation of the ["For Free"](https://arxiv.org/abs/1904.12843) paper. It turns out that our YOPO is faster than "For Free" (detailed results will come soon). Thanks to Runtian for his help! ## Cite ``` @article{zhang2019you, title={You Only Propagate Once: Accelerating Adversarial Training via Maximal Principle}, author={Zhang, Dinghuai and Zhang, Tianyuan and Lu, Yiping and Zhu, Zhanxing and Dong, Bin}, journal={arXiv preprint arXiv:1905.00877}, year={2019} } ``` ================================================ FILE: experiments/CIFAR10/pre-res18.pgd10/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 105 val_interval = 5 create_optimizer = SGDOptimizerMaker(lr =5e-2, momentum = 0.9, weight_decay = 5e-4) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [75, 90, 100], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss create_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 10, norm = np.inf, mean = torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std = torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() # About data # C.inp_chn = 1 # C.num_class = 10 parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use')
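# Note on how this config is consumed (see train.py in this directory): the
# create_* attributes above are factory objects, used for example as
#   optimizer = config.create_optimizer(net.parameters())
#   lr_scheduler = config.create_lr_scheduler(optimizer)
#   EvalAttack = config.create_evaluation_attack_method(DEVICE)
# so changing the optimizer, schedule, or attack only requires editing this file.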
parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10/pre-res18.pgd10/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) #trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) #testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10/pre-res18.pgd10/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/CIFAR10/pre-res18.pgd10/network.py ================================================ '''Pre-activation ResNet in PyTorch. Reference: [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun Identity Mappings in Deep Residual Networks. 
arXiv:1603.05027 ''' import torch import torch.nn as nn import torch.nn.functional as F class PreActBlock(nn.Module): '''Pre-activation version of the BasicBlock.''' expansion = 1 def __init__(self, in_planes, planes, stride=1): super(PreActBlock, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) if stride != 1 or in_planes != self.expansion * planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out += shortcut return out class PreActResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10): super(PreActResNet, self).__init__() self.in_planes = 64 self.other_layers = nn.ModuleList() self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.layer_one = self.conv1 self.other_layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) self.other_layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) self.other_layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) self.other_layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) self.linear = GlobalpoolFC(512 * block.expansion, num_classes) self.other_layers.append(self.linear) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1] * (num_blocks - 1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.other_layers.append(layers[-1]) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): x = self.layer_one(x) self.layer_one_out = x self.layer_one_out.requires_grad_() self.layer_one_out.retain_grad() x = self.layer_one_out for layer in self.other_layers: x = layer(x) ''' out = self.conv1(x) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = self.layer4(out) out = F.avg_pool2d(out, 4) out = out.view(out.size(0), -1) out = self.linear(out) return out ''' return x class GlobalpoolFC(nn.Module): def __init__(self, num_in, num_class): super(GlobalpoolFC, self).__init__() self.pool = nn.AdaptiveAvgPool2d(output_size=1) self.fc = nn.Linear(num_in, num_class) def forward(self, x): y = self.pool(x) y = y.reshape(y.shape[0], -1) y = self.fc(y) return y def PreActResNet18(): return PreActResNet(PreActBlock, [2, 2, 2, 2]) def PreActResNet34(): return PreActResNet(PreActBlock, [3, 4, 6, 3]) class PreActBottleneck(nn.Module): '''Pre-activation version of the original Bottleneck module.''' expansion = 4 def __init__(self, in_planes, planes, stride=1): super(PreActBottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 
'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out = self.conv3(F.relu(self.bn3(out))) out += shortcut return out def create_network(): return PreActResNet18() def test(): net = PreActResNet18() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10/pre-res18.pgd10/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import train_one_epoch, eval_one_epoch import torch import json import time import numpy as np from tensorboardX import SummaryWriter import argparse import os from collections import OrderedDict DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) optimizer = config.create_optimizer(net.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) TrainAttack = config.create_attack_method(DEVICE) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) train_one_epoch(net, ds_train, optimizer, criterion, DEVICE, descrip_str, TrainAttack, adv_coef = args.adv_coef) if config.val_interval > 0 and now_epoch % config.val_interval == 0: eval_one_epoch(net, ds_val, DEVICE, EvalAttack) lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch from loss import CrossEntropyWithWeightPenlty def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 36 val_interval = 2 weight_decay = 5e-4 inner_iters = 3 K = 5 sigma = 2 / 255.0 eps = 8 / 255.0 create_optimizer = SGDOptimizerMaker(lr =1e-1 * 2 / K, momentum = 0.9, weight_decay = 5e-4) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [30, 34, 36], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss #create_attack_method = \ # IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 10, norm = np.inf, # mean = torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), # std = torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) 
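# YOPO does not build a PGD attack for training: the adversarial perturbation
# eta is produced by the FastGradientLayerOneTrainer in training_function.py,
# which performs cheap sign-gradient steps on the layer-one Hamiltonian instead
# of full PGD back-propagation. Hence create_attack_method is None below, while
# a 20-step PGD attack is still constructed for evaluation.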
create_attack_method = None create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() # About data # C.inp_chn = 1 # C.num_class = 10 parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) #trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) #testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, 
AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/loss.py ================================================ import torch import torch.nn as nn from torch.nn.modules.loss import _Loss import torch.nn.functional as F class Hamiltonian(_Loss): def __init__(self, layer, reg_cof = 1e-4): super(Hamiltonian, self).__init__() self.layer = layer self.reg_cof = 0 def forward(self, x, p): y = self.layer(x) #l2 = cal_l2_norm(self.layer) #print(y.shape, p.shape) H = torch.sum(y * p) #H = H - self.reg_cof * l2 return H class CrossEntropyWithWeightPenlty(_Loss): def __init__(self, module, DEVICE, reg_cof = 1e-4): super(CrossEntropyWithWeightPenlty, self).__init__() self.reg_cof = reg_cof self.criterion = nn.CrossEntropyLoss().to(DEVICE) self.module = module #print(modules, 'dwadaQ!') def __call__(self, pred, label): cross_loss = self.criterion(pred, label) weight_loss = 0 #for module in self.module: # print(module) # weight_loss = weight_loss + cal_l2_norm(module) weight_loss = cal_l2_norm(self.module) loss = cross_loss + self.reg_cof * weight_loss return loss def cal_l2_norm(layer: torch.nn.Module): loss = 0. for name, param in layer.named_parameters(): if name == 'weight': loss = loss + 0.5 * torch.norm(param,) ** 2 return loss ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/network.py ================================================ '''Pre-activation ResNet in PyTorch. Reference: [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun Identity Mappings in Deep Residual Networks. arXiv:1603.05027 ''' import torch import torch.nn as nn import torch.nn.functional as F class PreActBlock(nn.Module): '''Pre-activation version of the BasicBlock.''' expansion = 1 def __init__(self, in_planes, planes, stride=1): super(PreActBlock, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) if stride != 1 or in_planes != self.expansion * planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out += shortcut return out class PreActResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10): super(PreActResNet, self).__init__() self.in_planes = 64 self.other_layers = nn.ModuleList() self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.layer_one = self.conv1 self.other_layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) self.other_layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) self.other_layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) self.other_layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) self.linear = GlobalpoolFC(512 * block.expansion, num_classes) self.other_layers.append(self.linear) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1] * (num_blocks - 1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.other_layers.append(layers[-1]) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, 
x): x = self.layer_one(x) self.layer_one_out = x self.layer_one_out.requires_grad_() self.layer_one_out.retain_grad() x = self.layer_one_out for layer in self.other_layers: x = layer(x) return x class GlobalpoolFC(nn.Module): def __init__(self, num_in, num_class): super(GlobalpoolFC, self).__init__() self.pool = nn.AdaptiveAvgPool2d(output_size=1) self.fc = nn.Linear(num_in, num_class) def forward(self, x): y = self.pool(x) y = y.reshape(y.shape[0], -1) y = self.fc(y) return y def PreActResNet18(): return PreActResNet(PreActBlock, [2, 2, 2, 2]) def PreActResNet34(): return PreActResNet(PreActBlock, [3, 4, 6, 3]) class PreActBottleneck(nn.Module): '''Pre-activation version of the original Bottleneck module.''' expansion = 4 def __init__(self, in_planes, planes, stride=1): super(PreActBottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out = self.conv3(F.relu(self.bn3(out))) out += shortcut return out def create_network(): return PreActResNet18() def test(): net = PreActResNet18() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import eval_one_epoch from loss import Hamiltonian, CrossEntropyWithWeightPenlty from training_function import train_one_epoch, FastGradientLayerOneTrainer import torch import json import numpy as np from tensorboardX import SummaryWriter import argparse import torch.nn as nn import torch.optim as optim import os from collections import OrderedDict DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True writer = SummaryWriter(log_dir=config.log_dir) net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) #criterion = CrossEntropyWithWeightPenlty(net.other_layers, DEVICE, config.weight_decay)#.to(DEVICE) #ce_criterion = nn.CrossEntropyLoss().to(DEVICE) optimizer = config.create_optimizer(net.other_layers.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) ## Make the layer-one trainer. This part of the code should be written in config.py Hamiltonian_func = Hamiltonian(net.layer_one, config.weight_decay) layer_one_optimizer = optim.SGD(net.layer_one.parameters(), lr = lr_scheduler.get_lr()[0], momentum=0.9, weight_decay=5e-4) layer_one_optimizer_lr_scheduler = optim.lr_scheduler.MultiStepLR(layer_one_optimizer, milestones = [30, 34, 36], gamma = 0.1) LayerOneTrainer = FastGradientLayerOneTrainer(Hamiltonian_func, layer_one_optimizer, config.inner_iters, config.sigma, config.eps) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size)
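# The YOPO-5-3 loop implemented in training_function.py: for every mini-batch,
# train_one_epoch runs K = 5 outer iterations. Each outer iteration does one
# full forward/backward pass, reads off p = -grad(loss) w.r.t. the layer-one
# output (net.layer_one_out.grad), and then LayerOneTrainer.step reuses this
# fixed p for inner_iters = 3 sign-gradient updates of eta through the first
# layer only. That single full back-propagation per outer iteration is the
# "you only propagate once" idea.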
#TrainAttack = config.create_attack_method(DEVICE) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) acc, yofoacc = train_one_epoch(net, ds_train, optimizer, criterion, LayerOneTrainer, config.K, DEVICE, descrip_str) tb_train_dic = {'Acc':acc, 'YofoAcc':yofoacc} print(tb_train_dic) writer.add_scalars('Train', tb_train_dic, now_epoch) if config.val_interval > 0 and now_epoch % config.val_interval == 0: acc, advacc = eval_one_epoch(net, ds_val, DEVICE, EvalAttack) tb_val_dic = {'Acc': acc, 'AdvAcc': advacc} writer.add_scalars('Val', tb_val_dic, now_epoch) lr_scheduler.step() layer_one_optimizer_lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10/pre-res18.yopo-5-3/training_function.py ================================================ import torch import torch.nn as nn from config import config from loss import Hamiltonian, cal_l2_norm from utils.misc import torch_accuracy, AvgMeter from collections import OrderedDict from tqdm import tqdm class FastGradientLayerOneTrainer(object): def __init__(self, Hamiltonian_func, param_optimizer, inner_steps=2, sigma = 0.008, eps = 0.03): self.inner_steps = inner_steps self.sigma = sigma self.eps = eps self.Hamiltonian_func = Hamiltonian_func self.param_optimizer = param_optimizer def step(self, inp, p, eta): ''' Perform iterative sign-gradient updates on eta under the Hamiltonian, then accumulate the layer-one parameter gradient. Returns (yofo_inp, eta), where yofo_inp = clamp(inp + eta, 0, 1). ''' p = p.detach() for i in range(self.inner_steps): tmp_inp = inp + eta tmp_inp = torch.clamp(tmp_inp, 0, 1) H = self.Hamiltonian_func(tmp_inp, p) eta_grad_sign = torch.autograd.grad(H, eta, only_inputs=True, retain_graph=False)[0].sign() eta = eta - eta_grad_sign * self.sigma eta = torch.clamp(eta, -1.0 * self.eps, self.eps) eta = torch.clamp(inp + eta, 0.0, 1.0) - inp eta = eta.detach() eta.requires_grad_() eta.retain_grad() #self.param_optimizer.zero_grad() yofo_inp = eta + inp yofo_inp = torch.clamp(yofo_inp, 0, 1) loss = -1.0 * self.Hamiltonian_func(yofo_inp, p) loss.backward() #self.param_optimizer.step() #self.param_optimizer.zero_grad() return yofo_inp, eta def train_one_epoch(net, batch_generator, optimizer, criterion, LayerOneTrainer, K, DEVICE=torch.device('cuda:0'),descrip_str='Training'): ''' :param LayerOneTrainer: a FastGradientLayerOneTrainer that updates eta and the first layer via the Hamiltonian :param K: number of outer iterations per mini-batch (the 5 in YOPO-5-3) :return: (clean_acc, yofo_acc) ''' net.train() pbar = tqdm(batch_generator) yofoacc = -1 cleanacc = -1 cleanloss = -1 pbar.set_description(descrip_str) for i, (data, label) in enumerate(pbar): data = data.to(DEVICE) label = label.to(DEVICE) eta = torch.FloatTensor(*data.shape).uniform_(-config.eps, config.eps) eta = eta.to(label.device) eta.requires_grad_() optimizer.zero_grad() LayerOneTrainer.param_optimizer.zero_grad() for j in range(K): #optimizer.zero_grad() pbar_dic = OrderedDict() TotalLoss = 0 pred = net(data + eta.detach()) loss = criterion(pred, label) TotalLoss = TotalLoss + loss # wgrad = net.conv1.weight.grad #bgrad = net.conv1.bias.grad TotalLoss.backward() # net.conv1.weight.grad = wgrad #net.conv1.bias.grad = bgrad #param = next(net.parameters()) #grad_mean = torch.mean(param.grad) #optimizer.step() #optimizer.zero_grad() p = -1.0 * net.layer_one_out.grad yofo_inp, eta = LayerOneTrainer.step(data, p, eta) with torch.no_grad(): if j == 0: acc = torch_accuracy(pred, label, (1,)) cleanacc = acc[0].item() cleanloss = loss.item() if j == K - 1: yofo_pred = net(yofo_inp) yofoacc = torch_accuracy(yofo_pred, label, (1,))[0].item() #pbar_dic['grad'] = '{}'.format(grad_mean) optimizer.step() LayerOneTrainer.param_optimizer.step() optimizer.zero_grad() LayerOneTrainer.param_optimizer.zero_grad() pbar_dic['Acc'] = '{:.2f}'.format(cleanacc) pbar_dic['loss'] = '{:.2f}'.format(cleanloss) pbar_dic['YofoAcc'] = '{:.2f}'.format(yofoacc) pbar.set_postfix(pbar_dic) return cleanacc, yofoacc ================================================ FILE: experiments/CIFAR10/wide34.natural/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 105 val_interval = 10 create_optimizer = SGDOptimizerMaker(lr =1e-1, momentum = 0.9, weight_decay = 2e-4) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [75, 90, 100], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss #create_attack_method = \ create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() # About data # C.inp_chn = 1 # C.num_class = 10 parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest
checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10/wide34.natural/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) #trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) #testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10/wide34.natural/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/CIFAR10/wide34.natural/network.py ================================================ import config from base_model.wide_resnet import WideResNet def create_network(): return WideResNet(34) def test(): net = create_network() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10/wide34.natural/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import train_one_epoch, eval_one_epoch import torch import json import time import numpy as np from tensorboardX import SummaryWriter import argparse import os from collections 
import OrderedDict DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) optimizer = config.create_optimizer(net.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) TrainAttack = config.create_attack_method(DEVICE) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) train_one_epoch(net, ds_train, optimizer, criterion, DEVICE, descrip_str, TrainAttack, adv_coef = args.adv_coef) if config.val_interval > 0 and now_epoch % config.val_interval == 0: eval_one_epoch(net, ds_val, DEVICE, EvalAttack) lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10/wide34.pgd10/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 105 val_interval = 10 create_optimizer = SGDOptimizerMaker(lr =1e-1, momentum = 0.9, weight_decay = 2e-4) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [75, 90, 100], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss create_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 10, norm = np.inf, mean = torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std = torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() # About data # C.inp_chn = 1 # C.num_class = 10 parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() 
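# Summary of this experiment: WideResNet-34 (see network.py) trained with
# 10-step PGD adversarial training (eps = 8/255, step size sigma = 2/255) and
# evaluated against 20-step PGD at the same eps, as configured above.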
if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10/wide34.pgd10/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) #trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) #testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10/wide34.pgd10/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/CIFAR10/wide34.pgd10/network.py ================================================ import config from base_model.wide_resnet import WideResNet def create_network(): return WideResNet(34) def test(): net = create_network() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10/wide34.pgd10/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import train_one_epoch, eval_one_epoch import torch import json import time import numpy as np from tensorboardX import SummaryWriter import argparse import os from collections import OrderedDict DEVICE = 
torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) optimizer = config.create_optimizer(net.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) TrainAttack = config.create_attack_method(DEVICE) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) train_one_epoch(net, ds_train, optimizer, criterion, DEVICE, descrip_str, TrainAttack, adv_coef = args.adv_coef) if config.val_interval > 0 and now_epoch % config.val_interval == 0: eval_one_epoch(net, ds_val, DEVICE, EvalAttack) lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch from loss import CrossEntropyWithWeightPenlty def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 36 val_interval = 2 weight_decay = 5e-4 inner_iters = 3 K = 5 sigma = 2 / 255.0 eps = 8 / 255.0 create_optimizer = SGDOptimizerMaker(lr =1e-1 * 4 / K, momentum = 0.9, weight_decay = 5e-4) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [30, 34, 36], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss #create_attack_method = \ # IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 10, norm = np.inf, # mean = torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), # std = torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) create_attack_method = None create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() # About data # C.inp_chn = 1 # C.num_class = 10 parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') 
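# Note: train.py in this directory never passes adv_coef to train_one_epoch
# (the YOPO training function has no such parameter); the flag appears to be
# kept only for interface parity with the PGD experiments.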
parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) #trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) #testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/loss.py ================================================ import torch import torch.nn as nn from torch.nn.modules.loss import _Loss import torch.nn.functional as F class Hamiltonian(_Loss): def __init__(self, layer, reg_cof = 1e-4): super(Hamiltonian, self).__init__() self.layer = layer self.reg_cof = 0 def forward(self, x, p): y = self.layer(x) #l2 = cal_l2_norm(self.layer) #print(y.shape, p.shape) H = torch.sum(y * p) #H = H - self.reg_cof * l2 return H class CrossEntropyWithWeightPenlty(_Loss): def __init__(self, module, DEVICE, reg_cof = 1e-4): super(CrossEntropyWithWeightPenlty, self).__init__() self.reg_cof = reg_cof self.criterion = nn.CrossEntropyLoss().to(DEVICE) 
self.module = module #print(modules, 'dwadaQ!') def __call__(self, pred, label): cross_loss = self.criterion(pred, label) weight_loss = 0 #for module in self.module: # print(module) # weight_loss = weight_loss + cal_l2_norm(module) weight_loss = cal_l2_norm(self.module) loss = cross_loss + self.reg_cof * weight_loss return loss def cal_l2_norm(layer: torch.nn.Module): loss = 0. for name, param in layer.named_parameters(): if name == 'weight': loss = loss + 0.5 * torch.norm(param,) ** 2 return loss ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/network.py ================================================ import config from base_model.wide_resnet import WideResNet def create_network(): return WideResNet(34) def test(): net = create_network() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import eval_one_epoch from loss import Hamiltonian, CrossEntropyWithWeightPenlty from training_function import train_one_epoch, FastGradientLayerOneTrainer import torch import json import numpy as np from tensorboardX import SummaryWriter import argparse import torch.nn as nn import torch.optim as optim import os from collections import OrderedDict DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True writer = SummaryWriter(log_dir=config.log_dir) net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) #criterion = CrossEntropyWithWeightPenlty(net.other_layers, DEVICE, config.weight_decay)#.to(DEVICE) #ce_criterion = nn.CrossEntropyLoss().to(DEVICE) optimizer = config.create_optimizer(net.other_layers.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) ## Make Layer One trainner This part of code should be writen in config.py Hamiltonian_func = Hamiltonian(net.layer_one, config.weight_decay) layer_one_optimizer = optim.SGD(net.layer_one.parameters(), lr = lr_scheduler.get_lr()[0], momentum=0.9, weight_decay=5e-4) lyaer_one_optimizer_lr_scheduler = optim.lr_scheduler.MultiStepLR(layer_one_optimizer, milestones = [30, 34, 36], gamma = 0.1) LayerOneTrainer = FastGradientLayerOneTrainer(Hamiltonian_func, layer_one_optimizer, config.inner_iters, config.sigma, config.eps) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) #TrainAttack = config.create_attack_method(DEVICE) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) acc, yofoacc = train_one_epoch(net, ds_train, optimizer, criterion, LayerOneTrainer, config.K, DEVICE, descrip_str) tb_train_dic = {'Acc':acc, 'YofoAcc':yofoacc} print(tb_train_dic) writer.add_scalars('Train', tb_train_dic, now_epoch) if config.val_interval > 0 and now_epoch % config.val_interval == 0: acc, advacc = eval_one_epoch(net, ds_val, 
DEVICE, EvalAttack) tb_val_dic = {'Acc': acc, 'AdvAcc': advacc} writer.add_scalars('Val', tb_val_dic, now_epoch) lr_scheduler.step() lyaer_one_optimizer_lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10/wide34.yopo-5-3/training_function.py ================================================ import torch import torch.nn as nn from config import config from loss import Hamiltonian, cal_l2_norm from utils.misc import torch_accuracy, AvgMeter from collections import OrderedDict import torch from tqdm import tqdm class FastGradientLayerOneTrainer(object): def __init__(self, Hamiltonian_func, param_optimizer, inner_steps=2, sigma = 0.008, eps = 0.03): self.inner_steps = inner_steps self.sigma = sigma self.eps = eps self.Hamiltonian_func = Hamiltonian_func self.param_optimizer = param_optimizer def step(self, inp, p, eta): ''' Perform Iterative Sign Gradient on eta ret: inp + eta ''' p = p.detach() for i in range(self.inner_steps): tmp_inp = inp + eta tmp_inp = torch.clamp(tmp_inp, 0, 1) H = self.Hamiltonian_func(tmp_inp, p) eta_grad_sign = torch.autograd.grad(H, eta, only_inputs=True, retain_graph=False)[0].sign() eta = eta - eta_grad_sign * self.sigma eta = torch.clamp(eta, -1.0 * self.eps, self.eps) eta = torch.clamp(inp + eta, 0.0, 1.0) - inp eta = eta.detach() eta.requires_grad_() eta.retain_grad() #self.param_optimizer.zero_grad() yofo_inp = eta + inp yofo_inp = torch.clamp(yofo_inp, 0, 1) loss = -1.0 * self.Hamiltonian_func(yofo_inp, p) loss.backward() #self.param_optimizer.step() #self.param_optimizer.zero_grad() return yofo_inp, eta def train_one_epoch(net, batch_generator, optimizer, criterion, LayerOneTrainner, K, DEVICE=torch.device('cuda:0'),descrip_str='Training'): ''' :param attack_freq: Frequencies of training with adversarial examples. 
-1 indicates natural training :param AttackMethod: the attack method, None represents natural training :return: None #(clean_acc, adv_acc) ''' net.train() pbar = tqdm(batch_generator) yofoacc = -1 cleanacc = -1 cleanloss = -1 pbar.set_description(descrip_str) for i, (data, label) in enumerate(pbar): data = data.to(DEVICE) label = label.to(DEVICE) eta = torch.FloatTensor(*data.shape).uniform_(-config.eps, config.eps) eta = eta.to(label.device) eta.requires_grad_() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() for j in range(K): #optimizer.zero_grad() pbar_dic = OrderedDict() TotalLoss = 0 pred = net(data + eta.detach()) loss = criterion(pred, label) TotalLoss = TotalLoss + loss wgrad = net.conv1.weight.grad #bgrad = net.conv1.bias.grad TotalLoss.backward() net.conv1.weight.grad = wgrad #net.conv1.bias.grad = bgrad #param = next(net.parameters()) #grad_mean = torch.mean(param.grad) #optimizer.step() #optimizer.zero_grad() p = -1.0 * net.layer_one_out.grad yofo_inp, eta = LayerOneTrainner.step(data, p, eta) with torch.no_grad(): if j == 0: acc = torch_accuracy(pred, label, (1,)) cleanacc = acc[0].item() cleanloss = loss.item() if j == K - 1: yofo_pred = net(yofo_inp) yofoacc = torch_accuracy(yofo_pred, label, (1,))[0].item() #pbar_dic['grad'] = '{}'.format(grad_mean) optimizer.step() LayerOneTrainner.param_optimizer.step() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() pbar_dic['Acc'] = '{:.2f}'.format(cleanacc) pbar_dic['loss'] = '{:.2f}'.format(cleanloss) pbar_dic['YofoAcc'] = '{:.2f}'.format(yofoacc) pbar.set_postfix(pbar_dic) return cleanacc, yofoacc ================================================ FILE: experiments/CIFAR10-TRADES/baseline.res-pre18.TRADES.10step/config.py ================================================ import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): create_optimizer = None #SGDOptimizerMaker(lr =1e-1 * 5 / K, momentum = 0.9, weight_decay = 5e-4) create_lr_scheduler = None #PieceWiseConstantLrSchedulerMaker(milestones = [35, 40, 45], gamma = 0.1) # create_loss_function = None #torch.nn.CrossEntropyLoss create_attack_method = None create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() parser = argparse.ArgumentParser(description='PyTorch CIFAR TRADES Adversarial Training') parser.add_argument('--batch-size', type=int, default=200, metavar='N', help='input batch size for training (default: 128)') parser.add_argument('--test-batch-size', type=int, default=256, metavar='N', help='input batch size for testing (default: 128)') parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train') parser.add_argument('--weight-decay', '--wd', default=2e-4, type=float, metavar='W') parser.add_argument('--lr', type=float, default=0.1, metavar='LR', help='learning rate') 
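# ---------------------------------------------------------------------------
# Added illustration (not part of the original repository): the YOPO-m-n loop
# implemented in experiments/CIFAR10/wide34.yopo-5-3/training_function.py can
# be summarized by the sketch below. The names are hypothetical placeholders;
# it assumes, as in this repo's networks, that `net` retains the gradient of
# its first-layer output in `net.layer_one_out`, and that `hamiltonian(x, p)`
# computes H = <p, f_1(x)> as in loss.Hamiltonian. Each of the K outer
# iterations does one full propagation to refresh the co-state
# p = -dL/d(layer_one_out); the inner iterations then update the perturbation
# eta through the cheap first layer only.


def yopo_outer_loop_sketch(net, hamiltonian, data, label, criterion,
                           K=5, inner_steps=3, sigma=2 / 255.0, eps=8 / 255.0):
    """Structural sketch of YOPO-K-n for a single mini-batch (illustrative)."""
    import torch
    eta = torch.empty_like(data).uniform_(-eps, eps).requires_grad_()
    for _ in range(K):                                    # K full back-props
        loss = criterion(net(data + eta.detach()), label)
        loss.backward()                                   # fills layer_one_out.grad
        p = -1.0 * net.layer_one_out.grad                 # co-state at layer one
        for _ in range(inner_steps):                      # n cheap inner updates
            h = hamiltonian(torch.clamp(data + eta, 0, 1), p)
            g, = torch.autograd.grad(h, eta)
            eta = (eta - sigma * g.sign()).clamp(-eps, eps).detach().requires_grad_()
    return torch.clamp(data + eta, 0, 1)


# The real training loop additionally accumulates the K backward passes into
# the parameter gradients (restoring conv1.weight.grad so that layer one is
# trained only through the Hamiltonian) before a single optimizer.step().
# ---------------------------------------------------------------------------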
parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--epsilon', default=0.031, help='perturbation') parser.add_argument('--num-steps', default=10, help='perturb number of steps') parser.add_argument('--step-size', default=0.007, help='perturb step size') parser.add_argument('--beta', default=1.0, help='regularization, i.e., 1/lambda in TRADES') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=100, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--model-dir', default='./model-cifar-wideResNet', help='directory of model for saving checkpoint') parser.add_argument('--save-freq', '-s', default=5, type=int, metavar='N', help='save frequency') parser.add_argument('-d', default=0, type=int, help='which gpu to use') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10-TRADES/baseline.res-pre18.TRADES.10step/network.py ================================================ import config from base_model.wide_resnet import WideResNet from base_model.preact_resnet import PreActResNet18 def create_network(): # return WideResNet(34) return PreActResNet18() def test(): net = create_network() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10-TRADES/baseline.res-pre18.TRADES.10step/trades.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable from config import config from utils.misc import torch_accuracy, AvgMeter def squared_l2_norm(x): flattened = x.view(x.shape[0], -1) return (flattened ** 2).sum(1) def l2_norm(x): return squared_l2_norm(x).sqrt() def trades_loss(model, x_natural, y, optimizer, device, step_size=0.003, epsilon=0.031, perturb_steps=10, beta=1.0, distance='l_inf'): # define KL-loss criterion_kl = nn.KLDivLoss(size_average=False) model.eval() batch_size = len(x_natural) # generate adversarial example x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape).cuda().detach().to(device) if distance == 'l_inf': # logits_natural = model(x_natural).detach() for _ in range(perturb_steps): x_adv.requires_grad_() with torch.enable_grad(): loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1), F.softmax(model(x_natural), dim=1)) # loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1), # F.softmax(logits_natural, dim=1)) grad = torch.autograd.grad(loss_kl, [x_adv])[0] x_adv = x_adv.detach() + step_size * torch.sign(grad.detach()) x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon) x_adv = torch.clamp(x_adv, 0.0, 1.0) elif distance == 'l_2': for _ in range(perturb_steps): x_adv.requires_grad_() with torch.enable_grad(): loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1), F.softmax(model(x_natural), dim=1)) grad = torch.autograd.grad(loss_kl, [x_adv])[0] for idx_batch in range(batch_size): grad_idx = grad[idx_batch] grad_idx_norm = l2_norm(grad_idx) grad_idx /= (grad_idx_norm + 1e-8) x_adv[idx_batch] = x_adv[idx_batch].detach() + step_size * grad_idx eta_x_adv = x_adv[idx_batch] - x_natural[idx_batch] norm_eta = l2_norm(eta_x_adv) if norm_eta > epsilon: eta_x_adv = eta_x_adv * epsilon / 
l2_norm(eta_x_adv)
                x_adv[idx_batch] = x_natural[idx_batch] + eta_x_adv
            x_adv = torch.clamp(x_adv, 0.0, 1.0)
    else:
        x_adv = torch.clamp(x_adv, 0.0, 1.0)
    model.train()

    x_adv = Variable(torch.clamp(x_adv, 0.0, 1.0), requires_grad=False)
    # zero gradient
    optimizer.zero_grad()
    # calculate robust loss
    logits = model(x_natural)
    adv_logits = model(x_adv)
    loss_natural = F.cross_entropy(logits, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(adv_logits, dim=1),
                                                    F.softmax(logits, dim=1))
    loss = loss_natural + beta * loss_robust

    cleanacc = torch_accuracy(logits, y, (1,))[0].item()
    tradesacc = torch_accuracy(adv_logits, y, (1,))[0].item()
    return loss, loss_natural.item(), loss_robust.item(), cleanacc, tradesacc

================================================
FILE: experiments/CIFAR10-TRADES/baseline.res-pre18.TRADES.10step/train_trades_cifar10.py
================================================
from __future__ import print_function
import os
from tqdm import tqdm
from collections import OrderedDict
from time import time
import json
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torchvision import datasets, transforms

from config import config, args
from network import create_network
from trades import trades_loss
from training.train import eval_one_epoch
from utils.misc import torch_accuracy, AvgMeter

# settings
model_dir = args.model_dir
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
device = torch.device('cuda:{}'.format(args.d) if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# setup data loader
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
])
trainset = torchvision.datasets.CIFAR10(root='../data', train=True, download=True,
                                        transform=transform_train)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                           shuffle=True, **kwargs)
testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True,
                                       transform=transform_test)
test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size,
                                          shuffle=False, **kwargs)


def train(args, model, device, train_loader, optimizer, epoch, descrip_str='Training'):
    model.train()
    pbar = tqdm(train_loader)
    pbar.set_description(descrip_str)

    CleanAccMeter = AvgMeter()
    TradesAccMeter = AvgMeter()

    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # calculate robust loss
        loss, cleanloss, klloss, cleanacc, tradesacc = trades_loss(model=model,
                                                                   x_natural=data,
                                                                   y=target,
                                                                   optimizer=optimizer,
                                                                   device=device,
                                                                   step_size=args.step_size,
                                                                   epsilon=args.epsilon,
                                                                   perturb_steps=args.num_steps,
                                                                   beta=args.beta,)
        loss.backward()
        optimizer.step()

        CleanAccMeter.update(cleanacc)
        TradesAccMeter.update(tradesacc)

        pbar_dic = OrderedDict()
        pbar_dic['cleanloss'] = '{:.3f}'.format(cleanloss)
        pbar_dic['klloss'] = '{:.3f}'.format(klloss)
        pbar_dic['CleanAcc'] = '{:.2f}'.format(CleanAccMeter.mean)
        pbar_dic['TradesAcc'] = '{:.2f}'.format(TradesAccMeter.mean)
        pbar.set_postfix(pbar_dic)


def adjust_learning_rate(optimizer, epoch):
    """Decrease the learning rate at the scheduled epochs."""
    lr = args.lr
    # The thresholds must be tested from largest to smallest; otherwise the
    # `epoch >= 75` branch shadows the later milestones and they never fire.
    if epoch >= 100:
        lr = args.lr * 0.001
    elif epoch >= 90:
        lr = args.lr * 0.01
    elif epoch >= 75:
        lr = args.lr * 0.1
    for
param_group in optimizer.param_groups: param_group['lr'] = lr def main(): model = create_network().to(device) optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) EvalAttack = config.create_evaluation_attack_method(device) now_train_time = 0 for epoch in range(1, args.epochs + 1): # adjust learning rate for SGD adjust_learning_rate(optimizer, epoch) s_time = time() descrip_str = 'Training epoch: {}/{}'.format(epoch, args.epochs) # adversarial training train(args, model, device, train_loader, optimizer, epoch, descrip_str) now_train_time += time() - s_time acc, advacc = eval_one_epoch(model, test_loader, device, EvalAttack) # save checkpoint if epoch % args.save_freq == 0: torch.save(model.state_dict(), os.path.join(config.model_dir, 'model-wideres-epoch{}.pt'.format(epoch))) if __name__ == '__main__': main() ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-2-5/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 105 val_interval = 10 weight_decay = 5e-4 inner_iters = 5 K = 2 sigma = 0.007 eps = 0.031 create_optimizer = SGDOptimizerMaker(lr=2e-1, momentum = 0.9, weight_decay = weight_decay) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [70, 90, 100], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss create_attack_method = None create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-2-5/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, 
batch_size=batch_size, shuffle=True, num_workers=0) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-2-5/loss.py ================================================ import torch import torch.nn as nn from torch.nn.modules.loss import _Loss import torch.nn.functional as F class Hamiltonian(_Loss): def __init__(self, layer, reg_cof = 1e-4): super(Hamiltonian, self).__init__() self.layer = layer self.reg_cof = 0 def forward(self, x, p): y = self.layer(x) H = torch.sum(y * p) return H class CrossEntropyWithWeightPenlty(_Loss): def __init__(self, module, DEVICE, reg_cof = 1e-4): super(CrossEntropyWithWeightPenlty, self).__init__() self.reg_cof = reg_cof self.criterion = nn.CrossEntropyLoss().to(DEVICE) self.module = module def __call__(self, pred, label): cross_loss = self.criterion(pred, label) weight_loss = cal_l2_norm(self.module) loss = cross_loss + self.reg_cof * weight_loss return loss def cal_l2_norm(layer: torch.nn.Module): loss = 0. for name, param in layer.named_parameters(): if name == 'weight': loss = loss + 0.5 * torch.norm(param,) ** 2 return loss ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-2-5/network.py ================================================ '''Pre-activation ResNet in PyTorch. Reference: [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun Identity Mappings in Deep Residual Networks. 
arXiv:1603.05027 ''' import torch import torch.nn as nn import torch.nn.functional as F class PreActBlock(nn.Module): '''Pre-activation version of the BasicBlock.''' expansion = 1 def __init__(self, in_planes, planes, stride=1): super(PreActBlock, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) if stride != 1 or in_planes != self.expansion * planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out += shortcut return out class PreActResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10): super(PreActResNet, self).__init__() self.in_planes = 64 self.other_layers = nn.ModuleList() self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.layer_one = self.conv1 self.other_layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) self.other_layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) self.other_layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) self.other_layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) self.linear = GlobalpoolFC(512 * block.expansion, num_classes) self.other_layers.append(self.linear) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1] * (num_blocks - 1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.other_layers.append(layers[-1]) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): x = self.layer_one(x) self.layer_one_out = x self.layer_one_out.requires_grad_() self.layer_one_out.retain_grad() x = self.layer_one_out for layer in self.other_layers: x = layer(x) return x class GlobalpoolFC(nn.Module): def __init__(self, num_in, num_class): super(GlobalpoolFC, self).__init__() self.pool = nn.AdaptiveAvgPool2d(output_size=1) self.fc = nn.Linear(num_in, num_class) def forward(self, x): y = self.pool(x) y = y.reshape(y.shape[0], -1) y = self.fc(y) return y def PreActResNet18(): return PreActResNet(PreActBlock, [2, 2, 2, 2]) def PreActResNet34(): return PreActResNet(PreActBlock, [3, 4, 6, 3]) class PreActBottleneck(nn.Module): '''Pre-activation version of the original Bottleneck module.''' expansion = 4 def __init__(self, in_planes, planes, stride=1): super(PreActBottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out = self.conv3(F.relu(self.bn3(out))) out += shortcut return out def create_network(): return PreActResNet18() def test(): net = 
PreActResNet18() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-2-5/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import eval_one_epoch from loss import Hamiltonian, CrossEntropyWithWeightPenlty from training_function import train_one_epoch, FastGradientLayerOneTrainer import torch import torch.optim as optim import os DEVICE = torch.device('cuda:{}'.format(args.d)) net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) optimizer = config.create_optimizer(net.other_layers.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) Hamiltonian_func = Hamiltonian(net.layer_one, config.weight_decay) layer_one_optimizer = optim.SGD(net.layer_one.parameters(), lr = lr_scheduler.get_lr()[0], momentum=0.9, weight_decay=5e-4) lyaer_one_optimizer_lr_scheduler = optim.lr_scheduler.MultiStepLR(layer_one_optimizer, milestones = [70, 90, 100], gamma = 0.1) LayerOneTrainer = FastGradientLayerOneTrainer(Hamiltonian_func, layer_one_optimizer, config.inner_iters, config.sigma, config.eps) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) acc, yofoacc = train_one_epoch(net, ds_train, optimizer, criterion, LayerOneTrainer, config.K, DEVICE, descrip_str) acc, advacc = eval_one_epoch(net, ds_val, DEVICE, EvalAttack) lr_scheduler.step() lyaer_one_optimizer_lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-2-5/training_function.py ================================================ import torch import torch.nn as nn from config import config from loss import Hamiltonian, cal_l2_norm import torch.nn.functional as F from utils.misc import torch_accuracy, AvgMeter from collections import OrderedDict import torch from tqdm import tqdm class FastGradientLayerOneTrainer(object): def __init__(self, Hamiltonian_func, param_optimizer, inner_steps=2, sigma = 0.008, eps = 0.03): self.inner_steps = inner_steps self.sigma = sigma self.eps = eps self.Hamiltonian_func = Hamiltonian_func self.param_optimizer = param_optimizer def step(self, inp, p, eta): ''' Perform Iterative Sign Gradient on eta ret: inp + eta ''' p = p.detach() for i in range(self.inner_steps): tmp_inp = inp + eta tmp_inp = torch.clamp(tmp_inp, 0, 1) H = self.Hamiltonian_func(tmp_inp, p) eta_grad = torch.autograd.grad(H, eta, only_inputs=True, retain_graph=False)[0] eta_grad_sign = eta_grad.sign() eta = eta - eta_grad_sign * self.sigma eta = torch.clamp(eta, -1.0 * self.eps, self.eps) eta = torch.clamp(inp + eta, 0.0, 1.0) - inp eta = eta.detach() 
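# (added comments) The detach() above cuts eta out of the inner-loop graph so
# that each sign-gradient step starts from a fresh leaf tensor; the
# requires_grad_()/retain_grad() calls below re-register eta so the final
# Hamiltonian backward pass can populate eta.grad while also accumulating
# gradients into the layer-one parameters.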
eta.requires_grad_() eta.retain_grad() yofo_inp = eta + inp yofo_inp = torch.clamp(yofo_inp, 0, 1) loss = -1.0 * (self.Hamiltonian_func(yofo_inp, p) - config.weight_decay * cal_l2_norm(self.Hamiltonian_func.layer)) loss.backward() return yofo_inp, eta def train_one_epoch(net, batch_generator, optimizer, criterion, LayerOneTrainner, K, DEVICE=torch.device('cuda:0'),descrip_str='Training'): net.train() pbar = tqdm(batch_generator) yofoacc = -1 pbar.set_description(descrip_str) trades_criterion = torch.nn.KLDivLoss(size_average=False) #.to(DEVICE) for i, (data, label) in enumerate(pbar): data = data.to(DEVICE) label = label.to(DEVICE) net.eval() eta = 0.001 * torch.randn(data.shape).cuda().detach().to(DEVICE) eta.requires_grad_() raw_soft_label = F.softmax(net(data), dim=1).detach() for j in range(K): pred = net(data + eta.detach()) with torch.enable_grad(): loss = trades_criterion(F.log_softmax(pred, dim = 1), raw_soft_label)#raw_soft_label.detach()) p = -1.0 * torch.autograd.grad(loss, [net.layer_one_out, ])[0] yofo_inp, eta = LayerOneTrainner.step(data, p, eta) with torch.no_grad(): if j == K - 1: yofo_pred = net(yofo_inp) yofo_loss = criterion(yofo_pred, label) yofoacc = torch_accuracy(yofo_pred, label, (1,))[0].item() net.train() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() raw_pred = net(data) acc = torch_accuracy(raw_pred, label, (1,)) clean_acc = acc[0].item() clean_loss = criterion(raw_pred, label) adv_pred = net(torch.clamp(data + eta.detach(), 0.0, 1.0)) kl_loss = trades_criterion(F.log_softmax(adv_pred, dim=1), F.softmax(raw_pred, dim=1)) / data.shape[0] loss = clean_loss + kl_loss loss.backward() optimizer.step() LayerOneTrainner.param_optimizer.step() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() pbar_dic = OrderedDict() pbar_dic['Acc'] = '{:.2f}'.format(clean_acc) pbar_dic['cleanloss'] = '{:.3f}'.format(clean_loss.item()) pbar_dic['klloss'] = '{:.3f}'.format(kl_loss.item()) pbar_dic['YofoAcc'] = '{:.2f}'.format(yofoacc) pbar_dic['Yofoloss'] = '{:.3f}'.format(yofo_loss.item()) pbar.set_postfix(pbar_dic) return clean_acc, yofoacc ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-3-4/config.py ================================================ from easydict import EasyDict import sys import os import argparse import numpy as np import torch from loss import CrossEntropyWithWeightPenlty def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 105 val_interval = 10 weight_decay = 5e-4 inner_iters = 4 K = 3 sigma = 0.007 eps = 0.031 create_optimizer = SGDOptimizerMaker(lr =2e-1, momentum = 0.9, weight_decay = weight_decay) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [70, 90, 100], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss create_attack_method = None create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 8/255.0, sigma = 2/255.0, nb_iters = 20, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, 
np.newaxis])) config = TrainingConfing() parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-3-4/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), ]) trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), ]) testset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-3-4/loss.py ================================================ import torch import torch.nn as nn from torch.nn.modules.loss import _Loss import torch.nn.functional as F class Hamiltonian(_Loss): def __init__(self, layer, reg_cof = 1e-4): super(Hamiltonian, self).__init__() self.layer = layer self.reg_cof = 0 def forward(self, x, p): y = self.layer(x) H = torch.sum(y * p) return H class CrossEntropyWithWeightPenlty(_Loss): def __init__(self, module, DEVICE, reg_cof = 1e-4): super(CrossEntropyWithWeightPenlty, self).__init__() self.reg_cof = reg_cof self.criterion = nn.CrossEntropyLoss().to(DEVICE) self.module = module def __call__(self, pred, label): cross_loss = self.criterion(pred, label) weight_loss = cal_l2_norm(self.module) loss = cross_loss + self.reg_cof * weight_loss return loss def cal_l2_norm(layer: torch.nn.Module): loss = 0. for name, param in layer.named_parameters(): if name == 'weight': loss = loss + 0.5 * torch.norm(param,) ** 2 return loss ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-3-4/network.py ================================================ '''Pre-activation ResNet in PyTorch. Reference: [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun Identity Mappings in Deep Residual Networks. 
arXiv:1603.05027 ''' import torch import torch.nn as nn import torch.nn.functional as F class PreActBlock(nn.Module): '''Pre-activation version of the BasicBlock.''' expansion = 1 def __init__(self, in_planes, planes, stride=1): super(PreActBlock, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) if stride != 1 or in_planes != self.expansion * planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out += shortcut return out class PreActResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10): super(PreActResNet, self).__init__() self.in_planes = 64 self.other_layers = nn.ModuleList() self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.layer_one = self.conv1 self.other_layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) self.other_layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) self.other_layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) self.other_layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) self.linear = GlobalpoolFC(512 * block.expansion, num_classes) self.other_layers.append(self.linear) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1] * (num_blocks - 1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.other_layers.append(layers[-1]) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): x = self.layer_one(x) self.layer_one_out = x self.layer_one_out.requires_grad_() self.layer_one_out.retain_grad() x = self.layer_one_out for layer in self.other_layers: x = layer(x) return x class GlobalpoolFC(nn.Module): def __init__(self, num_in, num_class): super(GlobalpoolFC, self).__init__() self.pool = nn.AdaptiveAvgPool2d(output_size=1) self.fc = nn.Linear(num_in, num_class) def forward(self, x): y = self.pool(x) y = y.reshape(y.shape[0], -1) y = self.fc(y) return y def PreActResNet18(): return PreActResNet(PreActBlock, [2, 2, 2, 2]) def PreActResNet34(): return PreActResNet(PreActBlock, [3, 4, 6, 3]) class PreActBottleneck(nn.Module): '''Pre-activation version of the original Bottleneck module.''' expansion = 4 def __init__(self, in_planes, planes, stride=1): super(PreActBottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out = self.conv3(F.relu(self.bn3(out))) out += shortcut return out def create_network(): return PreActResNet18() def test(): net = 
PreActResNet18() y = net((torch.randn(1, 3, 32, 32))) print(y.size()) ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-3-4/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import eval_one_epoch from loss import Hamiltonian, CrossEntropyWithWeightPenlty from training_function import train_one_epoch, FastGradientLayerOneTrainer import torch import torch.optim as optim import os DEVICE = torch.device('cuda:{}'.format(args.d)) net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) optimizer = config.create_optimizer(net.other_layers.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) Hamiltonian_func = Hamiltonian(net.layer_one, config.weight_decay) layer_one_optimizer = optim.SGD(net.layer_one.parameters(), lr = lr_scheduler.get_lr()[0], momentum=0.9, weight_decay=5e-4) lyaer_one_optimizer_lr_scheduler = optim.lr_scheduler.MultiStepLR(layer_one_optimizer, milestones = [70, 90, 100], gamma = 0.1) LayerOneTrainer = FastGradientLayerOneTrainer(Hamiltonian_func, layer_one_optimizer, config.inner_iters, config.sigma, config.eps) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) acc, yofoacc = train_one_epoch(net, ds_train, optimizer, criterion, LayerOneTrainer, config.K, DEVICE, descrip_str) acc, advacc = eval_one_epoch(net, ds_val, DEVICE, EvalAttack) lr_scheduler.step() lyaer_one_optimizer_lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/CIFAR10-TRADES/pre-res18.TRADES-YOPO-3-4/training_function.py ================================================ import torch import torch.nn as nn from config import config from loss import Hamiltonian, cal_l2_norm import torch.nn.functional as F from utils.misc import torch_accuracy, AvgMeter from collections import OrderedDict import torch from tqdm import tqdm class FastGradientLayerOneTrainer(object): def __init__(self, Hamiltonian_func, param_optimizer, inner_steps=2, sigma = 0.008, eps = 0.03): self.inner_steps = inner_steps self.sigma = sigma self.eps = eps self.Hamiltonian_func = Hamiltonian_func self.param_optimizer = param_optimizer def step(self, inp, p, eta): p = p.detach() for i in range(self.inner_steps): tmp_inp = inp + eta tmp_inp = torch.clamp(tmp_inp, 0, 1) H = self.Hamiltonian_func(tmp_inp, p) eta_grad = torch.autograd.grad(H, eta, only_inputs=True, retain_graph=False)[0] eta_grad_sign = eta_grad.sign() eta = eta - eta_grad_sign * self.sigma eta = torch.clamp(eta, -1.0 * self.eps, self.eps) eta = torch.clamp(inp + eta, 0.0, 1.0) - inp eta = eta.detach() eta.requires_grad_() eta.retain_grad() yofo_inp = eta + inp yofo_inp = 
torch.clamp(yofo_inp, 0, 1) loss = -1.0 * (self.Hamiltonian_func(yofo_inp, p) - config.weight_decay * cal_l2_norm(self.Hamiltonian_func.layer)) loss.backward() return yofo_inp, eta def train_one_epoch(net, batch_generator, optimizer, criterion, LayerOneTrainner, K, DEVICE=torch.device('cuda:0'),descrip_str='Training'): net.train() pbar = tqdm(batch_generator) yofoacc = -1 pbar.set_description(descrip_str) trades_criterion = torch.nn.KLDivLoss(size_average=False) #.to(DEVICE) for i, (data, label) in enumerate(pbar): data = data.to(DEVICE) label = label.to(DEVICE) net.eval() eta = 0.001 * torch.randn(data.shape).cuda().detach().to(DEVICE) eta.requires_grad_() raw_soft_label = F.softmax(net(data), dim=1).detach() for j in range(K): pred = net(data + eta.detach()) with torch.enable_grad(): loss = trades_criterion(F.log_softmax(pred, dim = 1), raw_soft_label)#raw_soft_label.detach()) p = -1.0 * torch.autograd.grad(loss, [net.layer_one_out, ])[0] yofo_inp, eta = LayerOneTrainner.step(data, p, eta) with torch.no_grad(): if j == K - 1: yofo_pred = net(yofo_inp) yofo_loss = criterion(yofo_pred, label) yofoacc = torch_accuracy(yofo_pred, label, (1,))[0].item() net.train() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() raw_pred = net(data) acc = torch_accuracy(raw_pred, label, (1,)) clean_acc = acc[0].item() clean_loss = criterion(raw_pred, label) adv_pred = net(torch.clamp(data + eta.detach(), 0.0, 1.0)) kl_loss = trades_criterion(F.log_softmax(adv_pred, dim=1), F.softmax(raw_pred, dim=1)) / data.shape[0] loss = clean_loss + kl_loss loss.backward() optimizer.step() LayerOneTrainner.param_optimizer.step() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() pbar_dic = OrderedDict() pbar_dic['Acc'] = '{:.2f}'.format(clean_acc) pbar_dic['cleanloss'] = '{:.3f}'.format(clean_loss.item()) pbar_dic['klloss'] = '{:.3f}'.format(kl_loss.item()) pbar_dic['YofoAcc'] = '{:.2f}'.format(yofoacc) pbar_dic['Yofoloss'] = '{:.3f}'.format(yofo_loss.item()) pbar.set_postfix(pbar_dic) return clean_acc, yofoacc ================================================ FILE: experiments/MNIST/YOPO-5-10/config.py ================================================ import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 40 val_interval = 1 weight_decay = 5e-4 inner_iters = 10 K = 5 sigma = 0.01 eps = 0.3 create_optimizer = SGDOptimizerMaker(lr =1e-2 / K, momentum = 0.9, weight_decay = weight_decay) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [30, 35, 39], gamma = 0.1) create_loss_function = None create_attack_method = None create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 0.3, sigma = 0.01, nb_iters = 40, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') 
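# ---------------------------------------------------------------------------
# Added note (not in the original file): the lr = 1e-2 / K above appears to
# compensate for gradient accumulation. train_one_epoch calls backward() K
# times per mini-batch before a single optimizer.step(), so parameter
# gradients are roughly K times larger than in ordinary training; dividing
# the learning rate by K keeps the effective step size comparable. A minimal
# demonstration of the accumulation effect:


def _grad_accumulation_demo(K=5):
    import torch
    w = torch.zeros(1, requires_grad=True)
    for _ in range(K):                    # K backward passes, no zero_grad()
        (2.0 * w).sum().backward()
    # gradients sum across the K passes: 2.0 * K instead of 2.0
    assert torch.allclose(w.grad, torch.tensor([2.0 * K]))
# ---------------------------------------------------------------------------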
parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass ================================================ FILE: experiments/MNIST/YOPO-5-10/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.ToTensor(), ]) trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), ]) testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/MNIST/YOPO-5-10/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/MNIST/YOPO-5-10/loss.py ================================================ import torch import torch.nn as nn from torch.nn.modules.loss import _Loss import torch.nn.functional as F class Hamiltonian(_Loss): def __init__(self, layer, reg_cof = 1e-4): super(Hamiltonian, self).__init__() self.layer = layer self.reg_cof = 0 def forward(self, x, p): y = self.layer(x) H = torch.sum(y * p) return H class CrossEntropyWithWeightPenlty(_Loss): def __init__(self, module, DEVICE, reg_cof = 1e-4): super(CrossEntropyWithWeightPenlty, self).__init__() self.reg_cof = reg_cof self.criterion = nn.CrossEntropyLoss().to(DEVICE) self.module = module def __call__(self, pred, label): cross_loss = self.criterion(pred, label) weight_loss = cal_l2_norm(self.module) loss = cross_loss + self.reg_cof * weight_loss return loss def cal_l2_norm(layer: torch.nn.Module): loss = 0. 
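# (added note) named_parameters() yields dot-separated names for nested
# submodules (e.g. '0.conv1.weight'), so the equality test below only matches
# parameters owned directly by `layer`, such as the kernel of the bare Conv2d
# used as layer_one; for a nested container, a suffix check like
# name.endswith('weight') would be needed to reach the inner kernels.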
for name, param in layer.named_parameters(): if name == 'weight': loss = loss + 0.5 * torch.norm(param,) ** 2 return loss ================================================ FILE: experiments/MNIST/YOPO-5-10/network.py ================================================ import config from collections import OrderedDict import torch.nn as nn class SmallCNN(nn.Module): def __init__(self, drop=0.5): super(SmallCNN, self).__init__() self.num_channels = 1 self.num_labels = 10 activ = nn.ReLU(True) self.conv1 = nn.Conv2d(self.num_channels, 32, 3) self.layer_one = nn.Sequential(OrderedDict([ ('conv1', self.conv1), ('relu1', activ),])) self.feature_extractor = nn.Sequential(OrderedDict([ ('conv2', nn.Conv2d(32, 32, 3)), ('relu2', activ), ('maxpool1', nn.MaxPool2d(2, 2)), ('conv3', nn.Conv2d(32, 64, 3)), ('relu3', activ), ('conv4', nn.Conv2d(64, 64, 3)), ('relu4', activ), ('maxpool2', nn.MaxPool2d(2, 2)), ])) self.classifier = nn.Sequential(OrderedDict([ ('fc1', nn.Linear(64 * 4 * 4, 200)), ('relu1', activ), ('drop', nn.Dropout(drop)), ('fc2', nn.Linear(200, 200)), ('relu2', activ), ('fc3', nn.Linear(200, self.num_labels)), ])) self.other_layers = nn.ModuleList() self.other_layers.append(self.feature_extractor) self.other_layers.append(self.classifier) for m in self.modules(): if isinstance(m, (nn.Conv2d)): nn.init.kaiming_normal_(m.weight) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) nn.init.constant_(self.classifier.fc3.weight, 0) nn.init.constant_(self.classifier.fc3.bias, 0) def forward(self, input): y = self.layer_one(input) self.layer_one_out = y self.layer_one_out.requires_grad_() self.layer_one_out.retain_grad() features = self.feature_extractor(y) logits = self.classifier(features.view(-1, 64 * 4 * 4)) return logits def create_network(): return SmallCNN() def test(): net = create_network() y = net((torch.randn(1, 1, 28, 28))) print(y.size()) ================================================ FILE: experiments/MNIST/YOPO-5-10/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import eval_one_epoch from loss import Hamiltonian, CrossEntropyWithWeightPenlty from training_function import train_one_epoch, FastGradientLayerOneTrainer import torch import json import numpy as np # from tensorboardX import SummaryWriter import torch.nn as nn import torch.optim as optim import os import time DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True # writer = SummaryWriter(log_dir=config.log_dir) net = create_network() net.to(DEVICE) criterion = CrossEntropyWithWeightPenlty(net.other_layers, DEVICE, config.weight_decay)#.to(DEVICE) optimizer = config.create_optimizer(net.other_layers.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) Hamiltonian_func = Hamiltonian(net.layer_one, config.weight_decay) layer_one_optimizer = optim.SGD(net.layer_one.parameters(), lr = lr_scheduler.get_lr()[0], momentum=0.9, weight_decay=5e-4) lyaer_one_optimizer_lr_scheduler = optim.lr_scheduler.MultiStepLR(layer_one_optimizer, milestones = [15, 19], gamma = 0.1) LayerOneTrainer = FastGradientLayerOneTrainer(Hamiltonian_func, layer_one_optimizer, config.inner_iters, config.sigma, config.eps) ds_train = create_train_dataset(args.batch_size) ds_val = 
create_test_dataset(args.batch_size) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = load_checkpoint(args.resume, net, optimizer,lr_scheduler) now_train_time = 0 while True: if now_epoch > config.num_epochs: break now_epoch = now_epoch + 1 descrip_str = 'Training epoch:{}/{} -- lr:{}'.format(now_epoch, config.num_epochs, lr_scheduler.get_lr()[0]) s_time = time.time() acc, yofoacc = train_one_epoch(net, ds_train, optimizer, criterion, LayerOneTrainer, config.K, DEVICE, descrip_str) now_train_time = now_train_time + time.time() - s_time tb_train_dic = {'Acc':acc, 'YofoAcc':yofoacc} print(tb_train_dic) # writer.add_scalars('Train', tb_train_dic, now_epoch) if config.val_interval > 0 and now_epoch % config.val_interval == 0: acc, advacc = eval_one_epoch(net, ds_val, DEVICE, EvalAttack) tb_val_dic = {'Acc': acc, 'AdvAcc': advacc} # writer.add_scalars('Val', tb_val_dic, now_epoch) tb_val_dic['time'] = now_train_time log_str = json.dumps(tb_val_dic) with open('time.log', 'a') as f: f.write(log_str+ '\n') lr_scheduler.step() lyaer_one_optimizer_lr_scheduler.step() save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name = os.path.join(config.model_dir, 'epoch-{}.checkpoint'.format(now_epoch))) ================================================ FILE: experiments/MNIST/YOPO-5-10/training_function.py ================================================ import torch import torch.nn as nn from config import config from loss import Hamiltonian, cal_l2_norm from utils.misc import torch_accuracy, AvgMeter from collections import OrderedDict import torch from tqdm import tqdm class FastGradientLayerOneTrainer(object): def __init__(self, Hamiltonian_func, param_optimizer, inner_steps=2, sigma = 0.008, eps = 0.03): self.inner_steps = inner_steps self.sigma = sigma self.eps = eps self.Hamiltonian_func = Hamiltonian_func self.param_optimizer = param_optimizer def step(self, inp, p, eta): ''' Perform Iterative Sign Gradient on eta ret: inp + eta ''' p = p.detach() for i in range(self.inner_steps): tmp_inp = inp + eta tmp_inp = torch.clamp(tmp_inp, 0, 1) H = self.Hamiltonian_func(tmp_inp, p) eta_grad_sign = torch.autograd.grad(H, eta, only_inputs=True, retain_graph=False)[0].sign() eta = eta - eta_grad_sign * self.sigma eta = torch.clamp(eta, -1.0 * self.eps, self.eps) eta = torch.clamp(inp + eta, 0.0, 1.0) - inp eta = eta.detach() eta.requires_grad_() eta.retain_grad() #self.param_optimizer.zero_grad() yofo_inp = eta + inp yofo_inp = torch.clamp(yofo_inp, 0, 1) loss = -1.0 * self.Hamiltonian_func(yofo_inp, p) loss.backward() #self.param_optimizer.step() #self.param_optimizer.zero_grad() return yofo_inp, eta def train_one_epoch(net, batch_generator, optimizer, criterion, LayerOneTrainner, K, DEVICE=torch.device('cuda:0'),descrip_str='Training'): ''' :param attack_freq: Frequencies of training with adversarial examples. 
-1 indicates natural training :param AttackMethod: the attack method, None represents natural training :return: None #(clean_acc, adv_acc) ''' net.train() pbar = tqdm(batch_generator) yofoacc = -1 cleanacc = -1 cleanloss = -1 pbar.set_description(descrip_str) for i, (data, label) in enumerate(pbar): data = data.to(DEVICE) label = label.to(DEVICE) eta = torch.FloatTensor(*data.shape).uniform_(-config.eps, config.eps) eta = eta.to(label.device) eta.requires_grad_() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() for j in range(K): pbar_dic = OrderedDict() TotalLoss = 0 pred = net(data + eta.detach()) loss = criterion(pred, label) TotalLoss = TotalLoss + loss wgrad = net.conv1.weight.grad TotalLoss.backward() net.conv1.weight.grad = wgrad p = -1.0 * net.layer_one_out.grad yofo_inp, eta = LayerOneTrainner.step(data, p, eta) with torch.no_grad(): if j == 0: acc = torch_accuracy(pred, label, (1,)) cleanacc = acc[0].item() cleanloss = loss.item() if j == K - 1: yofo_pred = net(yofo_inp) yofoacc = torch_accuracy(yofo_pred, label, (1,))[0].item() optimizer.step() LayerOneTrainner.param_optimizer.step() optimizer.zero_grad() LayerOneTrainner.param_optimizer.zero_grad() pbar_dic['Acc'] = '{:.2f}'.format(cleanacc) pbar_dic['loss'] = '{:.2f}'.format(cleanloss) pbar_dic['YofoAcc'] = '{:.2f}'.format(yofoacc) pbar.set_postfix(pbar_dic) return cleanacc, yofoacc ================================================ FILE: experiments/MNIST/pgd40/config.py ================================================ import sys import os import argparse import numpy as np import torch def add_path(path): if path not in sys.path: print('Adding {}'.format(path)) sys.path.append(path) abs_current_path = os.path.realpath('./') root_path = os.path.join('/', *abs_current_path.split(os.path.sep)[:-3]) lib_dir = os.path.join(root_path, 'lib') add_path(lib_dir) from training.config import TrainingConfigBase, SGDOptimizerMaker, \ PieceWiseConstantLrSchedulerMaker, IPGDAttackMethodMaker class TrainingConfing(TrainingConfigBase): lib_dir = lib_dir num_epochs = 56 val_interval = 1 create_optimizer = SGDOptimizerMaker(lr =1e-1, momentum = 0.9, weight_decay = 5e-4) create_lr_scheduler = PieceWiseConstantLrSchedulerMaker(milestones = [50, 55], gamma = 0.1) create_loss_function = torch.nn.CrossEntropyLoss create_attack_method = \ IPGDAttackMethodMaker(eps = 0.3, sigma = 0.01, nb_iters = 40, norm = np.inf, mean = torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std = torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) create_evaluation_attack_method = \ IPGDAttackMethodMaker(eps = 0.3, sigma = 0.01, nb_iters = 40, norm = np.inf, mean=torch.tensor( np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]), std=torch.tensor(np.array([1]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])) config = TrainingConfing() parser = argparse.ArgumentParser() parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-b', '--batch_size', default=256, type=int, metavar='N', help='mini-batch size') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') parser.add_argument('-adv_coef', default=1.0, type = float, help = 'Specify the weight for adversarial loss') parser.add_argument('--auto-continue', default=False, action = 'store_true', help = 'Continue from the latest checkpoint') args = parser.parse_args() if __name__ == '__main__': pass 
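# ---------------------------------------------------------------------------
# Added illustration (not part of the original repository): the wgrad
# save/restore in the MNIST YOPO train_one_epoch above keeps the full
# cross-entropy backward pass from contributing to layer one, whose
# parameters are updated only through the Hamiltonian loss inside
# FastGradientLayerOneTrainer.step. The pattern, with hypothetical names:


def backward_excluding_first_layer(net, loss):
    """Backward pass whose gradient contribution to net.conv1 is discarded."""
    saved = net.conv1.weight.grad       # stash whatever has accumulated so far
    loss.backward()                     # fills .grad for every parameter
    net.conv1.weight.grad = saved       # drop the fresh grad on layer one
# ---------------------------------------------------------------------------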
================================================ FILE: experiments/MNIST/pgd40/dataset.py ================================================ import torch import torchvision import torchvision.transforms as transforms import numpy as np def create_train_dataset(batch_size = 128, root = '../data'): transform_train = transforms.Compose([ transforms.ToTensor(), ]) trainset = torchvision.datasets.MNIST(root=root, train=True, download=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2) return trainloader def create_test_dataset(batch_size = 128, root = '../data'): transform_test = transforms.Compose([ transforms.ToTensor(), ]) testset = torchvision.datasets.MNIST(root=root, train=False, download=True, transform=transform_test) testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2) return testloader if __name__ == '__main__': print(create_train_dataset()) print(create_test_dataset()) ================================================ FILE: experiments/MNIST/pgd40/eval.py ================================================ from config import config from dataset import create_test_dataset from network import create_network from training.train import eval_one_epoch from utils.misc import load_checkpoint import argparse import torch import numpy as np import os parser = argparse.ArgumentParser() parser.add_argument('--resume', '--resume', default='log/models/last.checkpoint', type=str, metavar='PATH', help='path to latest checkpoint (default:log/last.checkpoint)') parser.add_argument('-d', type=int, default=0, help='Which gpu to use') args = parser.parse_args() DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) ds_val = create_test_dataset(512) AttackMethod = config.create_evaluation_attack_method(DEVICE) if os.path.isfile(args.resume): load_checkpoint(args.resume, net) print('Evaluating') clean_acc, adv_acc = eval_one_epoch(net, ds_val, DEVICE, AttackMethod) print('clean acc -- {} adv acc -- {}'.format(clean_acc, adv_acc)) ================================================ FILE: experiments/MNIST/pgd40/network.py ================================================ import config from base_model.small_cnn import SmallCNN def create_network(): return SmallCNN() def test(): net = create_network() y = net((torch.randn(1, 1, 28, 28))) print(y.size()) ================================================ FILE: experiments/MNIST/pgd40/train.py ================================================ from config import config, args from dataset import create_train_dataset, create_test_dataset from network import create_network from utils.misc import save_args, save_checkpoint, load_checkpoint from training.train import train_one_epoch, eval_one_epoch import torch import os DEVICE = torch.device('cuda:{}'.format(args.d)) torch.backends.cudnn.benchmark = True net = create_network() net.to(DEVICE) criterion = config.create_loss_function().to(DEVICE) optimizer = config.create_optimizer(net.parameters()) lr_scheduler = config.create_lr_scheduler(optimizer) ds_train = create_train_dataset(args.batch_size) ds_val = create_test_dataset(args.batch_size) TrainAttack = config.create_attack_method(DEVICE) EvalAttack = config.create_evaluation_attack_method(DEVICE) now_epoch = 0 if args.auto_continue: args.resume = os.path.join(config.model_dir, 'last.checkpoint') if args.resume is not None and os.path.isfile(args.resume): now_epoch = 
================================================ FILE: lib/__init__.py ================================================

================================================ FILE: lib/attack/__init__.py ================================================

from .attack_base import clip_eta

================================================ FILE: lib/attack/attack_base.py ================================================

import torch
import numpy as np
from abc import ABCMeta, abstractmethod


class AttackBase(metaclass=ABCMeta):

    @abstractmethod
    def attack(self, net, inp, label, target=None):
        '''
        :param inp: batched images
        :param target: specify the indexes of target class, None represents untargeted attack
        :return: batched adversarial images
        '''
        pass

    @abstractmethod
    def to(self, device):
        pass


def clip_eta(eta, norm, eps, DEVICE=torch.device('cuda:0')):
    '''
    Helper function to project eta into the epsilon norm ball.
    :param eta: perturbation tensor (should be of size (N, C, H, W))
    :param norm: which norm, should be in [1, 2, np.inf]
    :param eps: epsilon, bound of the perturbation
    :return: projected perturbation
    '''
    assert norm in [1, 2, np.inf], "norm should be in [1, 2, np.inf]"

    with torch.no_grad():
        avoid_zero_div = torch.tensor(1e-12).to(DEVICE)
        eps = torch.tensor(eps).to(DEVICE)
        one = torch.tensor(1.0).to(DEVICE)

        if norm == np.inf:
            eta = torch.clamp(eta, -eps, eps)
        else:
            # per-sample norm, then rescale samples that fall outside the ball
            normalize = torch.norm(eta.reshape(eta.size(0), -1), p=norm, dim=-1, keepdim=False)
            normalize = torch.max(normalize, avoid_zero_div)

            normalize.unsqueeze_(dim=-1)
            normalize.unsqueeze_(dim=-1)
            normalize.unsqueeze_(dim=-1)

            factor = torch.min(one, eps / normalize)
            eta = eta * factor
    return eta


def test_clip():
    a = torch.rand((10, 3, 28, 28)).cuda()
    epss = [0.1, 0.5, 1]
    norms = [1, 2, np.inf]
    for e, n in zip(epss, norms):
        print(e, n)
        c = clip_eta(a, n, e, torch.device('cuda:0'))
        print(c)


if __name__ == '__main__':
    test_clip()
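A small worked example of the projection above (a sketch; a CPU device is chosen so it runs anywhere, and it assumes `lib/` is on `sys.path`): a perturbation with L2 norm 6 projected into an L2 ball of radius 3 is scaled by 1/2, while an already-feasible perturbation is returned unchanged.

```python
import torch
from attack.attack_base import clip_eta

eta = torch.full((1, 1, 2, 2), 3.0)   # L2 norm = sqrt(4 * 9) = 6
out = clip_eta(eta, norm=2, eps=3.0, DEVICE=torch.device('cpu'))
print(out.reshape(-1))                # every entry scaled to 1.5

small = torch.full((1, 1, 2, 2), 0.5)  # L2 norm = 1 <= eps
print(clip_eta(small, norm=2, eps=3.0, DEVICE=torch.device('cpu')).reshape(-1))  # unchanged
```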
================================================ FILE: lib/attack/pgd.py ================================================

'''
Reference:
[1] Towards Deep Learning Models Resistant to Adversarial Attacks
    Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, Adrian Vladu
    arXiv:1706.06083v3
'''

import torch
import numpy as np
import os
import sys

father_dir = os.path.join('/', *os.path.realpath(__file__).split(os.path.sep)[:-2])
if not father_dir in sys.path:
    sys.path.append(father_dir)

from attack.attack_base import AttackBase, clip_eta


class IPGD(AttackBase):
    # ImageNet pre-trained mean and std
    # _mean = torch.tensor(np.array([0.485, 0.456, 0.406]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])
    # _std = torch.tensor(np.array([0.229, 0.224, 0.225]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])

    # _mean = torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])
    # _std = torch.tensor(np.array([1.0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis])

    def __init__(self, eps=6 / 255.0, sigma=3 / 255.0, nb_iter=20, norm=np.inf,
                 DEVICE=torch.device('cpu'),
                 mean=torch.tensor(np.array([0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]),
                 std=torch.tensor(np.array([1.0]).astype(np.float32)[np.newaxis, :, np.newaxis, np.newaxis]),
                 random_start=True):
        '''
        :param eps: maximum distortion of adversarial examples
        :param sigma: single step size
        :param nb_iter: number of attack iterations
        :param norm: which norm to bound the perturbations
        '''
        self.eps = eps
        self.sigma = sigma
        self.nb_iter = nb_iter
        self.norm = norm
        self.criterion = torch.nn.CrossEntropyLoss().to(DEVICE)
        self.DEVICE = DEVICE
        self._mean = mean.to(DEVICE)
        self._std = std.to(DEVICE)
        self.random_start = random_start

    def single_attack(self, net, inp, label, eta, target=None):
        '''
        Given the original image and the perturbation computed so far, computes
        a new perturbation.
        :param net:
        :param inp: original image
        :param label:
        :param eta: perturbation computed so far
        :return: a new perturbation
        '''
        adv_inp = inp + eta

        # net.zero_grad()

        pred = net(adv_inp)
        if target is not None:
            targets = torch.sum(pred[:, target])
            grad_sign = torch.autograd.grad(targets, adv_inp,
                                            only_inputs=True, retain_graph=False)[0].sign()
        else:
            loss = self.criterion(pred, label)
            grad_sign = torch.autograd.grad(loss, adv_inp,
                                            only_inputs=True, retain_graph=False)[0].sign()

        adv_inp = adv_inp + grad_sign * (self.sigma / self._std)
        tmp_adv_inp = adv_inp * self._std + self._mean

        tmp_inp = inp * self._std + self._mean
        tmp_adv_inp = torch.clamp(tmp_adv_inp, 0, 1)  # clip into [0, 1]
        # tmp_adv_inp = (tmp_adv_inp - self._mean) / self._std
        tmp_eta = tmp_adv_inp - tmp_inp
        tmp_eta = clip_eta(tmp_eta, norm=self.norm, eps=self.eps, DEVICE=self.DEVICE)

        eta = tmp_eta / self._std

        return eta

    def attack(self, net, inp, label, target=None):

        if self.random_start:
            eta = torch.FloatTensor(*inp.shape).uniform_(-self.eps, self.eps)
        else:
            eta = torch.zeros_like(inp)
        eta = eta.to(self.DEVICE)
        eta = (eta - self._mean) / self._std
        net.eval()

        inp.requires_grad = True
        eta.requires_grad = True
        for i in range(self.nb_iter):
            eta = self.single_attack(net, inp, label, eta, target)
            # print(i)

        # print(eta.max())
        adv_inp = inp + eta
        tmp_adv_inp = adv_inp * self._std + self._mean
        tmp_adv_inp = torch.clamp(tmp_adv_inp, 0, 1)
        adv_inp = (tmp_adv_inp - self._mean) / self._std

        return adv_inp

    def to(self, device):
        self.DEVICE = device
        self._mean = self._mean.to(device)
        self._std = self._std.to(device)
        self.criterion = self.criterion.to(device)


def test_IPGD():
    pass


if __name__ == '__main__':
    test_IPGD()
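A hedged usage sketch for `IPGD` above, run on a throwaway CPU model (`TinyNet` and the MNIST-like shapes are illustrative, not part of this repo; it also assumes `lib/` is on `sys.path`, as the experiment configs arrange). With the identity mean/std defaults, inputs are interpreted as already lying in [0, 1]:

```python
import torch
import torch.nn as nn
from attack.pgd import IPGD


class TinyNet(nn.Module):
    def __init__(self):
        super(TinyNet, self).__init__()
        self.conv = nn.Conv2d(1, 8, 3, padding=1)
        self.fc = nn.Linear(8 * 28 * 28, 10)

    def forward(self, x):
        h = torch.relu(self.conv(x))
        return self.fc(h.view(h.size(0), -1))


net = TinyNet()
x = torch.rand(4, 1, 28, 28)           # already in [0, 1]
y = torch.randint(0, 10, (4,))

attack = IPGD(eps=0.3, sigma=0.01, nb_iter=40, DEVICE=torch.device('cpu'))
x_adv = attack.attack(net, x, y)
print((x_adv - x).abs().max().item())  # <= eps, up to the [0, 1] clamp
```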
================================================ FILE: lib/base_model/__init__.py ================================================

================================================ FILE: lib/base_model/cifar_resnet18.py ================================================

'''
ResNet in PyTorch. For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
Note: cifar_resnet18 constructs the same model as
https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
'''

import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def cifar_resnet18(*args, **kargs):
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = cifar_resnet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())


if __name__ == '__main__':
    test()
================================================ FILE: lib/base_model/network.py ================================================

'''
Pre-activation ResNet in PyTorch.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''

import torch
import torch.nn as nn
import torch.nn.functional as F


class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class PreActResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64
        self.other_layers = nn.ModuleList()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # the "first layer" that YOPO treats separately from the rest of the network
        self.layer_one = self.conv1

        self.other_layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.other_layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.other_layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.other_layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.linear = GlobalpoolFC(512 * block.expansion, num_classes)
        self.other_layers.append(self.linear)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.other_layers.append(layers[-1])
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.layer_one(x)

        # expose the first layer's output and keep its gradient around;
        # the YOPO training loop reads it as net.layer_one_out.grad
        self.layer_one_out = x
        self.layer_one_out.requires_grad_()
        self.layer_one_out.retain_grad()
        x = self.layer_one_out

        for layer in self.other_layers:
            x = layer(x)
        '''
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
        '''
        return x


class GlobalpoolFC(nn.Module):
    def __init__(self, num_in, num_class):
        super(GlobalpoolFC, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(num_in, num_class)

    def forward(self, x):
        y = self.pool(x)
        y = y.reshape(y.shape[0], -1)
        y = self.fc(y)
        return y


def PreActResNet18():
    return PreActResNet(PreActBlock, [2, 2, 2, 2])


def PreActResNet34():
    return PreActResNet(PreActBlock, [3, 4, 6, 3])


class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)

        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out


def create_network():
    return PreActResNet18()


def test():
    net = PreActResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())
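The `layer_one_out` bookkeeping in `forward` above is the hook that makes YOPO possible: after one full backward pass, `net.layer_one_out.grad` holds the loss gradient at the first layer's output. A minimal check of the pattern in isolation (a sketch with a toy module, not this repo's API):

```python
import torch
import torch.nn as nn


class Toy(nn.Module):
    def __init__(self):
        super(Toy, self).__init__()
        self.layer_one = nn.Linear(3, 4)
        self.rest = nn.Linear(4, 2)

    def forward(self, x):
        self.layer_one_out = self.layer_one(x)
        self.layer_one_out.retain_grad()   # keep the gradient of this non-leaf tensor
        return self.rest(self.layer_one_out)


net = Toy()
loss = net(torch.randn(5, 3)).sum()
loss.backward()
print(net.layer_one_out.grad.shape)        # torch.Size([5, 4])
```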
================================================ FILE: lib/base_model/preact_resnet.py ================================================

'''
Pre-activation ResNet in PyTorch.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''

import torch
import torch.nn as nn
import torch.nn.functional as F


class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class PreActResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64
        self.layers = nn.ModuleList()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layers.append(self.conv1)
        self.is_trainable = [True]

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.linear = GlobalpoolFC(512 * block.expansion, num_classes)
        self.layers.append(self.linear)
        self.is_trainable.append(True)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.layers.append(layers[-1])
            self.is_trainable.append(True)
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        # record the intermediate activation at every layer boundary
        self.inputs = []
        self.inputs.append(x)
        for layer in self.layers:
            x = layer(x)
            self.inputs.append(x)
        '''
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
        '''
        return x


class GlobalpoolFC(nn.Module):
    def __init__(self, num_in, num_class):
        super(GlobalpoolFC, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(num_in, num_class)

    def forward(self, x):
        y = self.pool(x)
        y = y.reshape(y.shape[0], -1)
        y = self.fc(y)
        return y


def PreActResNet18():
    return PreActResNet(PreActBlock, [2, 2, 2, 2])


def PreActResNet34():
    return PreActResNet(PreActBlock, [3, 4, 6, 3])


class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)

        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out


def test():
    net = PreActResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())
================================================ FILE: lib/base_model/small_cnn.py ================================================

'''
This code is from https://github.com/yaodongyu/TRADES/blob/master/models/small_cnn.py
@article{Zhang2019theoretically,
  author  = {Hongyang Zhang and Yaodong Yu and Jiantao Jiao and Eric P. Xing and Laurent El Ghaoui and Michael I. Jordan},
  title   = {Theoretically Principled Trade-off between Robustness and Accuracy},
  journal = {arXiv preprint arXiv:1901.08573},
  year    = {2019}
}
'''

from collections import OrderedDict

import torch
import torch.nn as nn


class SmallCNN(nn.Module):
    def __init__(self, drop=0.5):
        super(SmallCNN, self).__init__()

        self.num_channels = 1
        self.num_labels = 10

        activ = nn.ReLU(True)

        self.conv1 = nn.Conv2d(self.num_channels, 32, 3)
        self.layer_one = nn.Sequential(OrderedDict([
            ('conv1', self.conv1),
            ('relu1', activ),
        ]))

        self.feature_extractor = nn.Sequential(OrderedDict([
            ('conv2', nn.Conv2d(32, 32, 3)),
            ('relu2', activ),
            ('maxpool1', nn.MaxPool2d(2, 2)),
            ('conv3', nn.Conv2d(32, 64, 3)),
            ('relu3', activ),
            ('conv4', nn.Conv2d(64, 64, 3)),
            ('relu4', activ),
            ('maxpool2', nn.MaxPool2d(2, 2)),
        ]))

        self.classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(64 * 4 * 4, 200)),
            ('relu1', activ),
            ('drop', nn.Dropout(drop)),
            ('fc2', nn.Linear(200, 200)),
            ('relu2', activ),
            ('fc3', nn.Linear(200, self.num_labels)),
        ]))

        self.other_layers = nn.ModuleList()
        self.other_layers.append(self.feature_extractor)
        self.other_layers.append(self.classifier)

        for m in self.modules():
            if isinstance(m, (nn.Conv2d)):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        nn.init.constant_(self.classifier.fc3.weight, 0)
        nn.init.constant_(self.classifier.fc3.bias, 0)

    def forward(self, input):
        y = self.layer_one(input)

        # expose the first layer's output for YOPO, as in the ResNet variants
        self.layer_one_out = y
        self.layer_one_out.requires_grad_()
        self.layer_one_out.retain_grad()

        features = self.feature_extractor(y)
        logits = self.classifier(features.view(-1, 64 * 4 * 4))
        return logits


def create_network():
    return SmallCNN()


def test():
    net = create_network()
    y = net(torch.randn(1, 1, 28, 28))
    print(y.size())
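A quick shape check for `SmallCNN` on MNIST (a sketch): 28 → conv3 → 26 → conv3 → 24 → maxpool2 → 12 → conv3 → 10 → conv3 → 8 → maxpool2 → 4, which yields exactly the 64 * 4 * 4 features that the classifier's `fc1` expects.

```python
import torch

net = SmallCNN()
feats = net.feature_extractor(net.layer_one(torch.randn(1, 1, 28, 28)))
print(feats.shape)  # torch.Size([1, 64, 4, 4])
```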
================================================ FILE: lib/base_model/wide_resnet.py ================================================

'''
This code is from https://github.com/yaodongyu/TRADES/blob/master/models/wideresnet.py
@article{Zhang2019theoretically,
  author  = {Hongyang Zhang and Yaodong Yu and Jiantao Jiao and Eric P. Xing and Laurent El Ghaoui and Michael I. Jordan},
  title   = {Theoretically Principled Trade-off between Robustness and Accuracy},
  journal = {arXiv preprint arXiv:1901.08573},
  year    = {2019}
}
'''

import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1,
                                                                stride=stride, padding=0, bias=False) or None

    def forward(self, x):
        if not self.equalInOut:
            x = self.relu1(self.bn1(x))
        else:
            out = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)


class NetworkBlock(nn.Module):
    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(int(nb_layers)):
            layers.append(block(i == 0 and in_planes or out_planes, out_planes,
                                i == 0 and stride or 1, dropRate))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)


class WideResNet(nn.Module):
    def __init__(self, depth=28, num_classes=10, widen_factor=10, dropRate=0.0):
        super(WideResNet, self).__init__()
        nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
        assert ((depth - 4) % 6 == 0)
        n = (depth - 4) / 6
        block = BasicBlock

        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.layer_one = self.conv1
        self.other_layers = nn.ModuleList()

        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        self.other_layers.append(self.block1)
        # 1st sub-block (constructed but not used in forward; kept from the TRADES code)
        self.sub_block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        self.other_layers.append(self.sub_block1)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        self.other_layers.append(self.block2)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        self.other_layers.append(self.block3)

        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.other_layers.append(self.bn1)
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.other_layers.append(self.fc)
        self.nChannels = nChannels[3]

        '''
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
        '''

    def forward(self, x, ret_cls1=True):
        out = self.conv1(x)

        # expose the first layer's output for YOPO
        self.layer_one_out = out
        self.layer_one_out.requires_grad_()
        self.layer_one_out.retain_grad()

        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        y = self.fc(out)
        return y


def create_network():
    net = WideResNet()
    return net


if __name__ == '__main__':
    net = create_network()
    print(net)
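A quick sanity check of the depth bookkeeping above (a sketch, assuming the file is imported as-is): depth = 6n + 4, so `WideResNet(depth=34)`, the backbone size used by the wide34 experiments, builds n = 5 `BasicBlock`s per `NetworkBlock`.

```python
net = WideResNet(depth=34, widen_factor=10)
print(len(net.block1.layer), len(net.block2.layer), len(net.block3.layer))  # 5 5 5
```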
================================================ FILE: lib/training/__init__.py ================================================

================================================ FILE: lib/training/config.py ================================================

from abc import ABCMeta, abstractproperty, abstractmethod
from typing import Tuple, List, Dict
import os
import sys

import torch


class TrainingConfigBase(metaclass=ABCMeta):
    '''
    Base class for training
    '''

    # directory handling
    @property
    def abs_current_dir(self):
        return os.path.realpath('./')

    @property
    def log_dir(self):
        if not os.path.exists('./log'):
            os.mkdir('./log')
        return os.path.join(self.abs_current_dir, 'log')

    @property
    def model_dir(self):
        log_dir = self.log_dir
        model_dir = os.path.join(log_dir, 'models')
        # print(model_dir)
        if not os.path.exists(model_dir):
            os.mkdir(model_dir)
        return model_dir

    @abstractproperty
    def lib_dir(self):
        pass

    # training setting
    @abstractproperty
    def num_epochs(self):
        pass

    @property
    def val_interval(self):
        '''
        Specify how many epochs between two validation steps.
        Returning <= 0 means no validation phase.
        '''
        return 0

    @abstractmethod
    def create_optimizer(self, params) -> torch.optim.Optimizer:
        '''
        params (iterable): iterable of parameters to optimize or dicts defining parameter groups
        '''
        pass

    @abstractmethod
    def create_lr_scheduler(self, optimizer: torch.optim.Optimizer) -> torch.optim.lr_scheduler._LRScheduler:
        pass

    @abstractmethod
    def create_loss_function(self) -> torch.nn.modules.loss._Loss:
        pass

    def create_attack_method(self, *inputs):
        '''
        Perform adversarial training against xxx adversary.
        Returning None means natural training.
        '''
        return None

    # evaluation setting
    def create_evaluation_attack_method(self, *inputs):
        '''
        Evaluate the robustness of the model against xxx adversary.
        Returning None means only measuring clean accuracy.
        '''
        return None


class SGDOptimizerMaker(object):

    def __init__(self, lr=0.1, momentum=0.9, weight_decay=1e-4):
        self.lr = lr
        self.momentum = momentum
        self.weight_decay = weight_decay

    def __call__(self, params):
        return torch.optim.SGD(params, lr=self.lr, momentum=self.momentum,
                               weight_decay=self.weight_decay)


class PieceWiseConstantLrSchedulerMaker(object):

    def __init__(self, milestones: List[int], gamma: float = 0.1):
        self.milestones = milestones
        self.gamma = gamma

    def __call__(self, optimizer):
        return torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.milestones,
                                                    gamma=self.gamma)


class IPGDAttackMethodMaker(object):

    def __init__(self, eps, sigma, nb_iters, norm, mean, std):
        self.eps = eps
        self.sigma = sigma
        self.nb_iters = nb_iters
        self.norm = norm
        self.mean = mean
        self.std = std

    def __call__(self, DEVICE):
        father_dir = os.path.join('/', *os.path.realpath(__file__).split(os.path.sep)[:-2])
        # print(father_dir)
        if not father_dir in sys.path:
            sys.path.append(father_dir)
        from attack.pgd import IPGD
        return IPGD(self.eps, self.sigma, self.nb_iters, self.norm, DEVICE,
                    self.mean, self.std)


class LambdaLrSchedulerMaker(object):

    def __init__(self, func, last_epoch=-1):
        assert callable(func)
        self.func = func
        self.last_epoch = last_epoch

    def __call__(self, optimizer):
        from torch.optim.lr_scheduler import LambdaLR
        lr_scheduler = LambdaLR(optimizer, self.func, self.last_epoch)
        return lr_scheduler
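`LambdaLrSchedulerMaker` is not exercised by the experiment configs above, so here is a hedged usage sketch (the 5-epoch linear warmup lambda is illustrative): the function returns a multiplier on the base learning rate, and the maker is called with the optimizer, just like the other scheduler makers.

```python
import torch

net = torch.nn.Linear(2, 2)
opt = torch.optim.SGD(net.parameters(), lr=0.1)

make_scheduler = LambdaLrSchedulerMaker(func=lambda epoch: min(1.0, (epoch + 1) / 5.0))
scheduler = make_scheduler(opt)

for epoch in range(6):
    print(epoch, opt.param_groups[0]['lr'])  # 0.02, 0.04, ..., 0.1, then flat
    scheduler.step()
```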
================================================ FILE: lib/training/train.py ================================================

import os
import sys

father_dir = os.path.join('/', *os.path.realpath(__file__).split(os.path.sep)[:-2])
# print(father_dir)
if not father_dir in sys.path:
    sys.path.append(father_dir)

from utils.misc import torch_accuracy, AvgMeter
from collections import OrderedDict
import torch
from tqdm import tqdm


def train_one_epoch(net, batch_generator, optimizer, criterion,
                    DEVICE=torch.device('cuda:0'), descrip_str='Training',
                    AttackMethod=None, adv_coef=1.0):
    '''
    :param AttackMethod: the attack method, None represents natural training
    :param adv_coef: weight of the adversarial loss relative to the clean loss
    :return: None  # (clean_acc, adv_acc)
    '''
    net.train()
    pbar = tqdm(batch_generator)
    advacc = -1
    advloss = -1
    cleanacc = -1
    cleanloss = -1
    pbar.set_description(descrip_str)
    for i, (data, label) in enumerate(pbar):
        data = data.to(DEVICE)
        label = label.to(DEVICE)

        optimizer.zero_grad()

        pbar_dic = OrderedDict()
        TotalLoss = 0

        if AttackMethod is not None:
            adv_inp = AttackMethod.attack(net, data, label)
            optimizer.zero_grad()
            net.train()
            pred = net(adv_inp)
            loss = criterion(pred, label)

            acc = torch_accuracy(pred, label, (1,))
            advacc = acc[0].item()
            advloss = loss.item()
            # TotalLoss = TotalLoss + loss * adv_coef
            (loss * adv_coef).backward()

        pred = net(data)
        loss = criterion(pred, label)
        # TotalLoss = TotalLoss + loss
        loss.backward()
        # TotalLoss.backward()
        # param = next(net.parameters())
        # grad_mean = torch.mean(param.grad)

        optimizer.step()

        acc = torch_accuracy(pred, label, (1,))
        cleanacc = acc[0].item()
        cleanloss = loss.item()

        # pbar_dic['grad'] = '{}'.format(grad_mean)
        pbar_dic['Acc'] = '{:.2f}'.format(cleanacc)
        pbar_dic['loss'] = '{:.2f}'.format(cleanloss)
        pbar_dic['AdvAcc'] = '{:.2f}'.format(advacc)
        pbar_dic['Advloss'] = '{:.2f}'.format(advloss)
        pbar.set_postfix(pbar_dic)


def eval_one_epoch(net, batch_generator, DEVICE=torch.device('cuda:0'), AttackMethod=None):
    net.eval()
    pbar = tqdm(batch_generator)
    clean_accuracy = AvgMeter()
    adv_accuracy = AvgMeter()

    pbar.set_description('Evaluating')
    for (data, label) in pbar:
        data = data.to(DEVICE)
        label = label.to(DEVICE)

        with torch.no_grad():
            pred = net(data)
            acc = torch_accuracy(pred, label, (1,))
            clean_accuracy.update(acc[0].item())

        if AttackMethod is not None:
            adv_inp = AttackMethod.attack(net, data, label)

            with torch.no_grad():
                pred = net(adv_inp)
                acc = torch_accuracy(pred, label, (1,))
                adv_accuracy.update(acc[0].item())

        pbar_dic = OrderedDict()
        pbar_dic['CleanAcc'] = '{:.2f}'.format(clean_accuracy.mean)
        pbar_dic['AdvAcc'] = '{:.2f}'.format(adv_accuracy.mean)

        pbar.set_postfix(pbar_dic)

    adv_acc = adv_accuracy.mean if AttackMethod is not None else 0
    return clean_accuracy.mean, adv_acc
================================================ FILE: lib/utils/__init__.py ================================================

================================================ FILE: lib/utils/misc.py ================================================

import json
import math
import os
import sys
from typing import Tuple, List, Dict

import torch


def torch_accuracy(output, target, topk=(1,)) -> List[torch.Tensor]:
    '''
    :param output, target: should be torch Tensors
    :return: top-k accuracies (in percent), one tensor per entry of topk
    '''
    # assert isinstance(output, torch.cuda.Tensor), 'expecting Torch Tensor'
    # assert isinstance(target, torch.Tensor), 'expecting Torch Tensor'
    # print(type(output))

    topn = max(topk)
    batch_size = output.size(0)

    _, pred = output.topk(topn, 1, True, True)
    pred = pred.t()

    is_correct = pred.eq(target.view(1, -1).expand_as(pred))

    ans = []
    for i in topk:
        is_correct_i = is_correct[:i].view(-1).float().sum(0, keepdim=True)
        ans.append(is_correct_i.mul_(100.0 / batch_size))

    return ans


class AvgMeter(object):
    '''
    Computes a running mean.
    '''
    name = 'No name'

    def __init__(self, name='No name'):
        self.name = name
        self.reset()

    def reset(self):
        self.sum = 0
        self.mean = 0
        self.num = 0
        self.now = 0

    def update(self, mean_var, count=1):
        if math.isnan(mean_var):
            mean_var = 1e6
            print('AvgMeter getting NaN!')
        self.now = mean_var
        self.num += count

        self.sum += mean_var * count
        self.mean = float(self.sum) / self.num


def save_args(args, save_dir=None):
    if save_dir is None:
        param_path = os.path.join(args.resume, "params.json")
    else:
        param_path = os.path.join(save_dir, 'params.json')

    # logger.info("[*] MODEL dir: %s" % args.resume)
    # logger.info("[*] PARAM path: %s" % param_path)

    with open(param_path, 'w') as fp:
        json.dump(args.__dict__, fp, indent=4, sort_keys=True)


def mkdir(path):
    if not os.path.exists(path):
        print('creating dir {}'.format(path))
        os.mkdir(path)


def save_checkpoint(now_epoch, net, optimizer, lr_scheduler, file_name):
    checkpoint = {'epoch': now_epoch,
                  'state_dict': net.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'lr_scheduler_state_dict': lr_scheduler.state_dict()}
    if os.path.exists(file_name):
        print('Overwriting {}'.format(file_name))
    torch.save(checkpoint, file_name)
    link_name = os.path.join('/', *file_name.split(os.path.sep)[:-1], 'last.checkpoint')
    # print(link_name)
    make_symlink(source=file_name, link_name=link_name)


def load_checkpoint(file_name, net=None, optimizer=None, lr_scheduler=None):
    if os.path.isfile(file_name):
        print("=> loading checkpoint '{}'".format(file_name))
        check_point = torch.load(file_name)
        if net is not None:
            print('Loading network state dict')
            net.load_state_dict(check_point['state_dict'])
        if optimizer is not None:
            print('Loading optimizer state dict')
            optimizer.load_state_dict(check_point['optimizer_state_dict'])
        if lr_scheduler is not None:
            print('Loading lr_scheduler state dict')
            lr_scheduler.load_state_dict(check_point['lr_scheduler_state_dict'])
        return check_point['epoch']
    else:
        print("=> no checkpoint found at '{}'".format(file_name))


def make_symlink(source, link_name):
    '''
    Note: overwriting enabled!
    '''
    if os.path.exists(link_name):
        # print("Link name already exists! Removing '{}' and overwriting".format(link_name))
        os.remove(link_name)
    if os.path.exists(source):
        os.symlink(source, link_name)
        return
    else:
        print('Source path does not exist')
        # print('SymLink Wrong!')


def add_path(path):
    if path not in sys.path:
        print('Adding {}'.format(path))
        sys.path.append(path)
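A small worked example for `torch_accuracy` above (a sketch): with two samples, the first is correct at top-1 and the second only enters at top-2, so the returned accuracies are 50% and 100%.

```python
import torch

logits = torch.tensor([[2.0, 1.0, 0.1],
                       [0.1, 0.2, 3.0]])
target = torch.tensor([0, 1])
top1, top2 = torch_accuracy(logits, target, topk=(1, 2))
print(top1.item(), top2.item())  # 50.0 100.0
```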