Repository: PatrickZH/DeepCore Branch: main Commit: 74ec709f5ccb Files: 49 Total size: 192.7 KB Directory structure: gitextract_96mbtddl/ ├── LICENSE.md ├── README.md ├── deepcore/ │ ├── __init__.py │ ├── datasets/ │ │ ├── __init__.py │ │ ├── cifar10.py │ │ ├── cifar100.py │ │ ├── fashionmnist.py │ │ ├── imagenet.py │ │ ├── mnist.py │ │ ├── qmnist.py │ │ ├── svhn.py │ │ └── tinyimagenet.py │ ├── methods/ │ │ ├── __init__.py │ │ ├── cal.py │ │ ├── contextualdiversity.py │ │ ├── coresetmethod.py │ │ ├── craig.py │ │ ├── deepfool.py │ │ ├── earlytrain.py │ │ ├── forgetting.py │ │ ├── full.py │ │ ├── glister.py │ │ ├── gradmatch.py │ │ ├── grand.py │ │ ├── herding.py │ │ ├── kcentergreedy.py │ │ ├── methods_utils/ │ │ │ ├── __init__.py │ │ │ ├── cossim.py │ │ │ ├── euclidean.py │ │ │ ├── submodular_function.py │ │ │ └── submodular_optimizer.py │ │ ├── submodular.py │ │ ├── uncertainty.py │ │ └── uniform.py │ └── nets/ │ ├── __init__.py │ ├── alexnet.py │ ├── inceptionv3.py │ ├── lenet.py │ ├── mlp.py │ ├── mobilenetv3.py │ ├── nets_utils/ │ │ ├── __init__.py │ │ ├── parallel.py │ │ └── recorder.py │ ├── resnet.py │ ├── vgg.py │ └── wideresnet.py ├── main.py ├── requirements.txt └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE.md ================================================ MIT License Copyright (c) 2023 ZHAO, BO Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or 
substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: README.md
================================================

# DeepCore: A Comprehensive Library for Coreset Selection in Deep Learning [PDF](https://arxiv.org/pdf/2204.08499.pdf)

### Introduction

To advance the research of coreset selection in deep learning, we contribute a code library named **DeepCore**, an extensive and extendable code library for coreset selection in deep learning, reproducing dozens of popular and advanced coreset selection methods and enabling a fair comparison of different methods in the same experimental settings. **DeepCore** is highly modular, allowing users to add new architectures, datasets, methods and learning scenarios easily. It is built on PyTorch.

### Coreset Methods

We list the methods in DeepCore according to the categories in our original paper; they are 1) geometry based methods Contextual Diversity (CD), Herding and k-Center Greedy; 2) uncertainty scores; 3) error based methods Forgetting and GraNd score; 4) decision boundary based methods Cal and DeepFool; 5) gradient matching based methods Craig and GradMatch; 6) bilevel optimization methods Glister; and 7) submodularity based methods Graph Cut (GC) and Facility Location (FL) functions. We also have Random selection as the baseline.

### Datasets

It contains a series of popular computer vision datasets, namely MNIST, QMNIST, FashionMNIST, SVHN, CIFAR10, CIFAR100, TinyImageNet and ImageNet.
### Models

They are two-layer fully connected MLP, LeNet, AlexNet, VGG, Inception-v3, ResNet, WideResNet and MobileNet-v3.

### Example

Selecting with Glister and training on the coreset with fraction 0.1.
```sh
CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Glister --model InceptionV3 --lr 0.1 -sp ./result --batch 128
```

Resuming interrupted training with argument ```--resume```.
```sh
CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Glister --model InceptionV3 --lr 0.1 -sp ./result --batch 128 --resume "CIFAR10_InceptionV3_Glister_exp0_epoch200_2022-02-05 21:31:53.762903_0.1_unknown.ckpt"
```

Batch size can be separately assigned for both selection and training.
```sh
CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.5 --dataset ImageNet --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Cal --model MobileNetV3Large --lr 0.1 -sp ./result -tb 256 -sb 128
```

Argument ```--uncertainty``` to choose uncertainty scores.
```sh
CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Uncertainty --model ResNet18 --lr 0.1 -sp ./result --batch 128 --uncertainty Entropy
```

Argument ```--submodular``` to choose submodular function, e.g. ```GraphCut```, ```FacilityLocation``` or ```LogDeterminant```. You may also specify the type of greedy algorithm to use when maximizing functions with argument ```--submodular_greedy```, for example ```NaiveGreedy```, ```LazyGreedy```, ```StochasticGreedy```, etc.
```sh
CUDA_VISIBLE_DEVICES=0 python -u main.py --fraction 0.1 --dataset CIFAR10 --data_path ~/datasets --num_exp 5 --workers 10 --optimizer SGD -se 10 --selection Submodular --model ResNet18 --lr 0.1 -sp ./result --batch 128 --submodular GraphCut --submodular_greedy NaiveGreedy
```

### Extend

DeepCore is highly modular and scalable. It allows new architectures, datasets and selection methods to be added easily, to help coreset methods be evaluated in a richer set of scenarios, and also to facilitate new methods for comparison. Here is an example for datasets. To add a new dataset, you need to implement a function whose input is the data path and whose outputs are the number of channels, size of image, number of classes, names of classes, mean, std, and training and testing datasets inherited from ```torch.utils.data.Dataset```.

```python
from torchvision import datasets, transforms


def MNIST(data_path):
    channel = 1
    im_size = (28, 28)
    num_classes = 10
    mean = [0.1307]
    std = [0.3081]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    dst_train = datasets.MNIST(data_path, train=True, download=True, transform=transform)
    dst_test = datasets.MNIST(data_path, train=False, download=True, transform=transform)
    class_names = [str(c) for c in range(num_classes)]
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test
```

This is an example for implementing network architecture.
```python import torch.nn as nn import torch.nn.functional as F from torch import set_grad_enabled from .nets_utils import EmbeddingRecorder class MLP(nn.Module): def __init__(self, channel, num_classes, im_size, record_embedding: bool = False, no_grad: bool = False, pretrained: bool = False): if pretrained: raise NotImplementedError("torchvison pretrained models not available.") super(MLP, self).__init__() self.fc_1 = nn.Linear(im_size[0] * im_size[1] * channel, 128) self.fc_2 = nn.Linear(128, 128) self.fc_3 = nn.Linear(128, num_classes) self.embedding_recorder = EmbeddingRecorder(record_embedding) self.no_grad = no_grad def get_last_layer(self): return self.fc_3 def forward(self, x): with set_grad_enabled(not self.no_grad): out = x.view(x.size(0), -1) out = F.relu(self.fc_1(out)) out = F.relu(self.fc_2(out)) out = self.embedding_recorder(out) out = self.fc_3(out) return out ``` To implement the new coreset method, you need to inherit the new method from the ```CoresetMethod``` class and return the selected indices via the ```select``` method. ```python class CoresetMethod(object): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs): if fraction <= 0.0 or fraction > 1.0: raise ValueError("Illegal Coreset Size.") self.dst_train = dst_train self.num_classes = len(dst_train.classes) self.fraction = fraction self.random_seed = random_seed self.index = [] self.args = args self.n_train = len(dst_train) self.coreset_size = round(self.n_train * fraction) def select(self, **kwargs): return ``` ### References 1. Agarwal, S., Arora, H., Anand, S., Arora, C.: Contextual diversity for active learning. In: ECCV. pp. 137–153. Springer (2020) 2. Coleman, C., Yeh, C., Mussmann, S., Mirzasoleiman, B., Bailis, P., Liang, P., Leskovec, J., Zaharia, M.: Selection via proxy: Efficient data selection for deep learning. In: ICLR (2019) 3. Ducoffe, M., Precioso, F.: Adversarial active learning for deep networks: a margin based approach. 
arXiv preprint arXiv:1802.09841 (2018) 4. Iyer, R., Khargoankar, N., Bilmes, J., Asanani, H.: Submodular combinatorial information measures with applications in machine learning. In: Algorithmic Learning Theory. pp. 722–754. PMLR (2021) 5. Killamsetty, K., Durga, S., Ramakrishnan, G., De, A., Iyer, R.: Grad-match: Gradient matching based data subset selection for efficient deep model training. In: ICML. pp. 5464–5474 (2021) 6. Killamsetty, K., Sivasubramanian, D., Ramakrishnan, G., Iyer, R.: Glister: Generalization based data subset selection for efficient and robust learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2021) 7. Margatina, K., Vernikos, G., Barrault, L., Aletras, N.: Active learning by acquiring contrastive examples. arXiv preprint arXiv:2109.03764 (2021) 8. Mirzasoleiman, B., Bilmes, J., Leskovec, J.: Coresets for data-efficient training of machine learning models. In: ICML. PMLR (2020) 9. Paul, M., Ganguli, S., Dziugaite, G.K.: Deep learning on a data diet: Finding important examples early in training. arXiv preprint arXiv:2107.07075 (2021) 10. Sener, O., Savarese, S.: Active learning for convolutional neural networks: A coreset approach. In: ICLR (2018) 11. Toneva, M., Sordoni, A., des Combes, R.T., Trischler, A., Bengio, Y., Gordon, G.J.: An empirical study of example forgetting during deep neural network learning. In: ICLR (2018) 12. Welling, M.: Herding dynamical weights to learn. In: Proceedings of the 26th Annual International Conference on Machine Learning. pp. 
1121–1128 (2009) ================================================ FILE: deepcore/__init__.py ================================================ # __init__.py ================================================ FILE: deepcore/datasets/__init__.py ================================================ from .cifar10 import * from .cifar100 import * from .fashionmnist import * from .imagenet import * from .mnist import * from .qmnist import * from .svhn import * from .tinyimagenet import * ================================================ FILE: deepcore/datasets/cifar10.py ================================================ from torchvision import datasets, transforms from torch import tensor, long def CIFAR10(data_path): channel = 3 im_size = (32, 32) num_classes = 10 mean = [0.4914, 0.4822, 0.4465] std = [0.2470, 0.2435, 0.2616] transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) dst_train = datasets.CIFAR10(data_path, train=True, download=True, transform=transform) dst_test = datasets.CIFAR10(data_path, train=False, download=True, transform=transform) class_names = dst_train.classes dst_train.targets = tensor(dst_train.targets, dtype=long) dst_test.targets = tensor(dst_test.targets, dtype=long) return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/datasets/cifar100.py ================================================ from torchvision import datasets, transforms from torch import tensor, long def CIFAR100(data_path): channel = 3 im_size = (32, 32) num_classes = 100 mean = [0.5071, 0.4865, 0.4409] std = [0.2673, 0.2564, 0.2762] transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) dst_train = datasets.CIFAR100(data_path, train=True, download=True, transform=transform) dst_test = datasets.CIFAR100(data_path, train=False, download=True, transform=transform) class_names = dst_train.classes dst_train.targets 
= tensor(dst_train.targets, dtype=long) dst_test.targets = tensor(dst_test.targets, dtype=long) return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/datasets/fashionmnist.py ================================================ from torchvision import datasets, transforms def FashionMNIST(data_path): channel = 1 im_size = (28, 28) num_classes = 10 mean = [0.2861] std = [0.3530] transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) dst_train = datasets.FashionMNIST(data_path, train=True, download=True, transform=transform) dst_test = datasets.FashionMNIST(data_path, train=False, download=True, transform=transform) class_names = dst_train.classes return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/datasets/imagenet.py ================================================ from torchvision import datasets, transforms from torch import tensor, long def ImageNet(data_path): channel = 3 im_size = (224, 224) num_classes = 1000 mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] normalize = transforms.Normalize(mean, std) dst_train = datasets.ImageNet(data_path, split="train", transform=transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])) dst_test = datasets.ImageNet(data_path, split="val", transform=transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])) class_names = dst_train.classes dst_train.targets = tensor(dst_train.targets, dtype=long) dst_test.targets = tensor(dst_test.targets, dtype=long) return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/datasets/mnist.py ================================================ from torchvision import datasets, 
transforms import numpy as np def MNIST(data_path, permuted=False, permutation_seed=None): channel = 1 im_size = (28, 28) num_classes = 10 mean = [0.1307] std = [0.3081] transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) if permuted: np.random.seed(permutation_seed) pixel_permutation = np.random.permutation(28 * 28) transform = transforms.Compose( [transform, transforms.Lambda(lambda x: x.view(-1, 1)[pixel_permutation].view(1, 28, 28))]) dst_train = datasets.MNIST(data_path, train=True, download=True, transform=transform) dst_test = datasets.MNIST(data_path, train=False, download=True, transform=transform) class_names = [str(c) for c in range(num_classes)] return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test def permutedMNIST(data_path, permutation_seed=None): return MNIST(data_path, True, permutation_seed) ================================================ FILE: deepcore/datasets/qmnist.py ================================================ from torchvision import datasets, transforms def QMNIST(data_path): channel = 1 im_size = (28, 28) num_classes = 10 mean = [0.1308] std = [0.3088] transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) dst_train = datasets.QMNIST(data_path, train=True, download=True, transform=transform) dst_test = datasets.QMNIST(data_path, train=False, download=True, transform=transform) class_names = [str(c) for c in range(num_classes)] dst_train.targets = dst_train.targets[:, 0] dst_test.targets = dst_test.targets[:, 0] dst_train.compat = False dst_test.compat = False return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/datasets/svhn.py ================================================ from torchvision import datasets, transforms from torch import tensor, long def SVHN(data_path): channel = 3 im_size = (32, 32) num_classes = 10 mean = 
[0.4377, 0.4438, 0.4728] std = [0.1980, 0.2010, 0.1970] transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) dst_train = datasets.SVHN(data_path, split='train', download=True, transform=transform) dst_test = datasets.SVHN(data_path, split='test', download=True, transform=transform) class_names = [str(c) for c in range(num_classes)] dst_train.classes = list(class_names) dst_test.classes = list(class_names) dst_train.targets = tensor(dst_train.labels, dtype=long) dst_test.targets = tensor(dst_test.labels, dtype=long) return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/datasets/tinyimagenet.py ================================================ from torchvision import datasets, transforms import os import requests import zipfile def TinyImageNet(data_path, downsize=True): if not os.path.exists(os.path.join(data_path, "tiny-imagenet-200")): url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip" # 248MB print("Downloading Tiny-ImageNet") r = requests.get(url, stream=True) with open(os.path.join(data_path, "tiny-imagenet-200.zip"), "wb") as f: for chunk in r.iter_content(chunk_size=1024): if chunk: f.write(chunk) print("Unziping Tiny-ImageNet") with zipfile.ZipFile(os.path.join(data_path, "tiny-imagenet-200.zip")) as zf: zf.extractall(path=data_path) channel = 3 im_size = (32, 32) if downsize else (64, 64) num_classes = 200 mean = (0.4802, 0.4481, 0.3975) std = (0.2770, 0.2691, 0.2821) transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) if downsize: transform = transforms.Compose([transforms.Resize(32), transform]) dst_train = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/train'), transform=transform) dst_test = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/test'), transform=transform) class_names = dst_train.classes return channel, im_size, 
num_classes, class_names, mean, std, dst_train, dst_test ================================================ FILE: deepcore/methods/__init__.py ================================================ from .cal import * from .contextualdiversity import * from .coresetmethod import * from .craig import * from .deepfool import * from .earlytrain import * from .forgetting import * from .full import * from .glister import * from .grand import * from .gradmatch import * from .herding import * from .kcentergreedy import * from .submodular import * from .uncertainty import * from .uniform import * ================================================ FILE: deepcore/methods/cal.py ================================================ from .earlytrain import EarlyTrain from .methods_utils.euclidean import euclidean_dist_pair_np from .methods_utils.cossim import cossim_pair_np import numpy as np import torch from .. import nets from copy import deepcopy from torchvision import transforms class Cal(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, metric="euclidean", neighbors: int = 10, pretrain_model: str = "ResNet18", **kwargs): super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) self.balance = balance assert neighbors > 0 and neighbors < 100 self.neighbors = neighbors if metric == "euclidean": self.metric = euclidean_dist_pair_np elif metric == "cossim": self.metric = lambda a, b: -1. 
* cossim_pair_np(a, b) elif callable(metric): self.metric = metric else: self.metric = euclidean_dist_pair_np self.pretrain_model = pretrain_model def num_classes_mismatch(self): raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) def find_knn(self): """ Find k-nearest-neighbor data points with the pretrained embedding model :return: knn matrix """ # Initialize pretrained model model = nets.__dict__[self.pretrain_model](channel=self.args.channel, num_classes=self.args.num_classes, im_size=(224, 224), record_embedding=True, no_grad=True, pretrained=True).to(self.args.device) model.eval() # Resize dst_train to 224*224 if self.args.im_size[0] != 224 or self.args.im_size[1] != 224: dst_train = deepcopy(self.dst_train) dst_train.transform = transforms.Compose([dst_train.transform, transforms.Resize(224)]) else: dst_train = self.dst_train # Calculate the distance matrix and return knn results if self.balance: knn = [] for c in range(self.args.num_classes): class_index = np.arange(self.n_train)[self.dst_train.targets == c] # Start recording embedding vectors embdeddings = [] batch_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(dst_train, class_index), batch_size=self.args.selection_batch, num_workers=self.args.workers) batch_num = len(batch_loader) for i, (aa, _) in enumerate(batch_loader): if i % self.args.print_freq == 0: print("| Caculating embeddings for batch [%3d/%3d]" % (i + 1, batch_num)) model(aa.to(self.args.device)) embdeddings.append(model.embedding_recorder.embedding.flatten(1).cpu().numpy()) embdeddings = np.concatenate(embdeddings, axis=0) knn.append(np.argsort(self.metric(embdeddings), axis=1)[:, 1:(self.neighbors + 1)]) return knn 
else: # Start recording embedding vectors embdeddings = [] batch_loader = torch.utils.data.DataLoader(dst_train, batch_size=self.args.selection_batch ,num_workers=self.args.workers) batch_num = len(batch_loader) for i, (aa, _) in enumerate(batch_loader): if i % self.args.print_freq == 0: print("| Caculating embeddings for batch [%3d/%3d]" % (i + 1, batch_num)) model(aa.to(self.args.device)) embdeddings.append(model.embedding_recorder.embedding.flatten(1).cpu().numpy()) embdeddings = np.concatenate(embdeddings, axis=0) return np.argsort(self.metric(embdeddings), axis=1)[:, 1:(self.neighbors + 1)] def calc_kl(self, knn, index=None): self.model.eval() self.model.no_grad = True sample_num = self.n_train if index is None else len(index) probs = np.zeros([sample_num, self.args.num_classes]) batch_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) batch_num = len(batch_loader) for i, (inputs, _) in enumerate(batch_loader): probs[i * self.args.selection_batch:(i + 1) * self.args.selection_batch] = torch.nn.functional.softmax( self.model(inputs.to(self.args.device)), dim=1).detach().cpu() s = np.zeros(sample_num) for i in range(0, sample_num, self.args.selection_batch): if i % self.args.print_freq == 0: print("| Caculating KL-divergence for batch [%3d/%3d]" % (i // self.args.selection_batch + 1, batch_num)) aa = np.expand_dims(probs[i:(i + self.args.selection_batch)], 1).repeat(self.neighbors, 1) bb = probs[knn[i:(i + self.args.selection_batch)], :] s[i:(i + self.args.selection_batch)] = np.mean( np.sum(0.5 * aa * np.log(aa / bb) + 0.5 * bb * np.log(bb / aa), axis=2), axis=1) self.model.no_grad = False return s def finish_run(self): scores=[] if self.balance: selection_result = np.array([], dtype=np.int32) for c, knn in zip(range(self.args.num_classes), self.knn): class_index = np.arange(self.n_train)[self.dst_train.targets == c] 
scores.append(self.calc_kl(knn, class_index)) selection_result = np.append(selection_result, class_index[np.argsort( #self.calc_kl(knn, class_index))[::1][:round(self.fraction * len(class_index))]]) scores[-1])[::1][:round(self.fraction * len(class_index))]]) else: selection_result = np.argsort(self.calc_kl(self.knn))[::1][:self.coreset_size] return {"indices": selection_result, "scores":scores} def select(self, **kwargs): self.knn = self.find_knn() selection_result = self.run() return selection_result ================================================ FILE: deepcore/methods/contextualdiversity.py ================================================ from .kcentergreedy import kCenterGreedy import torch # Acknowlegement to: # https://github.com/sharat29ag/CDAL class ContextualDiversity(kCenterGreedy): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, already_selected=[], torchvision_pretrain: bool = False, **kwargs): super(ContextualDiversity, self).__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, balance=balance, already_selected=already_selected, torchvision_pretrain=torchvision_pretrain, **kwargs) self.metric = self._metric def _metric(self, a_output, b_output): with torch.no_grad(): # Overload self.metric function for kCenterGreedy Algorithm aa = a_output.view(a_output.shape[0], 1, a_output.shape[1]).repeat(1, b_output.shape[0], 1) bb = b_output.view(1, b_output.shape[0], b_output.shape[1]).repeat(a_output.shape[0], 1, 1) return torch.sum(0.5 * aa * torch.log(aa / bb) + 0.5 * bb * torch.log(bb / aa), dim=2) def construct_matrix(self, index=None): self.model.eval() self.model.no_grad = True sample_num = self.n_train if index is None else len(index) matrix = torch.zeros([sample_num, self.args.num_classes], requires_grad=False).to(self.args.device) batch_loader = torch.utils.data.DataLoader(self.dst_train if index is None else 
torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch ,num_workers=self.args.workers) for i, (inputs, _) in enumerate(batch_loader): matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = torch.nn.functional.softmax(self.model(inputs.to(self.args.device)), dim=1) self.model.no_grad = False return matrix ================================================ FILE: deepcore/methods/coresetmethod.py ================================================ class CoresetMethod(object): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs): if fraction <= 0.0 or fraction > 1.0: raise ValueError("Illegal Coreset Size.") self.dst_train = dst_train self.num_classes = len(dst_train.classes) self.fraction = fraction self.random_seed = random_seed self.index = [] self.args = args self.n_train = len(dst_train) self.coreset_size = round(self.n_train * fraction) def select(self, **kwargs): return ================================================ FILE: deepcore/methods/craig.py ================================================ from .earlytrain import EarlyTrain import torch from .methods_utils import FacilityLocation, submodular_optimizer import numpy as np from .methods_utils.euclidean import euclidean_dist_pair_np from ..nets.nets_utils import MyDataParallel class Craig(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, greedy="LazyGreedy", **kwargs): super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) if greedy not in submodular_optimizer.optimizer_choices: raise ModuleNotFoundError("Greedy optimizer not found.") self._greedy = greedy self.balance = balance def before_train(self): pass def after_loss(self, outputs, loss, targets, batch_inds, epoch): pass def before_epoch(self): pass def after_epoch(self): pass def before_run(self): pass def num_classes_mismatch(self): raise 
ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) def calc_gradient(self, index=None): self.model.eval() batch_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) sample_num = len(self.dst_val.targets) if index is None else len(index) self.embedding_dim = self.model.get_last_layer().in_features gradients = [] for i, (input, targets) in enumerate(batch_loader): self.model_optimizer.zero_grad() outputs = self.model(input.to(self.args.device)) loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum() batch_num = targets.shape[0] with torch.no_grad(): bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1, self.embedding_dim).repeat(1, self.args.num_classes, 1) * bias_parameters_grads.view( batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) gradients.append( torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu().numpy()) gradients = np.concatenate(gradients, axis=0) self.model.train() return euclidean_dist_pair_np(gradients) def calc_weights(self, matrix, result): min_sample = np.argmax(matrix[result], axis=0) weights = np.ones(np.sum(result) if result.dtype == bool else len(result)) for i in min_sample: weights[i] = weights[i] + 1 return weights def finish_run(self): if isinstance(self.model, MyDataParallel): self.model = self.model.module self.model.no_grad = True with self.model.embedding_recorder: if self.balance: # Do selection by class selection_result = 
np.array([], dtype=np.int32) weights = np.array([]) for c in range(self.args.num_classes): class_index = np.arange(self.n_train)[self.dst_train.targets == c] matrix = -1. * self.calc_gradient(class_index) matrix -= np.min(matrix) - 1e-3 submod_function = FacilityLocation(index=class_index, similarity_matrix=matrix) submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=class_index, budget=round(self.fraction * len( class_index))) class_result = submod_optimizer.select(gain_function=submod_function.calc_gain, update_state=submod_function.update_state) selection_result = np.append(selection_result, class_result) weights = np.append(weights, self.calc_weights(matrix, np.isin(class_index, class_result))) else: matrix = np.zeros([self.n_train, self.n_train]) all_index = np.arange(self.n_train) for c in range(self.args.num_classes): # Sparse Matrix class_index = np.arange(self.n_train)[self.dst_train.targets == c] matrix[np.ix_(class_index, class_index)] = -1. * self.calc_gradient(class_index) matrix[np.ix_(class_index, class_index)] -= np.min(matrix[np.ix_(class_index, class_index)]) - 1e-3 submod_function = FacilityLocation(index=all_index, similarity_matrix=matrix) submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=all_index, budget=self.coreset_size) selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain_batch, update_state=submod_function.update_state, batch=self.args.selection_batch) weights = self.calc_weights(matrix, selection_result) self.model.no_grad = False return {"indices": selection_result, "weights": weights} def select(self, **kwargs): selection_result = self.run() return selection_result ================================================ FILE: deepcore/methods/deepfool.py ================================================ from .earlytrain import EarlyTrain import torch import numpy as np class DeepFool(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, 
             random_seed=None, epochs=200, specific_model=None, balance: bool = False,
             max_iter: int = 50, **kwargs):
    # (continuation of DeepFool.__init__, whose `def` line precedes this chunk)
    super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
    self.balance = balance      # True: select per class; False: select globally
    self.max_iter = max_iter    # cap on DeepFool's perturbation iterations

def num_classes_mismatch(self):
    raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    # Periodic progress printout during pre-training.
    if batch_idx % self.args.print_freq == 0:
        print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
            epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

def finish_run(self):
    # Score every training sample by the squared norm of its DeepFool
    # perturbation; a smaller perturbation means closer to the decision boundary.
    self.model.no_grad = False

    # Create a data loader for self.dst_train with batch size self.args.selection_batch
    batch_loader = torch.utils.data.DataLoader(self.dst_train, batch_size=self.args.selection_batch
                                               , num_workers=self.args.workers)
    r = np.zeros(self.n_train, dtype=np.float32)
    batch_num = len(batch_loader)
    for i, (inputs, targets) in enumerate(batch_loader):
        if i % self.args.print_freq == 0:
            print('| Selecting Batch [%3d/%3d]' % (i + 1, batch_num))
        r[(i * self.args.selection_batch):(i * self.args.selection_batch + targets.shape[0])] = self.deep_fool(
            inputs)

    if self.balance:
        selection_result = np.array([], dtype=np.int64)
        for c in range(self.args.num_classes):
            class_index = np.arange(self.n_train)[self.dst_train.targets == c]
            # Ascending argsort: keep the samples with the SMALLEST perturbation.
            selection_result = np.append(selection_result, class_index[
                r[class_index].argsort()[:round(len(class_index) * self.fraction)]])
    else:
        selection_result = r.argsort()[:self.coreset_size]
    return {"indices": selection_result, "scores": r}

def deep_fool(self, inputs):
    # Here, start running DeepFool algorithm.
    self.model.eval()

    # Initialize a boolean mask indicating if selection has been stopped at corresponding positions.
    sample_size = inputs.shape[0]
    boolean_mask = np.ones(sample_size, dtype=bool)
    all_idx = np.arange(sample_size)

    # A matrix to store total perturbations.
    r_tot = np.zeros([sample_size, inputs.shape[1] * inputs.shape[2] * inputs.shape[3]])

    # Set requires_grad for inputs.
    # NOTE(review): on CPU `.to(device)` can return the same (leaf) tensor, and the
    # later in-place `cur_inputs += ...` would then fail on a leaf requiring grad —
    # TODO confirm this path only runs with an actual device transfer.
    cur_inputs = inputs.requires_grad_(True).to(self.args.device)
    original_shape = inputs.shape[1:]

    # Set requires_grad for all parameters in the network as False to accelerate autograd.
    for p in self.model.parameters():
        p.requires_grad_(False)

    self.model.no_grad = True
    first_preds = self.model(cur_inputs).argmax(dim=1)
    self.model.no_grad = False

    for i in range(self.max_iter):
        f_all = self.model(cur_inputs)

        # Per-class input gradients, stacked: w_k[c] holds d(f_c)/d(input) per sample.
        w_k = []
        for c in range(self.args.num_classes):
            w_k.append(torch.autograd.grad(f_all[:, c].sum(), cur_inputs,
                                           retain_graph=False if c + 1 == self.args.num_classes else True)[
                           0].flatten(1))
        w_k = torch.stack(w_k, dim=0)
        # NOTE: `boolean_mask[boolean_mask]` is an all-True vector of the current
        # active length; used as an index it acts like arange(n_active), pairing
        # each active sample with its own predicted class.
        w_k = w_k - w_k[first_preds, boolean_mask[boolean_mask]].unsqueeze(0)

        w_k_norm = w_k.norm(dim=2)

        w_k_norm[first_preds, boolean_mask[
            boolean_mask]] = 1.  # Set w_k_norm for preds positions to 1. to avoid division by zero.

        # Per-class distance-to-boundary estimate; the predicted class is set to
        # inf below so argmin picks the nearest OTHER class.
        l_all = (f_all - f_all[boolean_mask[boolean_mask], first_preds].unsqueeze(1)).detach().abs() / w_k_norm.T
        l_all[boolean_mask[
            boolean_mask], first_preds] = np.inf  # Set l_k for preds positions to inf, as the argmin for each
        # row will be calculated soon.

        l_hat = l_all.argmin(dim=1)

        # Minimal perturbation step toward the nearest boundary.
        r_i = l_all[boolean_mask[boolean_mask], l_hat].unsqueeze(1) / w_k_norm[
            l_hat, boolean_mask[boolean_mask]].T.unsqueeze(1) * w_k[l_hat, boolean_mask[boolean_mask]]

        # Update r_tot values.
        r_tot[boolean_mask] += r_i.cpu().numpy()

        cur_inputs += r_i.reshape([r_i.shape[0]] + list(original_shape))

        # Re-input the updated sample into the network and get new predictions.
        self.model.no_grad = True
        preds = self.model(cur_inputs).argmax(dim=1)
        self.model.no_grad = False

        # In DeepFool algorithm, the iteration stops when the updated sample produces a different prediction
        # in the model.
        index_unfinished = (preds == first_preds)
        if torch.all(~index_unfinished):
            break

        cur_inputs = cur_inputs[index_unfinished]
        first_preds = first_preds[index_unfinished]
        boolean_mask[all_idx[boolean_mask][~index_unfinished.cpu().numpy()]] = False

    # Squared l2 norm of each sample's accumulated perturbation.
    return (r_tot * r_tot).sum(axis=1)

def select(self, **kwargs):
    selection_result = self.run()
    return selection_result


# ================ deepcore/methods/earlytrain.py ================
from .coresetmethod import CoresetMethod
import torch, time
from torch import nn
import numpy as np
from copy import deepcopy
from .. import nets
from torchvision import transforms


class EarlyTrain(CoresetMethod):
    '''
    Core code for training related to coreset selection methods when pre-training is required.
    '''

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 torchvision_pretrain: bool = False, dst_pretrain_dict: dict = {}, fraction_pretrain=1.,
                 dst_test=None, **kwargs):
        # NOTE(review): `dst_pretrain_dict={}` is a shared mutable default argument —
        # flagged here, left unchanged in this documentation-only edit.
        super().__init__(dst_train, args, fraction, random_seed)
        self.epochs = epochs
        self.n_train = len(dst_train)
        self.coreset_size = round(self.n_train * fraction)
        self.specific_model = specific_model

        if fraction_pretrain <= 0. \
or fraction_pretrain > 1.: raise ValueError("Illegal pretrain fraction value.") self.fraction_pretrain = fraction_pretrain if dst_pretrain_dict.__len__() != 0: dict_keys = dst_pretrain_dict.keys() if 'im_size' not in dict_keys or 'channel' not in dict_keys or 'dst_train' not in dict_keys or \ 'num_classes' not in dict_keys: raise AttributeError( 'Argument dst_pretrain_dict must contain imszie, channel, dst_train and num_classes.') if dst_pretrain_dict['im_size'][0] != args.im_size[0] or dst_pretrain_dict['im_size'][0] != args.im_size[0]: raise ValueError("im_size of pretrain dataset does not match that of the training dataset.") if dst_pretrain_dict['channel'] != args.channel: raise ValueError("channel of pretrain dataset does not match that of the training dataset.") if dst_pretrain_dict['num_classes'] != args.num_classes: self.num_classes_mismatch() self.dst_pretrain_dict = dst_pretrain_dict self.torchvision_pretrain = torchvision_pretrain self.if_dst_pretrain = (len(self.dst_pretrain_dict) != 0) if torchvision_pretrain: # Pretrained models in torchvision only accept 224*224 inputs, therefore we resize current # datasets to 224*224. 
if args.im_size[0] != 224 or args.im_size[1] != 224: self.dst_train = deepcopy(dst_train) self.dst_train.transform = transforms.Compose([self.dst_train.transform, transforms.Resize(224)]) if self.if_dst_pretrain: self.dst_pretrain_dict['dst_train'] = deepcopy(dst_pretrain_dict['dst_train']) self.dst_pretrain_dict['dst_train'].transform = transforms.Compose( [self.dst_pretrain_dict['dst_train'].transform, transforms.Resize(224)]) if self.if_dst_pretrain: self.n_pretrain = len(self.dst_pretrain_dict['dst_train']) self.n_pretrain_size = round( self.fraction_pretrain * (self.n_pretrain if self.if_dst_pretrain else self.n_train)) self.dst_test = dst_test def train(self, epoch, list_of_train_idx, **kwargs): """ Train model for one epoch """ self.before_train() self.model.train() print('\n=> Training Epoch #%d' % epoch) trainset_permutation_inds = np.random.permutation(list_of_train_idx) batch_sampler = torch.utils.data.BatchSampler(trainset_permutation_inds, batch_size=self.args.selection_batch, drop_last=False) trainset_permutation_inds = list(batch_sampler) train_loader = torch.utils.data.DataLoader(self.dst_pretrain_dict['dst_train'] if self.if_dst_pretrain else self.dst_train, shuffle=False, batch_sampler=batch_sampler, num_workers=self.args.workers, pin_memory=True) for i, (inputs, targets) in enumerate(train_loader): inputs, targets = inputs.to(self.args.device), targets.to(self.args.device) # Forward propagation, compute loss, get predictions self.model_optimizer.zero_grad() outputs = self.model(inputs) loss = self.criterion(outputs, targets) self.after_loss(outputs, loss, targets, trainset_permutation_inds[i], epoch) # Update loss, backward propagate, update optimizer loss = loss.mean() self.while_update(outputs, loss, targets, epoch, i, self.args.selection_batch) loss.backward() self.model_optimizer.step() return self.finish_train() def run(self): torch.manual_seed(self.random_seed) np.random.seed(self.random_seed) self.train_indx = np.arange(self.n_train) # 
Setup model and loss self.model = nets.__dict__[self.args.model if self.specific_model is None else self.specific_model]( self.args.channel, self.dst_pretrain_dict["num_classes"] if self.if_dst_pretrain else self.num_classes, pretrained=self.torchvision_pretrain, im_size=(224, 224) if self.torchvision_pretrain else self.args.im_size).to(self.args.device) if self.args.device == "cpu": print("Using CPU.") elif self.args.gpu is not None: torch.cuda.set_device(self.args.gpu[0]) self.model = nets.nets_utils.MyDataParallel(self.model, device_ids=self.args.gpu) elif torch.cuda.device_count() > 1: self.model = nets.nets_utils.MyDataParallel(self.model).cuda() self.criterion = nn.CrossEntropyLoss().to(self.args.device) self.criterion.__init__() # Setup optimizer if self.args.selection_optimizer == "SGD": self.model_optimizer = torch.optim.SGD(self.model.parameters(), lr=self.args.selection_lr, momentum=self.args.selection_momentum, weight_decay=self.args.selection_weight_decay, nesterov=self.args.selection_nesterov) elif self.args.selection_optimizer == "Adam": self.model_optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.selection_lr, weight_decay=self.args.selection_weight_decay) else: self.model_optimizer = torch.optim.__dict__[self.args.selection_optimizer](self.model.parameters(), lr=self.args.selection_lr, momentum=self.args.selection_momentum, weight_decay=self.args.selection_weight_decay, nesterov=self.args.selection_nesterov) self.before_run() for epoch in range(self.epochs): list_of_train_idx = np.random.choice(np.arange(self.n_pretrain if self.if_dst_pretrain else self.n_train), self.n_pretrain_size, replace=False) self.before_epoch() self.train(epoch, list_of_train_idx) if self.dst_test is not None and self.args.selection_test_interval > 0 and ( epoch + 1) % self.args.selection_test_interval == 0: self.test(epoch) self.after_epoch() return self.finish_run() def test(self, epoch): self.model.no_grad = True self.model.eval() test_loader = 
torch.utils.data.DataLoader(self.dst_test if self.args.selection_test_fraction == 1. else torch.utils.data.Subset(self.dst_test, np.random.choice( np.arange(len(self.dst_test)), round(len(self.dst_test) * self.args.selection_test_fraction), replace=False)), batch_size=self.args.selection_batch, shuffle=False, num_workers=self.args.workers, pin_memory=True) correct = 0. total = 0. print('\n=> Testing Epoch #%d' % epoch) for batch_idx, (input, target) in enumerate(test_loader): output = self.model(input.to(self.args.device)) loss = self.criterion(output, target.to(self.args.device)).sum() predicted = torch.max(output.data, 1).indices.cpu() correct += predicted.eq(target).sum().item() total += target.size(0) if batch_idx % self.args.print_freq == 0: print('| Test Epoch [%3d/%3d] Iter[%3d/%3d]\t\tTest Loss: %.4f Test Acc: %.3f%%' % ( epoch, self.epochs, batch_idx + 1, (round(len(self.dst_test) * self.args.selection_test_fraction) // self.args.selection_batch) + 1, loss.item(), 100. * correct / total)) self.model.no_grad = False def num_classes_mismatch(self): pass def before_train(self): pass def after_loss(self, outputs, loss, targets, batch_inds, epoch): pass def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): pass def finish_train(self): pass def before_epoch(self): pass def after_epoch(self): pass def before_run(self): pass def finish_run(self): pass def select(self, **kwargs): selection_result = self.run() return selection_result ================================================ FILE: deepcore/methods/forgetting.py ================================================ from .earlytrain import EarlyTrain import torch, time from torch import nn import numpy as np # Acknowledgement to # https://github.com/mtoneva/example_forgetting class Forgetting(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, dst_test=None, **kwargs): super().__init__(dst_train, args, fraction, 
# --- Forgetting (class interior; __init__ and the class statement precede this chunk) ---

def get_hms(self, seconds):
    """Convert a duration in seconds to an (hours, minutes, seconds) tuple."""
    minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return hours, minutes, secs

def before_train(self):
    # Fresh running statistics for the upcoming epoch.
    self.train_loss = 0.
    self.correct = 0.
    self.total = 0.

def after_loss(self, outputs, loss, targets, batch_inds, epoch):
    """Record a forgetting event for every sample whose prediction flips from
    correct (last visit) to incorrect (this visit)."""
    with torch.no_grad():
        _, predicted = torch.max(outputs.data, 1)
        now_correct = (predicted == targets).clone().detach().requires_grad_(False).type(torch.float32)
        forgotten = (self.last_acc[batch_inds] - now_correct) > 0.01
        self.forgetting_events[torch.tensor(batch_inds)[forgotten]] += 1.
        self.last_acc[batch_inds] = now_correct

def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    # Accumulate loss/accuracy and periodically report training progress.
    self.train_loss += loss.item()
    self.total += targets.size(0)
    _, predicted = torch.max(outputs.data, 1)
    self.correct += predicted.eq(targets.data).cpu().sum()

    if batch_idx % self.args.print_freq == 0:
        print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' % (
            epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1,
            loss.item(), 100. * self.correct.item() / self.total))

def before_epoch(self):
    self.start_time = time.time()

def after_epoch(self):
    # Track cumulative wall-clock time across epochs.
    self.elapsed_time += time.time() - self.start_time
    print('| Elapsed time : %d:%02d:%02d' % (self.get_hms(self.elapsed_time)))

def before_run(self):
    self.elapsed_time = 0
    # Per-sample forgetting counters and last-seen correctness flags.
    self.forgetting_events = torch.zeros(self.n_train, requires_grad=False).to(self.args.device)
    self.last_acc = torch.zeros(self.n_train, requires_grad=False).to(self.args.device)

def finish_run(self):
    pass

def select(self, **kwargs):
    """Pre-train, then keep the samples that were forgotten most often."""
    self.run()
    if not self.balance:
        # Descending by forgetting count over the whole training set.
        top_examples = self.train_indx[np.argsort(self.forgetting_events.cpu().numpy())][::-1][:self.coreset_size]
    else:
        top_examples = np.array([], dtype=np.int64)
        for c in range(self.num_classes):
            c_indx = self.train_indx[self.dst_train.targets == c]
            budget = round(self.fraction * len(c_indx))
            top_examples = np.append(
                top_examples,
                c_indx[np.argsort(self.forgetting_events[c_indx].cpu().numpy())[::-1][:budget]])

    return {"indices": top_examples, "scores": self.forgetting_events}


# --- Full (deepcore/methods/full.py): trivial baseline that keeps everything ---

def __init__(self, dst_train, args, fraction, random_seed, **kwargs):
    # Only the dataset size matters; all other arguments are ignored.
    self.n_train = len(dst_train)

def select(self, **kwargs):
    # Every training index is "selected".
    return {"indices": np.arange(self.n_train)}
specific_model, **kwargs) self.balance = balance self.eta = args.lr if eta is None else eta self.dst_val = dst_train if dst_val is None else dst_val self.n_val = len(self.dst_val) if greedy not in submodular_optimizer.optimizer_choices: raise ModuleNotFoundError("Greedy optimizer not found.") self._greedy = greedy def calc_gradient(self, index=None, val=False, record_val_detail=False): ''' Calculate gradients matrix on current network for training or validation dataset. ''' self.model.eval() if val: batch_loader = torch.utils.data.DataLoader( self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) else: batch_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) self.embedding_dim = self.model.get_last_layer().in_features gradients = [] if val and record_val_detail: self.init_out = [] self.init_emb = [] self.init_y = [] for i, (input, targets) in enumerate(batch_loader): self.model_optimizer.zero_grad() outputs = self.model(input.to(self.args.device)) loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum() batch_num = targets.shape[0] with torch.no_grad(): bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1, self.embedding_dim).repeat(1, self.args.num_classes, 1) *\ bias_parameters_grads.view( batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) gradients.append(torch.cat( [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu()) if val and record_val_detail: self.init_out.append(outputs.cpu()) self.init_emb.append(self.model.embedding_recorder.embedding.cpu()) self.init_y.append(targets) gradients = torch.cat(gradients, dim=0) if val: self.val_grads = torch.mean(gradients, dim=0) 
if self.dst_val == self.dst_train: # No validation set was provided while instantiating Glister, so self.dst_val == self.dst_train self.train_grads = gradients else: self.train_grads = gradients if val and record_val_detail: with torch.no_grad(): self.init_out = torch.cat(self.init_out, dim=0) self.init_emb = torch.cat(self.init_emb, dim=0) self.init_y = torch.cat(self.init_y) self.model.train() def update_val_gradients(self, new_selection, selected_for_train): sum_selected_train_gradients = torch.mean(self.train_grads[selected_for_train], dim=0) new_outputs = self.init_out - self.eta * sum_selected_train_gradients[:self.args.num_classes].view(1, -1).repeat(self.init_out.shape[0], 1) - self.eta * torch.matmul(self.init_emb, sum_selected_train_gradients[self.args.num_classes:].view(self.args.num_classes, -1).T) sample_num = new_outputs.shape[0] gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)], requires_grad=False) i = 0 while i * self.args.selection_batch < sample_num: batch_indx = np.arange(sample_num)[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] new_out_puts_batch = new_outputs[batch_indx].clone().detach().requires_grad_(True) loss = self.criterion(new_out_puts_batch, self.init_y[batch_indx]) batch_num = len(batch_indx) bias_parameters_grads = torch.autograd.grad(loss.sum(), new_out_puts_batch, retain_graph=True)[0] weight_parameters_grads = self.init_emb[batch_indx].view(batch_num, 1, self.embedding_dim).repeat(1, self.args.num_classes, 1) * bias_parameters_grads.view(batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) gradients[batch_indx] = torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu() i += 1 self.val_grads = torch.mean(gradients, dim=0) def finish_run(self): if isinstance(self.model, MyDataParallel): self.model = self.model.module self.model.embedding_recorder.record_embedding = True self.model.no_grad = True self.train_indx = 
np.arange(self.n_train) self.val_indx = np.arange(self.n_val) if self.balance: selection_result = np.array([], dtype=np.int64) #weights = np.array([], dtype=np.float32) for c in range(self.num_classes): c_indx = self.train_indx[self.dst_train.targets == c] c_val_inx = self.val_indx[self.dst_val.targets == c] self.calc_gradient(index=c_val_inx, val=True, record_val_detail=True) if self.dst_val != self.dst_train: self.calc_gradient(index=c_indx) submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=c_indx, budget=round(self.fraction * len(c_indx))) c_selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected, **kwargs: torch.matmul(self.train_grads[idx_gain], self.val_grads.view(-1, 1)).detach().cpu().numpy(). flatten(), upadate_state=self.update_val_gradients) selection_result = np.append(selection_result, c_selection_result) else: self.calc_gradient(val=True, record_val_detail=True) if self.dst_val != self.dst_train: self.calc_gradient() submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=np.arange(self.n_train), budget=self.coreset_size) selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected, **kwargs: torch.matmul(self.train_grads[idx_gain], self.val_grads.view(-1, 1)).detach().cpu().numpy().flatten(), upadate_state=self.update_val_gradients) self.model.embedding_recorder.record_embedding = False self.model.no_grad = False return {"indices": selection_result} def num_classes_mismatch(self): raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) ================================================ FILE: deepcore/methods/gradmatch.py 
# ================ deepcore/methods/gradmatch.py ================
import torch
import numpy as np
from scipy.linalg import lstsq
from scipy.optimize import nnls
# (project-relative imports from the original file, not resolvable in this chunk view:
#  from .earlytrain import EarlyTrain; from ..nets.nets_utils import MyDataParallel)

# https://github.com/krishnatejakk/GradMatch


def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
             balance=True, dst_val=None, lam: float = 1., **kwargs):
    """GradMatch: match the mean (validation) gradient with a weighted subset via OMP.

    NOTE(review): `lam` is accepted but never stored or used here — the OMP
    methods take their own `lam` argument; kept for signature compatibility.
    """
    super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
    self.balance = balance
    self.dst_val = dst_val

def num_classes_mismatch(self):
    raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    # Periodic progress printout during pre-training.
    if batch_idx % self.args.print_freq == 0:
        print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
            epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

def orthogonal_matching_pursuit(self, A, b, budget: int, lam: float = 1.):
    '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
    Acknowlegement to:
    https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
    Args:
      A: design matrix of size (d, n)
      b: measurement vector of length d
      budget: selection budget
      lam: regularization coef. for the final output vector
    Returns:
       vector of length n
    '''
    with torch.no_grad():
        d, n = A.shape
        if budget <= 0:
            budget = 0
        elif budget > n:
            budget = n

        # BUGFIX: device is taken from A instead of a hard-coded "cuda" string,
        # which broke on non-default CUDA devices (e.g. cuda:1).
        device = A.device
        x = np.zeros(n, dtype=np.float32)
        resid = b.clone()
        indices = []
        boolean_mask = torch.ones(n, dtype=bool, device=device)
        all_idx = torch.arange(n, device=device)

        for i in range(budget):
            if i % self.args.print_freq == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, budget))
            # Pick the unselected column most correlated with the residual.
            projections = torch.matmul(A.T, resid)
            index = torch.argmax(projections[boolean_mask])
            index = all_idx[boolean_mask][index]

            indices.append(index.item())
            boolean_mask[index] = False

            if indices.__len__() == 1:
                A_i = A[:, index]
                x_i = projections[index] / torch.dot(A_i, A_i).view(-1)
                A_i = A[:, index].view(1, -1)
            else:
                A_i = torch.cat((A_i, A[:, index].view(1, -1)), dim=0)
                temp = torch.matmul(A_i, torch.transpose(A_i, 0, 1)) + lam * torch.eye(
                    A_i.shape[0], device=device)
                # BUGFIX: `torch.lstsq` was removed from PyTorch; use
                # torch.linalg.lstsq (same solution for this square regularized system).
                x_i = torch.linalg.lstsq(temp, torch.matmul(A_i, b).view(-1, 1)).solution
            resid = b - torch.matmul(torch.transpose(A_i, 0, 1), x_i).view(-1)
        # Final non-negative refit of the selected weights.
        if budget > 1:
            x_i = nnls(temp.cpu().numpy(), torch.matmul(A_i, b).view(-1).cpu().numpy())[0]
            x[indices] = x_i
        elif budget == 1:
            x[indices[0]] = 1.
    return x

def orthogonal_matching_pursuit_np(self, A, b, budget: int, lam: float = 1.):
    '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
    Acknowlegement to:
    https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
    Args:
      A: design matrix of size (d, n)
      b: measurement vector of length d
      budget: selection budget
      lam: regularization coef. for the final output vector
    Returns:
       vector of length n
    '''
    d, n = A.shape
    if budget <= 0:
        budget = 0
    elif budget > n:
        budget = n

    x = np.zeros(n, dtype=np.float32)
    resid = np.copy(b)
    indices = []
    boolean_mask = np.ones(n, dtype=bool)
    all_idx = np.arange(n)

    for i in range(budget):
        if i % self.args.print_freq == 0:
            print("| Selecting [%3d/%3d]" % (i + 1, budget))
        # Pick the unselected column most correlated with the residual.
        projections = A.T.dot(resid)
        index = np.argmax(projections[boolean_mask])
        index = all_idx[boolean_mask][index]

        indices.append(index.item())
        boolean_mask[index] = False

        if indices.__len__() == 1:
            A_i = A[:, index]
            x_i = projections[index] / A_i.T.dot(A_i)
        else:
            A_i = np.vstack([A_i, A[:, index]])
            x_i = lstsq(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0]
        resid = b - A_i.T.dot(x_i)
    # Final non-negative refit of the selected weights.
    if budget > 1:
        x_i = nnls(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0]
        x[indices] = x_i
    elif budget == 1:
        x[indices[0]] = 1.
    return x

def calc_gradient(self, index=None, val=False):
    """Collect per-sample last-layer gradients for the training (or validation)
    (sub)set; returns a (sample_num, num_classes * (embedding_dim + 1)) tensor."""
    self.model.eval()
    if val:
        batch_loader = torch.utils.data.DataLoader(
            self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index),
            batch_size=self.args.selection_batch, num_workers=self.args.workers)
        sample_num = len(self.dst_val.targets) if index is None else len(index)
    else:
        batch_loader = torch.utils.data.DataLoader(
            self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
            batch_size=self.args.selection_batch, num_workers=self.args.workers)
        sample_num = self.n_train if index is None else len(index)

    self.embedding_dim = self.model.get_last_layer().in_features
    gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)],
                            requires_grad=False, device=self.args.device)

    for i, (input, targets) in enumerate(batch_loader):
        self.model_optimizer.zero_grad()
        outputs = self.model(input.to(self.args.device)).requires_grad_(True)
        loss = self.criterion(outputs, targets.to(self.args.device)).sum()
        batch_num = targets.shape[0]
        with torch.no_grad():
            # d(loss)/d(logits) = bias gradient; outer product with embedding = weight gradient.
            bias_parameters_grads = torch.autograd.grad(loss, outputs, retain_graph=True)[0].cpu()
            weight_parameters_grads = self.model.embedding_recorder.embedding.cpu().view(
                batch_num, 1, self.embedding_dim).repeat(1, self.args.num_classes, 1) * \
                bias_parameters_grads.view(batch_num, self.args.num_classes, 1).repeat(
                    1, 1, self.embedding_dim)
            gradients[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = \
                torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1)
    return gradients

def finish_run(self):
    """Run OMP (per class if balanced) to match the mean validation gradient
    (or mean training gradient when no validation set exists)."""
    if isinstance(self.model, MyDataParallel):
        self.model = self.model.module
    self.model.no_grad = True
    with self.model.embedding_recorder:
        if self.dst_val is not None:
            val_num = len(self.dst_val.targets)

        if self.balance:
            selection_result = np.array([], dtype=np.int64)
            weights = np.array([], dtype=np.float32)
            for c in range(self.args.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
                cur_gradients = self.calc_gradient(class_index)
                if self.dst_val is not None:
                    # Also calculate gradients of the validation set.
                    val_class_index = np.arange(val_num)[self.dst_val.targets == c]
                    cur_val_gradients = torch.mean(self.calc_gradient(val_class_index, val=True), dim=0)
                else:
                    cur_val_gradients = torch.mean(cur_gradients, dim=0)
                if self.args.device == "cpu":
                    # Compute OMP on numpy
                    cur_weights = self.orthogonal_matching_pursuit_np(
                        cur_gradients.numpy().T, cur_val_gradients.numpy(),
                        budget=round(len(class_index) * self.fraction))
                else:
                    cur_weights = self.orthogonal_matching_pursuit(
                        cur_gradients.to(self.args.device).T,
                        cur_val_gradients.to(self.args.device),
                        budget=round(len(class_index) * self.fraction))
                selection_result = np.append(selection_result, class_index[np.nonzero(cur_weights)[0]])
                weights = np.append(weights, cur_weights[np.nonzero(cur_weights)[0]])
        else:
            cur_gradients = self.calc_gradient()
            if self.dst_val is not None:
                # Also calculate gradients of the validation set.
                cur_val_gradients = torch.mean(self.calc_gradient(val=True), dim=0)
            else:
                cur_val_gradients = torch.mean(cur_gradients, dim=0)
            if self.args.device == "cpu":
                # Compute OMP on numpy
                cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T,
                                                                  cur_val_gradients.numpy(),
                                                                  budget=self.coreset_size)
            else:
                cur_weights = self.orthogonal_matching_pursuit(cur_gradients.T, cur_val_gradients,
                                                               budget=self.coreset_size)
            selection_result = np.nonzero(cur_weights)[0]
            weights = cur_weights[selection_result]
    self.model.no_grad = False
    return {"indices": selection_result, "weights": weights}

def select(self, **kwargs):
    selection_result = self.run()
    return selection_result


# ================ deepcore/methods/grand.py (head; finish_run continues past this chunk) ====

def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, repeat=10,
             specific_model=None, balance=False, **kwargs):
    """GraNd: rank samples by the expected norm of their last-layer gradient,
    averaged over `repeat` independently initialized/pretrained models."""
    super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model)
    # CLEANUP: EarlyTrain.__init__ already sets epochs / n_train / coreset_size /
    # specific_model to these exact values; the original re-assigned them redundantly.
    self.repeat = repeat
    self.balance = balance

def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    # Periodic progress printout during pre-training.
    if batch_idx % self.args.print_freq == 0:
        print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
            epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item()))

def before_run(self):
    # Unwrap DataParallel so attribute access hits the real model.
    if isinstance(self.model, MyDataParallel):
        self.model = self.model.module
            num_workers=self.args.workers)
        # (continuation of GraNd.finish_run, whose header precedes this chunk)
        sample_num = self.n_train

        for i, (input, targets) in enumerate(batch_loader):
            self.model_optimizer.zero_grad()
            outputs = self.model(input.to(self.args.device))
            loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum()
            batch_num = targets.shape[0]
            with torch.no_grad():
                # d(loss)/d(logits) doubles as the last-layer bias gradient.
                bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
                # GraNd score for this repeat: l2 norm of the concatenated
                # (bias, weight) last-layer gradient, where the weight gradient is
                # embedding (outer product) logit-gradient.
                self.norm_matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num),
                self.cur_repeat] = torch.norm(torch.cat([bias_parameters_grads, (
                        self.model.embedding_recorder.embedding.view(batch_num, 1, embedding_dim).repeat(1,
                                             self.args.num_classes, 1) * bias_parameters_grads.view(
                                             batch_num, self.args.num_classes, 1).repeat(1, 1, embedding_dim)).
                                             view(batch_num, -1)], dim=1), dim=1, p=2)

        self.model.train()

        self.model.embedding_recorder.record_embedding = False

    def select(self, **kwargs):
        # Initialize a matrix to save norms of each sample on independent runs.
        self.norm_matrix = torch.zeros([self.n_train, self.repeat], requires_grad=False).to(self.args.device)

        for self.cur_repeat in range(self.repeat):
            self.run()
            # Shift the seed so each repeat pre-trains a differently initialized model.
            self.random_seed = self.random_seed + 5

        self.norm_mean = torch.mean(self.norm_matrix, dim=1).cpu().detach().numpy()
        if not self.balance:
            # [::-1]: descending by mean gradient norm.
            top_examples = self.train_indx[np.argsort(self.norm_mean)][::-1][:self.coreset_size]
        else:
            top_examples = np.array([], dtype=np.int64)
            for c in range(self.num_classes):
                c_indx = self.train_indx[self.dst_train.targets == c]
                budget = round(self.fraction * len(c_indx))
                top_examples = np.append(top_examples, c_indx[np.argsort(self.norm_mean[c_indx])[::-1][:budget]])

        return {"indices": top_examples, "scores": self.norm_mean}


# ================ deepcore/methods/herding.py ================
from .earlytrain import EarlyTrain
import torch
import numpy as np
from .methods_utils import euclidean_dist
from ..nets.nets_utils import MyDataParallel


# (the class statement below continues on the next chunk line)
class
Herding(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model="ResNet18", balance: bool = False, metric="euclidean", **kwargs): super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, **kwargs) if metric == "euclidean": self.metric = euclidean_dist elif callable(metric): self.metric = metric else: self.metric = euclidean_dist self.run = lambda: self.finish_run() def _construct_matrix(index=None): data_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.n_train if index is None else len(index), num_workers=self.args.workers) inputs, _ = next(iter(data_loader)) return inputs.flatten(1).requires_grad_(False).to(self.args.device) self.construct_matrix = _construct_matrix self.balance = balance def num_classes_mismatch(self): raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) def construct_matrix(self, index=None): self.model.eval() self.model.no_grad = True with torch.no_grad(): with self.model.embedding_recorder: sample_num = self.n_train if index is None else len(index) matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device) data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) for i, (inputs, _) in enumerate(data_loader): self.model(inputs.to(self.args.device)) matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = self.model.embedding_recorder.embedding 
self.model.no_grad = False return matrix def before_run(self): self.emb_dim = self.model.get_last_layer().in_features def herding(self, matrix, budget: int, index=None): sample_num = matrix.shape[0] if budget < 0: raise ValueError("Illegal budget size.") elif budget > sample_num: budget = sample_num indices = np.arange(sample_num) with torch.no_grad(): mu = torch.mean(matrix, dim=0) select_result = np.zeros(sample_num, dtype=bool) for i in range(budget): if i % self.args.print_freq == 0: print("| Selecting [%3d/%3d]" % (i + 1, budget)) dist = self.metric(((i + 1) * mu - torch.sum(matrix[select_result], dim=0)).view(1, -1), matrix[~select_result]) p = torch.argmax(dist).item() p = indices[~select_result][p] select_result[p] = True if index is None: index = indices return index[select_result] def finish_run(self): if isinstance(self.model, MyDataParallel): self.model = self.model.module if self.balance: selection_result = np.array([], dtype=np.int32) for c in range(self.args.num_classes): class_index = np.arange(self.n_train)[self.dst_train.targets == c] selection_result = np.append(selection_result, self.herding(self.construct_matrix(class_index), budget=round(self.fraction * len(class_index)), index=class_index)) else: selection_result = self.herding(self.construct_matrix(), budget=self.coreset_size) return {"indices": selection_result} def select(self, **kwargs): selection_result = self.run() return selection_result ================================================ FILE: deepcore/methods/kcentergreedy.py ================================================ from .earlytrain import EarlyTrain import torch import numpy as np from .methods_utils import euclidean_dist from ..nets.nets_utils import MyDataParallel def k_center_greedy(matrix, budget: int, metric, device, random_seed=None, index=None, already_selected=None, print_freq: int = 20): if type(matrix) == torch.Tensor: assert matrix.dim() == 2 elif type(matrix) == np.ndarray: assert matrix.ndim == 2 matrix = 
torch.from_numpy(matrix).requires_grad_(False).to(device) sample_num = matrix.shape[0] assert sample_num >= 1 if budget < 0: raise ValueError("Illegal budget size.") elif budget > sample_num: budget = sample_num if index is not None: assert matrix.shape[0] == len(index) else: index = np.arange(sample_num) assert callable(metric) already_selected = np.array(already_selected) with torch.no_grad(): np.random.seed(random_seed) if already_selected.__len__() == 0: select_result = np.zeros(sample_num, dtype=bool) # Randomly select one initial point. already_selected = [np.random.randint(0, sample_num)] budget -= 1 select_result[already_selected] = True else: select_result = np.in1d(index, already_selected) num_of_already_selected = np.sum(select_result) # Initialize a (num_of_already_selected+budget-1)*sample_num matrix storing distances of pool points from # each clustering center. dis_matrix = -1 * torch.ones([num_of_already_selected + budget - 1, sample_num], requires_grad=False).to(device) dis_matrix[:num_of_already_selected, ~select_result] = metric(matrix[select_result], matrix[~select_result]) mins = torch.min(dis_matrix[:num_of_already_selected, :], dim=0).values for i in range(budget): if i % print_freq == 0: print("| Selecting [%3d/%3d]" % (i + 1, budget)) p = torch.argmax(mins).item() select_result[p] = True if i == budget - 1: break mins[p] = -1 dis_matrix[num_of_already_selected + i, ~select_result] = metric(matrix[[p]], matrix[~select_result]) mins = torch.min(mins, dis_matrix[num_of_already_selected + i]) return index[select_result] class kCenterGreedy(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=0, specific_model="ResNet18", balance: bool = False, already_selected=[], metric="euclidean", torchvision_pretrain: bool = True, **kwargs): super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, torchvision_pretrain=torchvision_pretrain, **kwargs) if 
already_selected.__len__() != 0: if min(already_selected) < 0 or max(already_selected) >= self.n_train: raise ValueError("List of already selected points out of the boundary.") self.already_selected = np.array(already_selected) self.min_distances = None if metric == "euclidean": self.metric = euclidean_dist elif callable(metric): self.metric = metric else: self.metric = euclidean_dist self.run = lambda : self.finish_run() def _construct_matrix(index=None): data_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.n_train if index is None else len(index), num_workers=self.args.workers) inputs, _ = next(iter(data_loader)) return inputs.flatten(1).requires_grad_(False).to(self.args.device) self.construct_matrix = _construct_matrix self.balance = balance def num_classes_mismatch(self): raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) def old_construct_matrix(self, index=None): self.model.eval() self.model.no_grad = True with torch.no_grad(): with self.model.embedding_recorder: sample_num = self.n_train if index is None else len(index) matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device) data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) for i, (inputs, _) in enumerate(data_loader): self.model(inputs.to(self.args.device)) matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = self.model.embedding_recorder.embedding self.model.no_grad = False return matrix 
def construct_matrix(self, index=None): self.model.eval() self.model.no_grad = True with torch.no_grad(): with self.model.embedding_recorder: sample_num = self.n_train if index is None else len(index) matrix = [] data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) for i, (inputs, _) in enumerate(data_loader): self.model(inputs.to(self.args.device)) matrix.append(self.model.embedding_recorder.embedding) self.model.no_grad = False return torch.cat(matrix, dim=0) def before_run(self): self.emb_dim = self.model.get_last_layer().in_features def finish_run(self): if isinstance(self.model, MyDataParallel): self.model = self.model.module def select(self, **kwargs): self.run() if self.balance: selection_result = np.array([], dtype=np.int32) for c in range(self.args.num_classes): class_index = np.arange(self.n_train)[self.dst_train.targets == c] selection_result = np.append(selection_result, k_center_greedy(self.construct_matrix(class_index), budget=round( self.fraction * len(class_index)), metric=self.metric, device=self.args.device, random_seed=self.random_seed, index=class_index, already_selected=self.already_selected[ np.in1d(self.already_selected, class_index)], print_freq=self.args.print_freq)) else: matrix = self.construct_matrix() del self.model_optimizer del self.model selection_result = k_center_greedy(matrix, budget=self.coreset_size, metric=self.metric, device=self.args.device, random_seed=self.random_seed, already_selected=self.already_selected, print_freq=self.args.print_freq) return {"indices": selection_result} ================================================ FILE: deepcore/methods/methods_utils/__init__.py ================================================ from .euclidean import * from .cossim import * from .submodular_function import * from .submodular_optimizer import * 
================================================ FILE: deepcore/methods/methods_utils/cossim.py ================================================ import numpy as np import torch def cossim_np(v1, v2): num = np.dot(v1, v2.T) denom = np.linalg.norm(v1, axis=1).reshape(-1, 1) * np.linalg.norm(v2, axis=1) res = num / denom res[np.isneginf(res)] = 0. return 0.5 + 0.5 * res def cossim_pair_np(v1): num = np.dot(v1, v1.T) norm = np.linalg.norm(v1, axis=1) denom = norm.reshape(-1, 1) * norm res = num / denom res[np.isneginf(res)] = 0. return 0.5 + 0.5 * res def cossim(v1, v2): num = torch.matmul(v1, v2.T) denom = torch.norm(v1, dim=1).view(-1, 1) * torch.norm(v2, dim=1) res = num / denom res[torch.isneginf(res)] = 0. return 0.5 + 0.5 * res def cossim_pair(v1): num = torch.matmul(v1, v1.T) norm = torch.norm(v1, dim=1) denom = norm.view(-1, 1) * norm res = num / denom res[torch.isneginf(res)] = 0. return 0.5 + 0.5 * res ================================================ FILE: deepcore/methods/methods_utils/euclidean.py ================================================ import torch import numpy as np def euclidean_dist(x, y): m, n = x.size(0), y.size(0) xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() dist = xx + yy dist.addmm_(1, -2, x, y.t()) dist = dist.clamp(min=1e-12).sqrt() return dist def euclidean_dist_pair(x): m = x.size(0) xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, m) dist = xx + xx.t() dist.addmm_(1, -2, x, x.t()) dist = dist.clamp(min=1e-12).sqrt() return dist def euclidean_dist_np(x, y): (rowx, colx) = x.shape (rowy, coly) = y.shape xy = np.dot(x, y.T) x2 = np.repeat(np.reshape(np.sum(np.multiply(x, x), axis=1), (rowx, 1)), repeats=rowy, axis=1) y2 = np.repeat(np.reshape(np.sum(np.multiply(y, y), axis=1), (rowy, 1)), repeats=rowx, axis=1).T return np.sqrt(np.clip(x2 + y2 - 2. 
* xy, 1e-12, None)) def euclidean_dist_pair_np(x): (rowx, colx) = x.shape xy = np.dot(x, x.T) x2 = np.repeat(np.reshape(np.sum(np.multiply(x, x), axis=1), (rowx, 1)), repeats=rowx, axis=1) return np.sqrt(np.clip(x2 + x2.T - 2. * xy, 1e-12, None)) ================================================ FILE: deepcore/methods/methods_utils/submodular_function.py ================================================ import numpy as np class SubmodularFunction(object): def __init__(self, index, similarity_kernel=None, similarity_matrix=None, already_selected=[]): self.index = index self.n = len(index) self.already_selected = already_selected assert similarity_kernel is not None or similarity_matrix is not None # For the sample similarity matrix, the method supports two input modes, one is to input a pairwise similarity # matrix for the whole sample, and the other case allows the input of a similarity kernel to be used to # calculate similarities incrementally at a later time if required. if similarity_kernel is not None: assert callable(similarity_kernel) self.similarity_kernel = self._similarity_kernel(similarity_kernel) else: assert similarity_matrix.shape[0] == self.n and similarity_matrix.shape[1] == self.n self.similarity_matrix = similarity_matrix self.similarity_kernel = lambda a, b: self.similarity_matrix[np.ix_(a, b)] def _similarity_kernel(self, similarity_kernel): return similarity_kernel class FacilityLocation(SubmodularFunction): def __init__(self, **kwargs): super().__init__(**kwargs) if self.already_selected.__len__()==0: self.cur_max = np.zeros(self.n, dtype=np.float32) else: self.cur_max = np.max(self.similarity_kernel(np.arange(self.n), self.already_selected), axis=1) self.all_idx = np.ones(self.n, dtype=bool) def _similarity_kernel(self, similarity_kernel): # Initialize a matrix to store similarity values of sample points. 
self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32) self.if_columns_calculated = np.zeros(self.n, dtype=bool) def _func(a, b): if not np.all(self.if_columns_calculated[b]): if b.dtype != bool: temp = ~self.all_idx temp[b] = True b = temp not_calculated = b & ~self.if_columns_calculated self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated) self.if_columns_calculated[not_calculated] = True return self.sim_matrix[np.ix_(a, b)] return _func def calc_gain(self, idx_gain, selected, **kwargs): gains = np.maximum(0., self.similarity_kernel(self.all_idx, idx_gain) - self.cur_max.reshape(-1, 1)).sum(axis=0) return gains def calc_gain_batch(self, idx_gain, selected, **kwargs): batch_idx = ~self.all_idx batch_idx[0:kwargs["batch"]] = True gains = np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0) for i in range(kwargs["batch"], self.n, kwargs["batch"]): batch_idx = ~self.all_idx batch_idx[i * kwargs["batch"]:(i + 1) * kwargs["batch"]] = True gains += np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1,1)).sum(axis=0) return gains def update_state(self, new_selection, total_selected, **kwargs): self.cur_max = np.maximum(self.cur_max, np.max(self.similarity_kernel(self.all_idx, new_selection), axis=1)) #self.cur_max = np.max(np.append(self.cur_max.reshape(-1, 1), self.similarity_kernel(self.all_idx, new_selection), axis=1), axis=1) class GraphCut(SubmodularFunction): def __init__(self, lam: float = 1., **kwargs): super().__init__(**kwargs) self.lam = lam if 'similarity_matrix' in kwargs: self.sim_matrix_cols_sum = np.sum(self.similarity_matrix, axis=0) self.all_idx = np.ones(self.n, dtype=bool) def _similarity_kernel(self, similarity_kernel): # Initialize a matrix to store similarity values of sample points. 
self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32) self.sim_matrix_cols_sum = np.zeros(self.n, dtype=np.float32) self.if_columns_calculated = np.zeros(self.n, dtype=bool) def _func(a, b): if not np.all(self.if_columns_calculated[b]): if b.dtype != bool: temp = ~self.all_idx temp[b] = True b = temp not_calculated = b & ~self.if_columns_calculated self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated) self.sim_matrix_cols_sum[not_calculated] = np.sum(self.sim_matrix[:, not_calculated], axis=0) self.if_columns_calculated[not_calculated] = True return self.sim_matrix[np.ix_(a, b)] return _func def calc_gain(self, idx_gain, selected, **kwargs): gain = -2. * np.sum(self.similarity_kernel(selected, idx_gain), axis=0) + self.lam * self.sim_matrix_cols_sum[idx_gain] return gain def update_state(self, new_selection, total_selected, **kwargs): pass class LogDeterminant(SubmodularFunction): def __init__(self, **kwargs): super().__init__(**kwargs) self.all_idx = np.ones(self.n, dtype=bool) def _similarity_kernel(self, similarity_kernel): # Initialize a matrix to store similarity values of sample points. self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32) self.if_columns_calculated = np.zeros(self.n, dtype=bool) def _func(a, b): if not np.all(self.if_columns_calculated[b]): if b.dtype != bool: temp = ~self.all_idx temp[b] = True b = temp not_calculated = b & ~self.if_columns_calculated self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated) self.if_columns_calculated[not_calculated] = True return self.sim_matrix[np.ix_(a, b)] return _func def calc_gain(self, idx_gain, selected, **kwargs): # Gain for LogDeterminant can be written as $f(x | A ) = \log\det(S_{a} - S_{a,A}S_{A}^{-1}S_{x,A}^T)$. 
sim_idx_gain = self.similarity_kernel(selected, idx_gain).T sim_selected = self.similarity_kernel(selected, selected) return (np.dot(sim_idx_gain, np.linalg.pinv(sim_selected)) * sim_idx_gain).sum(-1) def update_state(self, new_selection, total_selected, **kwargs): pass ================================================ FILE: deepcore/methods/methods_utils/submodular_optimizer.py ================================================ import numpy as np optimizer_choices = ["NaiveGreedy", "LazyGreedy", "StochasticGreedy", "ApproximateLazyGreedy"] class optimizer(object): def __init__(self, args, index, budget:int, already_selected=[]): self.args = args self.index = index if budget <= 0 or budget > index.__len__(): raise ValueError("Illegal budget for optimizer.") self.n = len(index) self.budget = budget self.already_selected = already_selected class NaiveGreedy(optimizer): def __init__(self, args, index, budget:int, already_selected=[]): super(NaiveGreedy, self).__init__(args, index, budget, already_selected) def select(self, gain_function, update_state=None, **kwargs): assert callable(gain_function) if update_state is not None: assert callable(update_state) selected = np.zeros(self.n, dtype=bool) selected[self.already_selected] = True greedy_gain = np.zeros(len(self.index)) for i in range(sum(selected), self.budget): if i % self.args.print_freq == 0: print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) greedy_gain[~selected] = gain_function(~selected, selected, **kwargs) current_selection = greedy_gain.argmax() selected[current_selection] = True greedy_gain[current_selection] = -np.inf if update_state is not None: update_state(np.array([current_selection]), selected, **kwargs) return self.index[selected] class LazyGreedy(optimizer): def __init__(self, args, index, budget:int, already_selected=[]): super(LazyGreedy, self).__init__(args, index, budget, already_selected) def select(self, gain_function, update_state=None, **kwargs): assert callable(gain_function) if 
update_state is not None: assert callable(update_state) selected = np.zeros(self.n, dtype=bool) selected[self.already_selected] = True greedy_gain = np.zeros(len(self.index)) greedy_gain[~selected] = gain_function(~selected, selected, **kwargs) greedy_gain[selected] = -np.inf for i in range(sum(selected), self.budget): if i % self.args.print_freq == 0: print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) best_gain = -np.inf last_max_element = -1 while True: cur_max_element = greedy_gain.argmax() if last_max_element == cur_max_element: # Select cur_max_element into the current subset selected[cur_max_element] = True greedy_gain[cur_max_element] = -np.inf if update_state is not None: update_state(np.array([cur_max_element]), selected, **kwargs) break new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0] greedy_gain[cur_max_element] = new_gain if new_gain >= best_gain: best_gain = new_gain last_max_element = cur_max_element return self.index[selected] class StochasticGreedy(optimizer): def __init__(self, args, index, budget:int, already_selected=[], epsilon: float=0.9): super(StochasticGreedy, self).__init__(args, index, budget, already_selected) self.epsilon = epsilon def select(self, gain_function, update_state=None, **kwargs): assert callable(gain_function) if update_state is not None: assert callable(update_state) selected = np.zeros(self.n, dtype=bool) selected[self.already_selected] = True sample_size = max(round(-np.log(self.epsilon) * self.n / self.budget), 1) greedy_gain = np.zeros(len(self.index)) all_idx = np.arange(self.n) for i in range(sum(selected), self.budget): if i % self.args.print_freq == 0: print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) # Uniformly select a subset from unselected samples with size sample_size subset = np.random.choice(all_idx[~selected], replace=False, size=min(sample_size, self.n - i)) if subset.__len__() == 0: break greedy_gain[subset] = gain_function(subset, selected, **kwargs) current_selection = 
greedy_gain[subset].argmax() selected[subset[current_selection]] = True greedy_gain[subset[current_selection]] = -np.inf if update_state is not None: update_state(np.array([subset[current_selection]]), selected, **kwargs) return self.index[selected] class ApproximateLazyGreedy(optimizer): def __init__(self, args, index, budget:int, already_selected=[], beta: float=0.9): super(ApproximateLazyGreedy, self).__init__(args, index, budget, already_selected) self.beta = beta def select(self, gain_function, update_state=None, **kwargs): assert callable(gain_function) if update_state is not None: assert callable(update_state) selected = np.zeros(self.n, dtype=bool) selected[self.already_selected] = True greedy_gain = np.zeros(len(self.index)) greedy_gain[~selected] = gain_function(~selected, selected, **kwargs) greedy_gain[selected] = -np.inf for i in range(sum(selected), self.budget): if i % self.args.print_freq == 0: print("| Selecting [%3d/%3d]" % (i + 1, self.budget)) while True: cur_max_element = greedy_gain.argmax() max_gain = greedy_gain[cur_max_element] new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0] if new_gain >= self.beta * max_gain: # Select cur_max_element into the current subset selected[cur_max_element] = True greedy_gain[cur_max_element] = -np.inf if update_state is not None: update_state(np.array([cur_max_element]), selected, **kwargs) break else: greedy_gain[cur_max_element] = new_gain return self.index[selected] ================================================ FILE: deepcore/methods/submodular.py ================================================ from .earlytrain import EarlyTrain import numpy as np import torch from .methods_utils import cossim_np, submodular_function, submodular_optimizer from ..nets.nets_utils import MyDataParallel class Submodular(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=False, function="LogDeterminant", 
greedy="ApproximateLazyGreedy", metric="cossim", **kwargs): super(Submodular, self).__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) if greedy not in submodular_optimizer.optimizer_choices: raise ModuleNotFoundError("Greedy optimizer not found.") self._greedy = greedy self._metric = metric self._function = function self.balance = balance def before_train(self): pass def after_loss(self, outputs, loss, targets, batch_inds, epoch): pass def before_epoch(self): pass def after_epoch(self): pass def before_run(self): pass def num_classes_mismatch(self): raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) def calc_gradient(self, index=None): ''' Calculate gradients matrix on current network for specified training dataset. ''' self.model.eval() batch_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) sample_num = self.n_train if index is None else len(index) self.embedding_dim = self.model.get_last_layer().in_features # Initialize a matrix to save gradients. 
# (on cpu) gradients = [] for i, (input, targets) in enumerate(batch_loader): self.model_optimizer.zero_grad() outputs = self.model(input.to(self.args.device)) loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum() batch_num = targets.shape[0] with torch.no_grad(): bias_parameters_grads = torch.autograd.grad(loss, outputs)[0] weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1, self.embedding_dim).repeat(1, self.args.num_classes, 1) *\ bias_parameters_grads.view(batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim) gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu().numpy()) gradients = np.concatenate(gradients, axis=0) return gradients def finish_run(self): if isinstance(self.model, MyDataParallel): self.model = self.model.module # Turn on the embedding recorder and the no_grad flag with self.model.embedding_recorder: self.model.no_grad = True self.train_indx = np.arange(self.n_train) if self.balance: selection_result = np.array([], dtype=np.int64) for c in range(self.num_classes): c_indx = self.train_indx[self.dst_train.targets == c] # Calculate gradients into a matrix gradients = self.calc_gradient(index=c_indx) # Instantiate a submodular function submod_function = submodular_function.__dict__[self._function](index=c_indx, similarity_kernel=lambda a, b:cossim_np(gradients[a], gradients[b])) submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=c_indx, budget=round(self.fraction * len(c_indx)), already_selected=[]) c_selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain, update_state=submod_function.update_state) selection_result = np.append(selection_result, c_selection_result) else: # Calculate gradients into a matrix gradients = self.calc_gradient() # Instantiate a submodular function submod_function = submodular_function.__dict__[self._function](index=self.train_indx, 
similarity_kernel=lambda a, b: cossim_np(gradients[a], gradients[b])) submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=self.train_indx, budget=self.coreset_size) selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain, update_state=submod_function.update_state) self.model.no_grad = False return {"indices": selection_result} def select(self, **kwargs): selection_result = self.run() return selection_result ================================================ FILE: deepcore/methods/uncertainty.py ================================================ from .earlytrain import EarlyTrain import torch import numpy as np class Uncertainty(EarlyTrain): def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, selection_method="LeastConfidence", specific_model=None, balance=False, **kwargs): super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs) selection_choices = ["LeastConfidence", "Entropy", "Margin"] if selection_method not in selection_choices: raise NotImplementedError("Selection algorithm unavailable.") self.selection_method = selection_method self.epochs = epochs self.balance = balance def before_train(self): pass def after_loss(self, outputs, loss, targets, batch_inds, epoch): pass def before_epoch(self): pass def after_epoch(self): pass def before_run(self): pass def num_classes_mismatch(self): raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.") def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size): if batch_idx % self.args.print_freq == 0: print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % ( epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item())) def finish_run(self): if self.balance: selection_result = np.array([], dtype=np.int64) scores = [] for c in range(self.args.num_classes): class_index = np.arange(self.n_train)[self.dst_train.targets == c] 
scores.append(self.rank_uncertainty(class_index)) selection_result = np.append(selection_result, class_index[np.argsort(scores[-1])[ :round(len(class_index) * self.fraction)]]) else: scores = self.rank_uncertainty() selection_result = np.argsort(scores)[::-1][:self.coreset_size] return {"indices": selection_result, "scores": scores} def rank_uncertainty(self, index=None): self.model.eval() with torch.no_grad(): train_loader = torch.utils.data.DataLoader( self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch, num_workers=self.args.workers) scores = np.array([]) batch_num = len(train_loader) for i, (input, _) in enumerate(train_loader): if i % self.args.print_freq == 0: print("| Selecting for batch [%3d/%3d]" % (i + 1, batch_num)) if self.selection_method == "LeastConfidence": scores = np.append(scores, self.model(input.to(self.args.device)).max(axis=1).values.cpu().numpy()) elif self.selection_method == "Entropy": preds = torch.nn.functional.softmax(self.model(input.to(self.args.device)), dim=1).cpu().numpy() scores = np.append(scores, (np.log(preds + 1e-6) * preds).sum(axis=1)) elif self.selection_method == 'Margin': preds = torch.nn.functional.softmax(self.model(input.to(self.args.device)), dim=1) preds_argmax = torch.argmax(preds, dim=1) max_preds = preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax].clone() preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax] = -1.0 preds_sub_argmax = torch.argmax(preds, dim=1) scores = np.append(scores, (max_preds - preds[ torch.ones(preds.shape[0], dtype=bool), preds_sub_argmax]).cpu().numpy()) return scores def select(self, **kwargs): selection_result = self.run() return selection_result ================================================ FILE: deepcore/methods/uniform.py ================================================ import numpy as np from .coresetmethod import CoresetMethod class Uniform(CoresetMethod): def __init__(self, dst_train, args, 
fraction=0.5, random_seed=None, balance=False, replace=False, **kwargs): super().__init__(dst_train, args, fraction, random_seed) self.balance = balance self.replace = replace self.n_train = len(dst_train) def select_balance(self): """The same sampling proportions were used in each class separately.""" np.random.seed(self.random_seed) self.index = np.array([], dtype=np.int64) all_index = np.arange(self.n_train) for c in range(self.num_classes): c_index = (self.dst_train.targets == c) self.index = np.append(self.index, np.random.choice(all_index[c_index], round(self.fraction * c_index.sum().item()), replace=self.replace)) return self.index def select_no_balance(self): np.random.seed(self.random_seed) self.index = np.random.choice(np.arange(self.n_train), round(self.n_train * self.fraction), replace=self.replace) return self.index def select(self, **kwargs): return {"indices": self.select_balance() if self.balance else self.select_no_balance()} ================================================ FILE: deepcore/nets/__init__.py ================================================ from .alexnet import * from .inceptionv3 import * from .lenet import * from .mlp import * from .mobilenetv3 import * from .resnet import * from .vgg import * from .wideresnet import * ================================================ FILE: deepcore/nets/alexnet.py ================================================ import torch.nn as nn from torch import set_grad_enabled from torchvision import models import torch from .nets_utils import EmbeddingRecorder # Acknowledgement to # https://github.com/kuangliu/pytorch-cifar, # https://github.com/BIGBALLON/CIFAR-ZOO, class AlexNet_32x32(nn.Module): def __init__(self, channel, num_classes, record_embedding=False, no_grad=False): super().__init__() self.features = nn.Sequential( nn.Conv2d(channel, 128, kernel_size=5, stride=1, padding=4 if channel == 1 else 2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), nn.Conv2d(128, 192, kernel_size=5, 
class AlexNet_32x32(nn.Module):
    """AlexNet-style network sized for small inputs (32x32 RGB / 28x28 gray)."""

    def __init__(self, channel, num_classes, record_embedding=False, no_grad=False):
        super().__init__()
        # padding=4 on the stem when channel == 1: presumably pads 28x28
        # grayscale inputs onto the same grid as 32x32 RGB -- TODO confirm.
        self.features = nn.Sequential(
            nn.Conv2d(channel, 128, kernel_size=5, stride=1, padding=4 if channel == 1 else 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Three 2x stride-2 pools: 32 -> 16 -> 8 -> 4, hence 192 * 4 * 4 inputs.
        self.fc = nn.Linear(192 * 4 * 4, num_classes)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def forward(self, x):
        with set_grad_enabled(not self.no_grad):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            # Captures the pre-classifier embedding when recording is enabled.
            x = self.embedding_recorder(x)
            x = self.fc(x)
        return x


class AlexNet_224x224(models.AlexNet):
    """torchvision AlexNet with DeepCore hooks (embedding recorder, no_grad)."""

    def __init__(self, channel: int, num_classes: int, record_embedding: bool = False, no_grad: bool = False,
                 **kwargs):
        super().__init__(num_classes, **kwargs)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        if channel != 3:
            self.features[0] = nn.Conv2d(channel, 64, kernel_size=11, stride=4, padding=2)
        # Splice the recorder in front of the final linear layer: the recorder
        # takes the classifier's last slot, then the original head is re-added
        # as "fc", so execution order becomes ... -> recorder -> fc.
        self.fc = self.classifier[-1]
        self.classifier[-1] = self.embedding_recorder
        self.classifier.add_module("fc", self.fc)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        with set_grad_enabled(not self.no_grad):
            x = self.features(x)
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.classifier(x)
        return x
class BasicConv2d(nn.Module):
    """Conv -> BatchNorm -> ReLU building block used by every Inception module."""

    def __init__(self, input_channels, output_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))


class InceptionA(nn.Module):
    """Naive Inception block: four same-resolution branches concatenated."""

    def __init__(self, input_channels, pool_features):
        super().__init__()
        self.branch1x1 = BasicConv2d(input_channels, 64, kernel_size=1)
        self.branch5x5 = nn.Sequential(
            BasicConv2d(input_channels, 48, kernel_size=1),
            BasicConv2d(48, 64, kernel_size=5, padding=2),
        )
        self.branch3x3 = nn.Sequential(
            BasicConv2d(input_channels, 64, kernel_size=1),
            BasicConv2d(64, 96, kernel_size=3, padding=1),
            BasicConv2d(96, 96, kernel_size=3, padding=1),
        )
        self.branchpool = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(input_channels, pool_features, kernel_size=3, padding=1),
        )

    def forward(self, x):
        # 64 + 64 + 96 + pool_features output channels, spatial size unchanged.
        return torch.cat([self.branch1x1(x), self.branch5x5(x),
                          self.branch3x3(x), self.branchpool(x)], 1)


class InceptionB(nn.Module):
    """Grid-size-reduction block: parallel stride-2 branches, concatenated."""

    def __init__(self, input_channels):
        super().__init__()
        self.branch3x3 = BasicConv2d(input_channels, 384, kernel_size=3, stride=2)
        self.branch3x3stack = nn.Sequential(
            BasicConv2d(input_channels, 64, kernel_size=1),
            BasicConv2d(64, 96, kernel_size=3, padding=1),
            BasicConv2d(96, 96, kernel_size=3, stride=2),
        )
        self.branchpool = nn.MaxPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        # Output channels: 384 + 96 + input_channels; spatial size halved.
        return torch.cat([self.branch3x3(x), self.branch3x3stack(x), self.branchpool(x)], 1)
class InceptionC(nn.Module):
    """17x17-grid block factorizing 7x7 convolutions into 1x7 / 7x1 pairs."""

    def __init__(self, input_channels, channels_7x7):
        super().__init__()
        self.branch1x1 = BasicConv2d(input_channels, 192, kernel_size=1)
        c7 = channels_7x7
        self.branch7x7 = nn.Sequential(
            BasicConv2d(input_channels, c7, kernel_size=1),
            BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)),
            BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)),
        )
        self.branch7x7stack = nn.Sequential(
            BasicConv2d(input_channels, c7, kernel_size=1),
            BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)),
            BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)),
            BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)),
            BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)),
        )
        self.branch_pool = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(input_channels, 192, kernel_size=1),
        )

    def forward(self, x):
        return torch.cat([self.branch1x1(x), self.branch7x7(x),
                          self.branch7x7stack(x), self.branch_pool(x)], 1)


class InceptionD(nn.Module):
    """Grid-size-reduction block used between the 17x17 and 8x8 stages."""

    def __init__(self, input_channels):
        super().__init__()
        self.branch3x3 = nn.Sequential(
            BasicConv2d(input_channels, 192, kernel_size=1),
            BasicConv2d(192, 320, kernel_size=3, stride=2),
        )
        self.branch7x7 = nn.Sequential(
            BasicConv2d(input_channels, 192, kernel_size=1),
            BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)),
            BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)),
            BasicConv2d(192, 192, kernel_size=3, stride=2),
        )
        self.branchpool = nn.AvgPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        return torch.cat([self.branch3x3(x), self.branch7x7(x), self.branchpool(x)], 1)


class InceptionE(nn.Module):
    """Coarse-grid (8x8) block with expanded filter-bank outputs."""

    def __init__(self, input_channels):
        super().__init__()
        self.branch1x1 = BasicConv2d(input_channels, 320, kernel_size=1)

        self.branch3x3_1 = BasicConv2d(input_channels, 384, kernel_size=1)
        self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
        self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))

        self.branch3x3stack_1 = BasicConv2d(input_channels, 448, kernel_size=1)
        self.branch3x3stack_2 = BasicConv2d(448, 384, kernel_size=3, padding=1)
        self.branch3x3stack_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1))
        self.branch3x3stack_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0))

        self.branch_pool = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(input_channels, 192, kernel_size=1),
        )

    def forward(self, x):
        # Wide branch: 1x1 then parallel 1x3 / 3x1, concatenated.
        wide = self.branch3x3_1(x)
        wide = torch.cat([self.branch3x3_2a(wide), self.branch3x3_2b(wide)], 1)
        # Deep branch: 1x1 -> 3x3 then parallel 1x3 / 3x1, concatenated.
        deep = self.branch3x3stack_2(self.branch3x3stack_1(x))
        deep = torch.cat([self.branch3x3stack_3a(deep), self.branch3x3stack_3b(deep)], 1)
        return torch.cat([self.branch1x1(x), wide, deep, self.branch_pool(x)], 1)
class InceptionV3_32x32(nn.Module):
    """Inception-v3 sized for 32x32 (or 28x28 grayscale) inputs.

    Same module wiring as the 299x299 original but with a small-stride stem,
    plus DeepCore's embedding recorder and no_grad switch.
    """

    def __init__(self, channel, num_classes, record_embedding=False, no_grad=False):
        super().__init__()
        # Stem; padding=3 when channel == 1 presumably lifts 28x28 grayscale
        # inputs to the 32x32 grid -- TODO confirm.
        self.Conv2d_1a_3x3 = BasicConv2d(channel, 32, kernel_size=3, padding=3 if channel == 1 else 1)
        self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3, padding=1)
        self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
        self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
        self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)

        # naive inception modules (same spatial size)
        self.Mixed_5b = InceptionA(192, pool_features=32)
        self.Mixed_5c = InceptionA(256, pool_features=64)
        self.Mixed_5d = InceptionA(288, pool_features=64)

        # downsample
        self.Mixed_6a = InceptionB(288)

        self.Mixed_6b = InceptionC(768, channels_7x7=128)
        self.Mixed_6c = InceptionC(768, channels_7x7=160)
        self.Mixed_6d = InceptionC(768, channels_7x7=160)
        self.Mixed_6e = InceptionC(768, channels_7x7=192)

        # downsample
        self.Mixed_7a = InceptionD(768)

        self.Mixed_7b = InceptionE(1280)
        self.Mixed_7c = InceptionE(2048)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout2d()
        self.linear = nn.Linear(2048, num_classes)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.linear

    def forward(self, x):
        with torch.set_grad_enabled(not self.no_grad):
            # Stem: 32 -> 30 (final stem conv has no padding).
            x = self.Conv2d_1a_3x3(x)
            x = self.Conv2d_2a_3x3(x)
            x = self.Conv2d_2b_3x3(x)
            x = self.Conv2d_3b_1x1(x)
            x = self.Conv2d_4a_3x3(x)

            # 30 -> 30
            x = self.Mixed_5b(x)
            x = self.Mixed_5c(x)
            x = self.Mixed_5d(x)

            # 30 -> 14: efficient grid-size reduction (avoids a representation
            # bottleneck per the Inception-v3 paper).
            x = self.Mixed_6a(x)

            # 14 -> 14: 1x7/7x1 factorized blocks work best on medium grids.
            x = self.Mixed_6b(x)
            x = self.Mixed_6c(x)
            x = self.Mixed_6d(x)
            x = self.Mixed_6e(x)

            # 14 -> 6: efficient grid-size reduction.
            x = self.Mixed_7a(x)

            # 6 -> 6: expanded filter-bank blocks on the coarsest grid.
            x = self.Mixed_7b(x)
            x = self.Mixed_7c(x)

            # 6 -> 1
            x = self.avgpool(x)
            x = self.dropout(x)
            x = x.view(x.size(0), -1)
            x = self.embedding_recorder(x)
            x = self.linear(x)
        return x
class InceptionV3_224x224(inception.Inception3):
    """torchvision Inception3 with DeepCore hooks (recorder, no_grad switch)."""

    def __init__(self, channel: int, num_classes: int, record_embedding: bool = False, no_grad: bool = False,
                 **kwargs):
        super().__init__(num_classes=num_classes, **kwargs)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        if channel != 3:
            self.Conv2d_1a_3x3 = inception.conv_block(channel, 32, kernel_size=3, stride=2)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def _forward(self, x):
        # Mirrors torchvision's Inception3._forward, wrapped in the no_grad
        # switch and with the embedding recorder before the final fc.
        with torch.set_grad_enabled(not self.no_grad):
            x = self.Conv2d_1a_3x3(x)
            x = self.Conv2d_2a_3x3(x)
            x = self.Conv2d_2b_3x3(x)
            x = self.maxpool1(x)
            x = self.Conv2d_3b_1x1(x)
            x = self.Conv2d_4a_3x3(x)
            x = self.maxpool2(x)
            x = self.Mixed_5b(x)
            x = self.Mixed_5c(x)
            x = self.Mixed_5d(x)
            x = self.Mixed_6a(x)
            x = self.Mixed_6b(x)
            x = self.Mixed_6c(x)
            x = self.Mixed_6d(x)
            x = self.Mixed_6e(x)
            # Auxiliary classifier is only evaluated in training mode.
            aux = None
            if self.AuxLogits is not None:
                if self.training:
                    aux = self.AuxLogits(x)
            x = self.Mixed_7a(x)
            x = self.Mixed_7b(x)
            x = self.Mixed_7c(x)
            x = self.avgpool(x)
            x = self.dropout(x)
            x = torch.flatten(x, 1)
            x = self.embedding_recorder(x)
            x = self.fc(x)
        return x, aux


def InceptionV3(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
                pretrained: bool = False):
    """Factory: picks the 224x224 or 32x32 variant by image size, optionally
    loading ImageNet weights (224x224 only)."""
    if pretrained:
        if im_size[0] != 224 or im_size[1] != 224:
            raise NotImplementedError("torchvison pretrained models only accept inputs with size of 224*224")
        net = InceptionV3_224x224(channel=3, num_classes=1000, record_embedding=record_embedding, no_grad=no_grad)

        from torch.hub import load_state_dict_from_url
        # NOTE(review): inception.model_urls was removed in torchvision >= 0.13
        # -- confirm the pinned torchvision version still exposes it.
        state_dict = load_state_dict_from_url(inception.model_urls["inception_v3_google"], progress=True)
        net.load_state_dict(state_dict)

        # Patch stem / head after loading weights, if the target task differs.
        if channel != 3:
            net.Conv2d_1a_3x3 = inception.conv_block(channel, 32, kernel_size=3, stride=2)
        if num_classes != 1000:
            net.fc = nn.Linear(net.fc.in_features, num_classes)
    elif im_size[0] == 224 and im_size[1] == 224:
        net = InceptionV3_224x224(channel=channel, num_classes=num_classes, record_embedding=record_embedding,
                                  no_grad=no_grad)
    elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or (
            channel == 3 and im_size[0] == 32 and im_size[1] == 32):
        net = InceptionV3_32x32(channel=channel, num_classes=num_classes, record_embedding=record_embedding,
                                no_grad=no_grad)
    else:
        raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
    return net
class LeNet(nn.Module):
    """Classic LeNet-5 with DeepCore hooks (embedding recorder, no_grad)."""

    def __init__(self, channel, num_classes, im_size, record_embedding: bool = False, no_grad: bool = False,
                 pretrained: bool = False):
        if pretrained:
            raise NotImplementedError("torchvison pretrained models not available.")
        super(LeNet, self).__init__()
        self.features = nn.Sequential(
            # padding=2 for 1-channel (28x28) inputs keeps the LeNet geometry.
            nn.Conv2d(channel, 6, kernel_size=5, padding=2 if channel == 1 else 0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        flat_dim = 16 * 53 * 53 if im_size[0] == im_size[1] == 224 else 16 * 5 * 5
        self.fc_1 = nn.Linear(flat_dim, 120)
        self.fc_2 = nn.Linear(120, 84)
        self.fc_3 = nn.Linear(84, num_classes)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc_3

    def forward(self, x):
        with set_grad_enabled(not self.no_grad):
            out = self.features(x).view(x.size(0), -1)
            out = F.relu(self.fc_1(out))
            out = F.relu(self.fc_2(out))
            out = self.embedding_recorder(out)
            out = self.fc_3(out)
        return out
class MLP(nn.Module):
    """Three-layer perceptron over flattened inputs, with DeepCore hooks."""

    def __init__(self, channel, num_classes, im_size, record_embedding: bool = False, no_grad: bool = False,
                 pretrained: bool = False):
        if pretrained:
            raise NotImplementedError("torchvison pretrained models not available.")
        super(MLP, self).__init__()
        self.fc_1 = nn.Linear(im_size[0] * im_size[1] * channel, 128)
        self.fc_2 = nn.Linear(128, 128)
        self.fc_3 = nn.Linear(128, num_classes)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc_3

    def forward(self, x):
        with set_grad_enabled(not self.no_grad):
            hidden = F.relu(self.fc_1(x.view(x.size(0), -1)))
            hidden = F.relu(self.fc_2(hidden))
            hidden = self.embedding_recorder(hidden)
            logits = self.fc_3(hidden)
        return logits
if new_v < 0.9 * v: new_v += divisor return new_v class h_sigmoid(nn.Module): def __init__(self, inplace=True): super(h_sigmoid, self).__init__() self.relu = nn.ReLU6(inplace=inplace) def forward(self, x): return self.relu(x + 3) / 6 class h_swish(nn.Module): def __init__(self, inplace=True): super(h_swish, self).__init__() self.sigmoid = h_sigmoid(inplace=inplace) def forward(self, x): return x * self.sigmoid(x) class SELayer(nn.Module): def __init__(self, channel, reduction=4): super(SELayer, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Sequential( nn.Linear(channel, _make_divisible(channel // reduction, 8)), nn.ReLU(inplace=True), nn.Linear(_make_divisible(channel // reduction, 8), channel), h_sigmoid() ) def forward(self, x): b, c, _, _ = x.size() y = self.avg_pool(x).view(b, c) y = self.fc(y).view(b, c, 1, 1) return x * y def conv_3x3_bn(inp, oup, stride, padding=1): return nn.Sequential( nn.Conv2d(inp, oup, 3, stride, padding, bias=False), nn.BatchNorm2d(oup), h_swish() ) def conv_1x1_bn(inp, oup): return nn.Sequential( nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), h_swish() ) class InvertedResidual(nn.Module): def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): super(InvertedResidual, self).__init__() assert stride in [1, 2] self.identity = stride == 1 and inp == oup if inp == hidden_dim: self.conv = nn.Sequential( # dw nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), nn.BatchNorm2d(hidden_dim), h_swish() if use_hs else nn.ReLU(inplace=True), # Squeeze-and-Excite SELayer(hidden_dim) if use_se else nn.Identity(), # pw-linear nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), ) else: self.conv = nn.Sequential( # pw nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), nn.BatchNorm2d(hidden_dim), h_swish() if use_hs else nn.ReLU(inplace=True), # dw nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 
class InvertedResidual(nn.Module):
    """MobileNetV3 inverted-residual block (expand -> depthwise -> project),
    with optional Squeeze-and-Excite and hard-swish, and a skip connection
    when the block preserves both stride and channel count."""

    def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]
        self.identity = stride == 1 and inp == oup

        if inp == hidden_dim:
            # Expansion ratio 1: the pointwise expansion is skipped.
            self.conv = nn.Sequential(
                # depthwise
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2,
                          groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # Squeeze-and-Excite (identity when disabled)
                SELayer(hidden_dim) if use_se else nn.Identity(),
                # pointwise-linear projection
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pointwise expansion
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # depthwise
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2,
                          groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                # Squeeze-and-Excite (identity when disabled)
                SELayer(hidden_dim) if use_se else nn.Identity(),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # pointwise-linear projection
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        out = self.conv(x)
        return x + out if self.identity else out
class MobileNetV3_32x32(nn.Module):
    """MobileNetV3 built from a ``cfgs`` table, for small (32x32/28x28) inputs."""

    def __init__(self, cfgs, mode, channel=3, num_classes=1000, record_embedding=False, no_grad=False,
                 width_mult=1.):
        super(MobileNetV3_32x32, self).__init__()
        # cfgs rows: kernel, expansion ratio, out channels, use_SE, use_HS, stride.
        self.cfgs = cfgs
        assert mode in ['mobilenet_v3_large', 'mobilenet_v3_small']
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

        # Stem; padding=3 when channel == 1 presumably adapts 28x28 grayscale
        # inputs -- TODO confirm.
        input_channel = _make_divisible(16 * width_mult, 8)
        layers = [conv_3x3_bn(channel, input_channel, 2, padding=3 if channel == 1 else 1)]

        # Inverted-residual stack, channel counts rounded to multiples of 8.
        block = InvertedResidual
        for k, t, c, use_se, use_hs, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, 8)
            exp_size = _make_divisible(input_channel * t, 8)
            layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs))
            input_channel = output_channel
        self.features = nn.Sequential(*layers)

        # Head: 1x1 expansion (to the last block's exp_size), pool, classifier.
        self.conv = conv_1x1_bn(input_channel, exp_size)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        output_channel = {'mobilenet_v3_large': 1280, 'mobilenet_v3_small': 1024}
        output_channel = _make_divisible(output_channel[mode] * width_mult, 8) if width_mult > 1.0 else output_channel[
            mode]
        # Recorder sits right before the final Linear, so it captures the
        # penultimate feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(exp_size, output_channel),
            h_swish(),
            nn.Dropout(0.2),
            self.embedding_recorder,
            nn.Linear(output_channel, num_classes),
        )
        self._initialize_weights()

    def forward(self, x):
        with set_grad_enabled(not self.no_grad):
            x = self.features(x)
            x = self.conv(x)
            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # He-style init for convs, unit BN, small-normal Linear weights.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

    def get_last_layer(self):
        return self.classifier[-1]


class MobileNetV3_224x224(mobilenetv3.MobileNetV3):
    """torchvision MobileNetV3 with DeepCore hooks (recorder, no_grad)."""

    def __init__(self, inverted_residual_setting, last_channel, channel=3, num_classes=1000,
                 record_embedding=False, no_grad=False, **kwargs):
        super(MobileNetV3_224x224, self).__init__(inverted_residual_setting, last_channel,
                                                  num_classes=num_classes, **kwargs)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        # Splice the recorder in front of the final linear layer: the recorder
        # takes the classifier's last slot, then the head is re-added as "fc".
        self.fc = self.classifier[-1]
        self.classifier[-1] = self.embedding_recorder
        self.classifier.add_module("fc", self.fc)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def _forward_impl(self, x: Tensor) -> Tensor:
        with set_grad_enabled(not self.no_grad):
            x = self.features(x)
            x = self.avgpool(x)
            x = flatten(x, 1)
            x = self.classifier(x)
        return x
def MobileNetV3(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False,
                no_grad: bool = False, pretrained: bool = False, **kwargs):
    """Factory: dispatches to the torchvision-based 224x224 model or the local
    32x32 model based on image size; optionally loads ImageNet weights."""
    arch = arch.lower()
    if pretrained:
        if channel != 3:
            raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
        inverted_residual_setting, last_channel = mobilenetv3._mobilenet_v3_conf(arch)
        net = MobileNetV3_224x224(inverted_residual_setting=inverted_residual_setting, last_channel=last_channel,
                                  channel=3, num_classes=1000, record_embedding=record_embedding,
                                  no_grad=no_grad, **kwargs)

        from torch.hub import load_state_dict_from_url
        # NOTE(review): mobilenetv3.model_urls was removed in torchvision >= 0.13
        # -- confirm the pinned torchvision version still exposes it.
        state_dict = load_state_dict_from_url(mobilenetv3.model_urls[arch], progress=True)
        net.load_state_dict(state_dict)

        if num_classes != 1000:
            # Replace the classification head after loading the weights.
            net.fc = nn.Linear(last_channel, num_classes)
            net.classifier[-1] = net.fc
    elif im_size[0] == 224 and im_size[1] == 224:
        if channel != 3:
            raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
        inverted_residual_setting, last_channel = mobilenetv3._mobilenet_v3_conf(arch)
        net = MobileNetV3_224x224(inverted_residual_setting=inverted_residual_setting, last_channel=last_channel,
                                  channel=channel, num_classes=num_classes,
                                  record_embedding=record_embedding, no_grad=no_grad, **kwargs)
    elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or (
            channel == 3 and im_size[0] == 32 and im_size[1] == 32):
        if arch == "mobilenet_v3_large":
            cfgs = [
                # k, t, c, SE, HS, s
                [3, 1, 16, 0, 0, 1],
                [3, 4, 24, 0, 0, 2],
                [3, 3, 24, 0, 0, 1],
                [5, 3, 40, 1, 0, 2],
                [5, 3, 40, 1, 0, 1],
                [5, 3, 40, 1, 0, 1],
                [3, 6, 80, 0, 1, 2],
                [3, 2.5, 80, 0, 1, 1],
                [3, 2.3, 80, 0, 1, 1],
                [3, 2.3, 80, 0, 1, 1],
                [3, 6, 112, 1, 1, 1],
                [3, 6, 112, 1, 1, 1],
                [5, 6, 160, 1, 1, 2],
                [5, 6, 160, 1, 1, 1],
                [5, 6, 160, 1, 1, 1]
            ]
            net = MobileNetV3_32x32(cfgs, arch, channel=channel, num_classes=num_classes,
                                    record_embedding=record_embedding, no_grad=no_grad)
        elif arch == "mobilenet_v3_small":
            cfgs = [
                # k, t, c, SE, HS, s
                [3, 1, 16, 1, 0, 2],
                [3, 4.5, 24, 0, 0, 2],
                [3, 3.67, 24, 0, 0, 1],
                [5, 4, 40, 1, 1, 2],
                [5, 6, 40, 1, 1, 1],
                [5, 6, 40, 1, 1, 1],
                [5, 3, 48, 1, 1, 1],
                [5, 3, 48, 1, 1, 1],
                [5, 6, 96, 1, 1, 2],
                [5, 6, 96, 1, 1, 1],
                [5, 6, 96, 1, 1, 1],
            ]
            net = MobileNetV3_32x32(cfgs, arch, channel=channel, num_classes=num_classes,
                                    record_embedding=record_embedding, no_grad=no_grad)
        else:
            raise ValueError("Model architecture not found.")
    else:
        raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
    return net


def MobileNetV3Large(channel: int, num_classes: int, im_size, record_embedding: bool = False,
                     no_grad: bool = False, pretrained: bool = False, **kwargs):
    # Convenience wrapper around the factory for the "large" variant.
    return MobileNetV3("mobilenet_v3_large", channel, num_classes, im_size, record_embedding, no_grad,
                       pretrained, **kwargs)


def MobileNetV3Small(channel: int, num_classes: int, im_size, record_embedding: bool = False,
                     no_grad: bool = False, pretrained: bool = False, **kwargs):
    # Convenience wrapper around the factory for the "small" variant.
    return MobileNetV3("mobilenet_v3_small", channel, num_classes, im_size, record_embedding, no_grad,
                       pretrained, **kwargs)
class MyDataParallel(DataParallel):
    """DataParallel that transparently proxies attribute access to the wrapped
    module, so DeepCore code can read/write model attributes (e.g. ``no_grad``)
    without unwrapping."""

    def __getattr__(self, name):
        try:
            return super().__getattr__(name)
        except AttributeError:
            # Fall back to the wrapped module for anything DataParallel lacks.
            return getattr(self.module, name)

    def __setattr__(self, name, value):
        try:
            # "no_grad" must land on the wrapped module, where forward reads it.
            if name == "no_grad":
                return setattr(self.module, name, value)
            return super().__setattr__(name, value)
        except AttributeError:
            return setattr(self.module, name, value)


class EmbeddingRecorder(nn.Module):
    """Identity layer that optionally stashes its input as ``self.embedding``.

    Also usable as a context manager: recording is switched on at ``__enter__``
    and off again at ``__exit__``.
    """

    def __init__(self, record_embedding: bool = False):
        super().__init__()
        self.record_embedding = record_embedding

    def forward(self, x):
        if self.record_embedding:
            self.embedding = x
        return x

    def __enter__(self):
        self.record_embedding = True

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.record_embedding = False
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding=1 (spatial size preserved at stride 1), no bias."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    """Two-conv residual block (ResNet-18/34 style)."""
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        # Projection shortcut only when the shape changes; identity otherwise.
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.shortcut(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return F.relu(out + residual)


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block expanding channels 4x (ResNet-50+ style)."""
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)
        # Projection shortcut only when the shape changes; identity otherwise.
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.shortcut(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return F.relu(out + residual)
class ResNet_32x32(nn.Module):
    """ResNet for 32x32 inputs: CIFAR-style stem (single 3x3 conv, no maxpool)."""

    def __init__(self, block, num_blocks, channel=3, num_classes=10, record_embedding: bool = False,
                 no_grad: bool = False):
        super().__init__()
        self.in_planes = 64
        self.conv1 = conv3x3(channel, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.linear

    def _make_layer(self, block, planes, num_blocks, stride):
        # Only the first block of a stage downsamples; the rest use stride 1.
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        with set_grad_enabled(not self.no_grad):
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.layer1(out)
            out = self.layer2(out)
            out = self.layer3(out)
            out = self.layer4(out)
            # 4x4 -> 1x1 pooling for 32x32 inputs (three stride-2 stages).
            out = F.avg_pool2d(out, 4)
            out = out.view(out.size(0), -1)
            out = self.embedding_recorder(out)
            out = self.linear(out)
        return out


class ResNet_224x224(resnet.ResNet):
    """torchvision ResNet with DeepCore hooks (embedding recorder, no_grad)."""

    def __init__(self, block, layers, channel: int, num_classes: int, record_embedding: bool = False,
                 no_grad: bool = False, **kwargs):
        super().__init__(block, layers, **kwargs)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        if channel != 3:
            self.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if num_classes != 1000:
            self.fc = nn.Linear(self.fc.in_features, num_classes)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def _forward_impl(self, x: Tensor) -> Tensor:
        # See note [TorchScript super()]
        with set_grad_enabled(not self.no_grad):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.maxpool(x)

            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)

            x = self.avgpool(x)
            x = flatten(x, 1)
            x = self.embedding_recorder(x)
            x = self.fc(x)
        return x
def ResNet(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False,
           no_grad: bool = False, pretrained: bool = False):
    """Build a ResNet variant matched to the input resolution.

    arch: one of "resnet18/34/50/101/152" (case-insensitive).
    channel, num_classes, im_size: dataset properties.
    pretrained: load ImageNet weights (always constructs the 224x224 variant at
        3 channels / 1000 classes first, then patches stem/classifier if needed).
    Raises ValueError for an unknown architecture and NotImplementedError for an
    unsupported input resolution.

    Refactored: the five duplicated if/elif ladders are replaced by a single
    per-arch configuration table; error behavior is unchanged.
    """
    arch = arch.lower()
    # Per-stage block counts, shared by the 32x32 and 224x224 variants.
    layer_cfg = {
        "resnet18": [2, 2, 2, 2],
        "resnet34": [3, 4, 6, 3],
        "resnet50": [3, 4, 6, 3],
        "resnet101": [3, 4, 23, 3],
        "resnet152": [3, 8, 36, 3],
    }
    # Architectures that use the bottleneck block instead of the basic block.
    bottleneck_archs = {"resnet50", "resnet101", "resnet152"}

    if pretrained:
        if arch not in layer_cfg:
            raise ValueError("Model architecture not found.")
        block = resnet.Bottleneck if arch in bottleneck_archs else resnet.BasicBlock
        # Pretrained weights only exist for the ImageNet configuration.
        net = ResNet_224x224(block, layer_cfg[arch], channel=3, num_classes=1000,
                             record_embedding=record_embedding, no_grad=no_grad)

        from torch.hub import load_state_dict_from_url
        state_dict = load_state_dict_from_url(resnet.model_urls[arch], progress=True)
        net.load_state_dict(state_dict)

        # Patch stem / classifier after loading when the target task differs.
        if channel != 3:
            net.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if num_classes != 1000:
            net.fc = nn.Linear(net.fc.in_features, num_classes)

    elif im_size[0] == 224 and im_size[1] == 224:
        if arch not in layer_cfg:
            raise ValueError("Model architecture not found.")
        block = resnet.Bottleneck if arch in bottleneck_archs else resnet.BasicBlock
        net = ResNet_224x224(block, layer_cfg[arch], channel=channel, num_classes=num_classes,
                             record_embedding=record_embedding, no_grad=no_grad)

    elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or (
            channel == 3 and im_size[0] == 32 and im_size[1] == 32):
        if arch not in layer_cfg:
            raise ValueError("Model architecture not found.")
        block = Bottleneck if arch in bottleneck_archs else BasicBlock
        net = ResNet_32x32(block, layer_cfg[arch], channel=channel, num_classes=num_classes,
                           record_embedding=record_embedding, no_grad=no_grad)
    else:
        raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
    return net


def ResNet18(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
             pretrained: bool = False):
    return ResNet("resnet18", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def ResNet34(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
             pretrained: bool = False):
    return ResNet("resnet34", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def ResNet50(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
             pretrained: bool = False):
    return ResNet("resnet50", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def ResNet101(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
              pretrained: bool = False):
    return ResNet("resnet101", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def ResNet152(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
              pretrained: bool = False):
    return ResNet("resnet152", channel, num_classes, im_size, record_embedding, no_grad, pretrained)
import torch.nn as nn
from torch import set_grad_enabled, flatten, Tensor
from .nets_utils import EmbeddingRecorder
from torchvision.models import vgg

# Acknowledgement to
# https://github.com/kuangliu/pytorch-cifar,
# https://github.com/BIGBALLON/CIFAR-ZOO,

# Per-arch feature configuration: ints are conv output channels, 'M' is 2x2 max-pool.
cfg_vgg = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG_32x32(nn.Module):
    """VGG for small inputs (32x32 RGB or 28x28 grayscale padded up by the first conv)."""

    def __init__(self, vgg_name, channel, num_classes, record_embedding=False, no_grad=False):
        super(VGG_32x32, self).__init__()
        self.channel = channel
        self.features = self._make_layers(cfg_vgg[vgg_name])
        # Every config in cfg_vgg ends with 512 channels, and five 'M' pools
        # reduce a 32x32 input to 1x1, so the flattened feature size is 512.
        # (A previous guard on a nonexistent 'VGGS' config was dead code and
        # has been removed.)
        self.classifier = nn.Linear(512, num_classes)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def forward(self, x):
        # no_grad=True runs the forward pass without building a graph.
        with set_grad_enabled(not self.no_grad):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.embedding_recorder(x)
            x = self.classifier(x)
        return x

    def get_last_layer(self):
        return self.classifier

    def _make_layers(self, cfg):
        """Build the convolutional feature extractor from a cfg_vgg list."""
        layers = []
        in_channels = self.channel
        for ic, x in enumerate(cfg):
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                # padding=3 on the very first conv of a 1-channel (28x28) input
                # grows the map to 32x32 so the five pools still end at 1x1.
                layers += [nn.Conv2d(in_channels, x, kernel_size=3,
                                     padding=3 if self.channel == 1 and ic == 0 else 1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)


class VGG_224x224(vgg.VGG):
    """torchvision VGG extended with embedding recording and gradient gating."""

    def __init__(self, features: nn.Module, channel: int, num_classes: int,
                 record_embedding: bool = False, no_grad: bool = False, **kwargs):
        super(VGG_224x224, self).__init__(features, num_classes, **kwargs)
        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        if channel != 3:
            self.features[0] = nn.Conv2d(channel, 64, kernel_size=3, padding=1)

        # Splice the recorder in front of the final linear layer so it sees the
        # penultimate activations; the fc layer is re-appended after it.
        self.fc = self.classifier[-1]
        self.classifier[-1] = self.embedding_recorder
        self.classifier.add_module("fc", self.fc)

        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def forward(self, x: Tensor) -> Tensor:
        with set_grad_enabled(not self.no_grad):
            x = self.features(x)
            x = self.avgpool(x)
            x = flatten(x, 1)
            x = self.classifier(x)
        return x


def VGG(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False,
        no_grad: bool = False, pretrained: bool = False):
    """Build a VGG variant matched to the input resolution.

    Raises NotImplementedError when pretrained weights are requested for a
    non-224x224 input, or when the resolution is unsupported altogether.
    """
    arch = arch.lower()
    if pretrained:
        if im_size[0] != 224 or im_size[1] != 224:
            raise NotImplementedError("torchvison pretrained models only accept inputs with size of 224*224")
        net = VGG_224x224(features=vgg.make_layers(cfg_vgg[arch], True), channel=3, num_classes=1000,
                          record_embedding=record_embedding, no_grad=no_grad)

        from torch.hub import load_state_dict_from_url
        state_dict = load_state_dict_from_url(vgg.model_urls[arch], progress=True)
        net.load_state_dict(state_dict)

        # Patch stem / classifier after loading when the target task differs.
        if channel != 3:
            net.features[0] = nn.Conv2d(channel, 64, kernel_size=3, padding=1)
        if num_classes != 1000:
            net.fc = nn.Linear(4096, num_classes)
            net.classifier[-1] = net.fc

    elif im_size[0] == 224 and im_size[1] == 224:
        net = VGG_224x224(features=vgg.make_layers(cfg_vgg[arch], True), channel=channel,
                          num_classes=num_classes, record_embedding=record_embedding, no_grad=no_grad)

    elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or (
            channel == 3 and im_size[0] == 32 and im_size[1] == 32):
        net = VGG_32x32(arch, channel, num_classes=num_classes,
                        record_embedding=record_embedding, no_grad=no_grad)
    else:
        raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
    return net


def VGG11(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
          pretrained: bool = False):
    return VGG("vgg11", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def VGG13(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
          pretrained: bool = False):
    return VGG('vgg13', channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def VGG16(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
          pretrained: bool = False):
    return VGG('vgg16', channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def VGG19(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
          pretrained: bool = False):
    return VGG('vgg19', channel, num_classes, im_size, record_embedding, no_grad, pretrained)
class BasicBlock(nn.Module):
    """Pre-activation wide-resnet block: BN-ReLU-Conv twice, with optional dropout."""

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # 1x1 projection shortcut only when the channel count changes.
        # (Replaces the fragile `cond and a or b` idiom with a ternary.)
        self.convShortcut = None if self.equalInOut else nn.Conv2d(
            in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        if not self.equalInOut:
            # Pre-activate the input itself so the projection sees activated features.
            x = self.relu1(self.bn1(x))
        else:
            out = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)


class NetworkBlock(nn.Module):
    """A stage of `nb_layers` BasicBlocks; only the first may downsample."""

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(int(nb_layers)):
            # First block adapts channels and stride; the rest are out->out, stride 1.
            # (Replaces `i == 0 and a or b` idioms with explicit ternaries.)
            layers.append(block(in_planes if i == 0 else out_planes,
                                out_planes,
                                stride if i == 0 else 1,
                                dropRate))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)


class WideResNet_32x32(nn.Module):
    """WideResNet for small inputs (32x32 RGB, or 28x28 grayscale padded by conv1)."""

    def __init__(self, depth, num_classes, channel=3, widen_factor=1, drop_rate=0.0,
                 record_embedding=False, no_grad=False):
        super(WideResNet_32x32, self).__init__()
        nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
        assert (depth - 4) % 6 == 0
        # Blocks per stage; integer division (the old `/ 6` produced a float
        # that was later truncated inside NetworkBlock anyway).
        n = (depth - 4) // 6
        block = BasicBlock
        # 1st conv before any network block; padding=3 grows a 28x28 input to
        # 32x32 so the downstream pooling geometry still works.
        self.conv1 = nn.Conv2d(channel, nChannels[0], kernel_size=3, stride=1,
                               padding=3 if channel == 1 else 1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, drop_rate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, drop_rate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, drop_rate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

        self.embedding_recorder = EmbeddingRecorder(record_embedding)
        self.no_grad = no_grad

    def get_last_layer(self):
        return self.fc

    def forward(self, x):
        # no_grad=True runs the forward pass without building a graph.
        with torch.set_grad_enabled(not self.no_grad):
            out = self.conv1(x)
            out = self.block1(out)
            out = self.block2(out)
            out = self.block3(out)
            out = self.relu(self.bn1(out))
            out = F.avg_pool2d(out, 8)
            out = out.view(-1, self.nChannels)
            out = self.embedding_recorder(out)
            return self.fc(out)
def WideResNet(arch: str, channel: int, num_classes: int, im_size, record_embedding: bool = False,
               no_grad: bool = False, pretrained: bool = False):
    """Build a WideResNet variant matched to the input resolution.

    For 224x224 inputs the torchvision wide-resnet geometry is used
    (wrn502 / wrn1012); for 28x28 / 32x32 inputs the CIFAR-style
    WideResNet_32x32 is used (wrn168 / wrn2810 / wrn282).

    BUG FIX: the small-input branch previously ignored `record_embedding` and
    `no_grad`, silently returning a network with both disabled; they are now
    forwarded, consistent with the ResNet and VGG factories.
    """
    arch = arch.lower()
    if pretrained:
        if im_size[0] != 224 or im_size[1] != 224:
            raise NotImplementedError("torchvison pretrained models only accept inputs with size of 224*224")
        if arch == "wrn502":
            arch = "wide_resnet50_2"  # torchvision's name, used for the weight URL below
            net = ResNet_224x224(resnet.Bottleneck, [3, 4, 6, 3], channel=3, num_classes=1000,
                                 record_embedding=record_embedding, no_grad=no_grad,
                                 width_per_group=64 * 2)
        elif arch == "wrn1012":
            arch = "wide_resnet101_2"
            net = ResNet_224x224(resnet.Bottleneck, [3, 4, 23, 3], channel=3, num_classes=1000,
                                 record_embedding=record_embedding, no_grad=no_grad,
                                 width_per_group=64 * 2)
        else:
            raise ValueError("Model architecture not found.")

        from torch.hub import load_state_dict_from_url
        state_dict = load_state_dict_from_url(resnet.model_urls[arch], progress=True)
        net.load_state_dict(state_dict)

        # Patch stem / classifier after loading when the target task differs.
        if channel != 3:
            net.conv1 = nn.Conv2d(channel, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if num_classes != 1000:
            net.fc = nn.Linear(net.fc.in_features, num_classes)

    elif im_size[0] == 224 and im_size[1] == 224:
        # Use torchvision models without pretrained parameters
        if arch == "wrn502":
            net = ResNet_224x224(resnet.Bottleneck, [3, 4, 6, 3], channel=channel,
                                 num_classes=num_classes, record_embedding=record_embedding,
                                 no_grad=no_grad, width_per_group=64 * 2)
        elif arch == "wrn1012":
            net = ResNet_224x224(resnet.Bottleneck, [3, 4, 23, 3], channel=channel,
                                 num_classes=num_classes, record_embedding=record_embedding,
                                 no_grad=no_grad, width_per_group=64 * 2)
        else:
            raise ValueError("Model architecture not found.")

    elif (channel == 1 and im_size[0] == 28 and im_size[1] == 28) or (
            channel == 3 and im_size[0] == 32 and im_size[1] == 32):
        if arch == "wrn168":
            net = WideResNet_32x32(16, num_classes, channel, 8,
                                   record_embedding=record_embedding, no_grad=no_grad)
        elif arch == "wrn2810":
            net = WideResNet_32x32(28, num_classes, channel, 10,
                                   record_embedding=record_embedding, no_grad=no_grad)
        elif arch == "wrn282":
            net = WideResNet_32x32(28, num_classes, channel, 2,
                                   record_embedding=record_embedding, no_grad=no_grad)
        else:
            raise ValueError("Model architecture not found.")
    else:
        raise NotImplementedError("Network Architecture for current dataset has not been implemented.")
    return net


def WRN168(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
           pretrained: bool = False):
    return WideResNet("wrn168", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def WRN2810(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
            pretrained: bool = False):
    return WideResNet("wrn2810", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def WRN282(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
           pretrained: bool = False):
    return WideResNet('wrn282', channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def WRN502(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
           pretrained: bool = False):
    return WideResNet("wrn502", channel, num_classes, im_size, record_embedding, no_grad, pretrained)


def WRN1012(channel: int, num_classes: int, im_size, record_embedding: bool = False, no_grad: bool = False,
            pretrained: bool = False):
    return WideResNet("wrn1012", channel, num_classes, im_size, record_embedding, no_grad, pretrained)
import os
import torch.nn as nn
import argparse
import deepcore.nets as nets
import deepcore.datasets as datasets
import deepcore.methods as methods
from torchvision import transforms
from utils import *
from datetime import datetime
from time import sleep


def main():
    """Parse arguments, run coreset selection, then train/evaluate on the subset.

    For each experiment: (1) load the dataset, (2) select (or resume) a coreset
    subset, (3) train each requested architecture on that subset, periodically
    testing and checkpointing the best model.
    """
    parser = argparse.ArgumentParser(description='Parameter Processing')

    # Basic arguments
    parser.add_argument('--dataset', type=str, default='CIFAR10', help='dataset')
    parser.add_argument('--model', type=str, default='ResNet18', help='model')
    parser.add_argument('--selection', type=str, default="uniform", help="selection method")
    parser.add_argument('--num_exp', type=int, default=5, help='the number of experiments')
    parser.add_argument('--num_eval', type=int, default=10,
                        help='the number of evaluating randomly initialized models')
    parser.add_argument('--epochs', default=200, type=int, help='number of total epochs to run')
    parser.add_argument('--data_path', type=str, default='data', help='dataset path')
    parser.add_argument('--gpu', default=None, nargs="+", type=int, help='GPU id to use')
    parser.add_argument('--print_freq', '-p', default=20, type=int, help='print frequency (default: 20)')
    parser.add_argument('--fraction', default=0.1, type=float,
                        help='fraction of data to be selected (default: 0.1)')
    # time comes from `from utils import *` (utils imports it at module level).
    parser.add_argument('--seed', default=int(time.time() * 1000) % 100000, type=int, help="random seed")
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument("--cross", type=str, nargs="+", default=None,
                        help="models for cross-architecture experiments")

    # Optimizer and scheduler
    parser.add_argument('--optimizer', default="SGD", help='optimizer to use, e.g. SGD, Adam')
    parser.add_argument('--lr', type=float, default=0.1, help='learning rate for updating network parameters')
    parser.add_argument('--min_lr', type=float, default=1e-4, help='minimum learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum (default: 0.9)')
    parser.add_argument('-wd', '--weight_decay', default=5e-4, type=float, metavar='W',
                        help='weight decay (default: 5e-4)', dest='weight_decay')
    parser.add_argument("--nesterov", default=True, type=str_to_bool, help="if set nesterov")
    parser.add_argument("--scheduler", default="CosineAnnealingLR", type=str, help="Learning rate scheduler")
    parser.add_argument("--gamma", type=float, default=.5, help="Gamma value for StepLR")
    parser.add_argument("--step_size", type=float, default=50, help="Step size for StepLR")

    # Training
    parser.add_argument('--batch', '--batch-size', "-b", default=256, type=int, metavar='N',
                        help='mini-batch size (default: 256)')
    parser.add_argument("--train_batch", "-tb", default=None, type=int,
                        help="batch size for training, if not specified, it will equal to batch size in argument --batch")
    parser.add_argument("--selection_batch", "-sb", default=None, type=int,
                        help="batch size for selection, if not specified, it will equal to batch size in argument --batch")

    # Testing
    parser.add_argument("--test_interval", '-ti', default=1, type=int,
                        help="the number of training epochs to be preformed between two test epochs; a value of 0 means no test will be run (default: 1)")
    parser.add_argument("--test_fraction", '-tf', type=float, default=1.,
                        help="proportion of test dataset used for evaluating the model (default: 1.)")

    # Selecting
    parser.add_argument("--selection_epochs", "-se", default=40, type=int,
                        help="number of epochs whiling performing selection on full dataset")
    parser.add_argument('--selection_momentum', '-sm', default=0.9, type=float, metavar='M',
                        help='momentum whiling performing selection (default: 0.9)')
    parser.add_argument('--selection_weight_decay', '-swd', default=5e-4, type=float, metavar='W',
                        help='weight decay whiling performing selection (default: 5e-4)',
                        dest='selection_weight_decay')
    parser.add_argument('--selection_optimizer', "-so", default="SGD",
                        help='optimizer to use whiling performing selection, e.g. SGD, Adam')
    parser.add_argument("--selection_nesterov", "-sn", default=True, type=str_to_bool,
                        help="if set nesterov whiling performing selection")
    parser.add_argument('--selection_lr', '-slr', type=float, default=0.1, help='learning rate for selection')
    parser.add_argument("--selection_test_interval", '-sti', default=1, type=int,
                        help="the number of training epochs to be preformed between two test epochs during selection (default: 1)")
    parser.add_argument("--selection_test_fraction", '-stf', type=float, default=1.,
                        help="proportion of test dataset used for evaluating the model while preforming selection (default: 1.)")
    parser.add_argument('--balance', default=True, type=str_to_bool,
                        help="whether balance selection is performed per class")

    # Algorithm
    parser.add_argument('--submodular', default="GraphCut", help="specifiy submodular function to use")
    parser.add_argument('--submodular_greedy', default="LazyGreedy",
                        help="specifiy greedy algorithm for submodular optimization")
    parser.add_argument('--uncertainty', default="Entropy", help="specifiy uncertanty score to use")

    # Checkpoint and resumption
    parser.add_argument('--save_path', "-sp", type=str, default='',
                        help='path to save results (default: do not save)')
    parser.add_argument('--resume', '-r', type=str, default='',
                        help="path to latest checkpoint (default: do not load)")

    args = parser.parse_args()
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if args.train_batch is None:
        args.train_batch = args.batch
    if args.selection_batch is None:
        args.selection_batch = args.batch
    if args.save_path != "" and not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if not os.path.exists(args.data_path):
        os.mkdir(args.data_path)

    if args.resume != "":
        # Load checkpoint. A full checkpoint resumes training mid-run; one that
        # only carries the subset restarts training from epoch 0 on that subset.
        try:
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            assert {"exp", "epoch", "state_dict", "opt_dict", "best_acc1", "rec", "subset",
                    "sel_args"} <= set(checkpoint.keys())
            assert 'indices' in checkpoint["subset"].keys()
            start_exp = checkpoint['exp']
            start_epoch = checkpoint["epoch"]
        except AssertionError:
            try:
                assert {"exp", "subset", "sel_args"} <= set(checkpoint.keys())
                assert 'indices' in checkpoint["subset"].keys()
                print("=> The checkpoint only contains the subset, training will start from the begining")
                start_exp = checkpoint['exp']
                start_epoch = 0
            except AssertionError:
                print("=> Failed to load the checkpoint, an empty one will be created")
                checkpoint = {}
                start_exp = 0
                start_epoch = 0
    else:
        checkpoint = {}
        start_exp = 0
        start_epoch = 0

    for exp in range(start_exp, args.num_exp):
        if args.save_path != "":
            # BUG FIX: this previously formatted exp=start_exp, so every
            # experiment after a resume wrote to the same checkpoint name.
            checkpoint_name = "{dst}_{net}_{mtd}_exp{exp}_epoch{epc}_{dat}_{fr}_".format(
                dst=args.dataset, net=args.model, mtd=args.selection, dat=datetime.now(),
                exp=exp, epc=args.epochs, fr=args.fraction)

        print('\n================== Exp %d ==================\n' % exp)
        print("dataset: ", args.dataset, ", model: ", args.model, ", selection: ", args.selection,
              ", num_ex: ", args.num_exp, ", epochs: ", args.epochs, ", fraction: ", args.fraction,
              ", seed: ", args.seed, ", lr: ", args.lr, ", save_path: ", args.save_path,
              ", resume: ", args.resume, ", device: ", args.device,
              ", checkpoint_name: " + checkpoint_name if args.save_path != "" else "", "\n", sep="")

        channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test = \
            datasets.__dict__[args.dataset](args.data_path)
        args.channel, args.im_size, args.num_classes, args.class_names = \
            channel, im_size, num_classes, class_names

        torch.random.manual_seed(args.seed)

        if "subset" in checkpoint.keys():
            subset = checkpoint['subset']
            selection_args = checkpoint["sel_args"]
        else:
            selection_args = dict(epochs=args.selection_epochs,
                                  selection_method=args.uncertainty,
                                  balance=args.balance,
                                  greedy=args.submodular_greedy,
                                  function=args.submodular)
            method = methods.__dict__[args.selection](dst_train, args, args.fraction, args.seed,
                                                      **selection_args)
            subset = method.select()
        print(len(subset["indices"]))

        # Augmentation (applied after selection so selection sees clean data).
        if args.dataset == "CIFAR10" or args.dataset == "CIFAR100":
            dst_train.transform = transforms.Compose(
                [transforms.RandomCrop(args.im_size, padding=4, padding_mode="reflect"),
                 transforms.RandomHorizontalFlip(), dst_train.transform])
        elif args.dataset == "ImageNet":
            dst_train.transform = transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])

        # Handle weighted subset
        if_weighted = "weights" in subset.keys()
        if if_weighted:
            dst_subset = WeightedSubset(dst_train, subset["indices"], subset["weights"])
        else:
            dst_subset = torch.utils.data.Subset(dst_train, subset["indices"])

        # BackgroundGenerator for ImageNet to speed up dataloaders
        if args.dataset == "ImageNet":
            train_loader = DataLoaderX(dst_subset, batch_size=args.train_batch, shuffle=True,
                                       num_workers=args.workers, pin_memory=True)
            test_loader = DataLoaderX(dst_test, batch_size=args.train_batch, shuffle=False,
                                      num_workers=args.workers, pin_memory=True)
        else:
            train_loader = torch.utils.data.DataLoader(dst_subset, batch_size=args.train_batch,
                                                       shuffle=True, num_workers=args.workers,
                                                       pin_memory=True)
            test_loader = torch.utils.data.DataLoader(dst_test, batch_size=args.train_batch,
                                                      shuffle=False, num_workers=args.workers,
                                                      pin_memory=True)

        # Listing cross-architecture experiment settings if specified.
        models = [args.model]
        if isinstance(args.cross, list):
            for model in args.cross:
                if model != args.model:
                    models.append(model)

        for model in models:
            if len(models) > 1:
                print("| Training on model %s" % model)

            network = nets.__dict__[model](channel, num_classes, im_size).to(args.device)

            if args.device == "cpu":
                print("Using CPU.")
            elif args.gpu is not None:
                torch.cuda.set_device(args.gpu[0])
                network = nets.nets_utils.MyDataParallel(network, device_ids=args.gpu)
            elif torch.cuda.device_count() > 1:
                network = nets.nets_utils.MyDataParallel(network).cuda()

            if "state_dict" in checkpoint.keys():
                # Loading model state_dict
                network.load_state_dict(checkpoint["state_dict"])

            # Per-sample losses; train()/test() reduce them as needed.
            criterion = nn.CrossEntropyLoss(reduction='none').to(args.device)

            # Optimizer
            if args.optimizer == "SGD":
                optimizer = torch.optim.SGD(network.parameters(), args.lr, momentum=args.momentum,
                                            weight_decay=args.weight_decay, nesterov=args.nesterov)
            elif args.optimizer == "Adam":
                optimizer = torch.optim.Adam(network.parameters(), args.lr,
                                             weight_decay=args.weight_decay)
            else:
                optimizer = torch.optim.__dict__[args.optimizer](network.parameters(), args.lr,
                                                                 momentum=args.momentum,
                                                                 weight_decay=args.weight_decay,
                                                                 nesterov=args.nesterov)

            # LR scheduler (stepped per batch, hence the len(train_loader) factors).
            if args.scheduler == "CosineAnnealingLR":
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, len(train_loader) * args.epochs, eta_min=args.min_lr)
            elif args.scheduler == "StepLR":
                scheduler = torch.optim.lr_scheduler.StepLR(
                    optimizer, step_size=len(train_loader) * args.step_size, gamma=args.gamma)
            else:
                scheduler = torch.optim.lr_scheduler.__dict__[args.scheduler](optimizer)
            scheduler.last_epoch = (start_epoch - 1) * len(train_loader)

            if "opt_dict" in checkpoint.keys():
                optimizer.load_state_dict(checkpoint["opt_dict"])

            # Log recorder
            if "rec" in checkpoint.keys():
                rec = checkpoint["rec"]
            else:
                rec = init_recorder()

            best_prec1 = checkpoint["best_acc1"] if "best_acc1" in checkpoint.keys() else 0.0

            # Save the checkpoint with only the subset, so the selection is not
            # lost if training crashes before the first test epoch.
            if args.save_path != "" and args.resume == "":
                save_checkpoint({"exp": exp, "subset": subset, "sel_args": selection_args},
                                os.path.join(args.save_path, checkpoint_name +
                                             ("" if model == args.model else model + "_") +
                                             "unknown.ckpt"), 0, 0.)

            for epoch in range(start_epoch, args.epochs):
                # train for one epoch
                train(train_loader, network, criterion, optimizer, scheduler, epoch, args, rec,
                      if_weighted=if_weighted)

                # evaluate on validation set
                if args.test_interval > 0 and (epoch + 1) % args.test_interval == 0:
                    prec1 = test(test_loader, network, criterion, epoch, args, rec)

                    # remember best prec@1 and save checkpoint
                    is_best = prec1 > best_prec1
                    if is_best:
                        best_prec1 = prec1
                        if args.save_path != "":
                            rec = record_ckpt(rec, epoch)
                            save_checkpoint({"exp": exp,
                                             "epoch": epoch + 1,
                                             "state_dict": network.state_dict(),
                                             "opt_dict": optimizer.state_dict(),
                                             "best_acc1": best_prec1,
                                             "rec": rec,
                                             "subset": subset,
                                             "sel_args": selection_args},
                                            os.path.join(args.save_path, checkpoint_name +
                                                         ("" if model == args.model else model + "_") +
                                                         "unknown.ckpt"),
                                            epoch=epoch, prec=best_prec1)

            # Prepare for the next checkpoint: stamp the final accuracy into the
            # file name, or re-save from scratch if the rename fails.
            if args.save_path != "":
                try:
                    os.rename(
                        os.path.join(args.save_path, checkpoint_name +
                                     ("" if model == args.model else model + "_") + "unknown.ckpt"),
                        os.path.join(args.save_path, checkpoint_name +
                                     ("" if model == args.model else model + "_") +
                                     "%f.ckpt" % best_prec1))
                except OSError:  # was a bare except; os.rename failures are OSError
                    save_checkpoint({"exp": exp,
                                     "epoch": args.epochs,
                                     "state_dict": network.state_dict(),
                                     "opt_dict": optimizer.state_dict(),
                                     "best_acc1": best_prec1,
                                     "rec": rec,
                                     "subset": subset,
                                     "sel_args": selection_args},
                                    os.path.join(args.save_path, checkpoint_name +
                                                 ("" if model == args.model else model + "_") +
                                                 "%f.ckpt" % best_prec1),
                                    epoch=args.epochs - 1, prec=best_prec1)

            print('| Best accuracy: ', best_prec1, ", on model " + model if len(models) > 1 else "",
                  end="\n\n")
            start_epoch = 0
            checkpoint = {}
            sleep(2)


if __name__ == '__main__':
    main()
requirements.txt ================================================ numpy==1.22 prefetch_generator==1.0.1 requests==2.25.1 scipy==1.5.3 torch==1.10.1 torchvision==0.11.2 ================================================ FILE: utils.py ================================================ import time, torch from argparse import ArgumentTypeError from prefetch_generator import BackgroundGenerator class WeightedSubset(torch.utils.data.Subset): def __init__(self, dataset, indices, weights) -> None: self.dataset = dataset assert len(indices) == len(weights) self.indices = indices self.weights = weights def __getitem__(self, idx): if isinstance(idx, list): return self.dataset[[self.indices[i] for i in idx]], self.weights[[i for i in idx]] return self.dataset[self.indices[idx]], self.weights[idx] def train(train_loader, network, criterion, optimizer, scheduler, epoch, args, rec, if_weighted: bool = False): """Train for one epoch on the training set""" batch_time = AverageMeter('Time', ':6.3f') losses = AverageMeter('Loss', ':.4e') top1 = AverageMeter('Acc@1', ':6.2f') # switch to train mode network.train() end = time.time() for i, contents in enumerate(train_loader): optimizer.zero_grad() if if_weighted: target = contents[0][1].to(args.device) input = contents[0][0].to(args.device) # Compute output output = network(input) weights = contents[1].to(args.device).requires_grad_(False) loss = torch.sum(criterion(output, target) * weights) / torch.sum(weights) else: target = contents[1].to(args.device) input = contents[0].to(args.device) # Compute output output = network(input) loss = criterion(output, target).mean() # Measure accuracy and record loss prec1 = accuracy(output.data, target, topk=(1,))[0] losses.update(loss.data.item(), input.size(0)) top1.update(prec1.item(), input.size(0)) # Compute gradient and do SGD step loss.backward() optimizer.step() scheduler.step() # Measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: 
print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format( epoch, i, len(train_loader), batch_time=batch_time, loss=losses, top1=top1)) record_train_stats(rec, epoch, losses.avg, top1.avg, optimizer.state_dict()['param_groups'][0]['lr']) def test(test_loader, network, criterion, epoch, args, rec): batch_time = AverageMeter('Time', ':6.3f') losses = AverageMeter('Loss', ':.4e') top1 = AverageMeter('Acc@1', ':6.2f') # Switch to evaluate mode network.eval() network.no_grad = True end = time.time() for i, (input, target) in enumerate(test_loader): target = target.to(args.device) input = input.to(args.device) # Compute output with torch.no_grad(): output = network(input) loss = criterion(output, target).mean() # Measure accuracy and record loss prec1 = accuracy(output.data, target, topk=(1,))[0] losses.update(loss.data.item(), input.size(0)) top1.update(prec1.item(), input.size(0)) # Measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format( i, len(test_loader), batch_time=batch_time, loss=losses, top1=top1)) print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1)) network.no_grad = False record_test_stats(rec, epoch, losses.avg, top1.avg) return top1.avg class AverageMeter(object): """Computes and stores the average and current value""" def __init__(self, name, fmt=':f'): self.name = name self.fmt = fmt self.reset() def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count def __str__(self): fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' return fmtstr.format(**self.__dict__) def accuracy(output, target, 
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Top-maxk class indices per sample, transposed to (k, batch).
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        hits = pred.eq(target.view(1, -1).expand_as(pred))

        results = []
        for k in topk:
            n_correct = hits[:k].reshape(-1).float().sum(0, keepdim=True)
            results.append(n_correct.mul_(100.0 / batch_size))
        return results


def str_to_bool(v):
    # Handle boolean type in arguments.
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise ArgumentTypeError('Boolean value expected.')


def save_checkpoint(state, path, epoch, prec):
    """Persist a training checkpoint dict to disk, logging epoch and accuracy."""
    print("=> Saving checkpoint for epoch %d, with Prec@1 %f." % (epoch, prec))
    torch.save(state, path)


def init_recorder():
    """Create an empty training/test log recorder."""
    from types import SimpleNamespace
    rec = SimpleNamespace()
    for field in ("train_step", "train_loss", "train_acc", "lr",
                  "test_step", "test_loss", "test_acc", "ckpts"):
        setattr(rec, field, [])
    return rec


def record_train_stats(rec, step, loss, acc, lr):
    """Append one epoch of training statistics to the recorder."""
    rec.train_step.append(step)
    rec.train_loss.append(loss)
    rec.train_acc.append(acc)
    rec.lr.append(lr)
    return rec


def record_test_stats(rec, step, loss, acc):
    """Append one epoch of test statistics to the recorder."""
    rec.test_step.append(step)
    rec.test_loss.append(loss)
    rec.test_acc.append(acc)
    return rec


def record_ckpt(rec, step):
    """Note the epoch at which a checkpoint was written."""
    rec.ckpts.append(step)
    return rec


class DataLoaderX(torch.utils.data.DataLoader):
    """DataLoader whose batches are prefetched on a background thread."""

    def __iter__(self):
        return BackgroundGenerator(super().__iter__())