Repository: qweasdd/manga-colorization-v2 Branch: master Commit: a0d0e4482e5e Files: 13 Total size: 38.5 KB
Directory structure:
gitextract_ul_19dwz/
├── .gitignore
├── colorizator.py
├── denoising/
│   ├── denoiser.py
│   ├── functions.py
│   ├── models.py
│   └── utils.py
├── inference.py
├── networks/
│   ├── extractor.py
│   └── models.py
├── readme.md
├── requirements.txt
├── start_kr.md
└── utils/
    └── utils.py
================================================ FILE CONTENTS ================================================
================================================ FILE: .gitignore ================================================
*.ipynb
*.pth
*.zip
__pycache__/
temp_colorization/
static/temp_images/
================================================ FILE: colorizator.py ================================================
import torch
from torchvision.transforms import ToTensor
import numpy as np
from networks.models import Colorizer
from denoising.denoiser import FFDNetDenoiser
from utils.utils import resize_pad

class MangaColorizator:
    # End-to-end colorization pipeline: optional FFDNet denoising, then
    # resize/pad to a 32-divisible canvas, then GAN colorization with
    # optional user-supplied color hints.
    def __init__(self, device, generator_path = 'networks/generator.zip', extractor_path = 'networks/extractor.pth'):
        # NOTE(review): extractor_path is accepted but never used in this
        # class — presumably a leftover from an earlier design; confirm.
        self.colorizer = Colorizer().to(device)
        self.colorizer.generator.load_state_dict(torch.load(generator_path, map_location = device))
        self.colorizer = self.colorizer.eval()
        self.denoiser = FFDNetDenoiser(device)
        # State set by set_image() and consumed by colorize().
        self.current_image = None
        self.current_hint = None
        self.current_pad = None
        self.device = device

    def set_image(self, image, size = 576, apply_denoise = True, denoise_sigma = 25, transform = ToTensor()):
        """Prepare an input image for colorization.

        Denoises (optionally), resizes/pads the image so its sides are
        divisible by 32, stores the tensor on self.device and resets the
        color hint to all zeros (4 channels: RGB hint + mask).
        """
        if (size % 32 != 0):
            raise RuntimeError("size is not divisible by 32")
        if apply_denoise:
            image = self.denoiser.get_denoised_image(image, sigma = denoise_sigma)
        # resize_pad also records how much padding was added so colorize()
        # can crop it back off.
        image, self.current_pad = resize_pad(image, size)
        self.current_image = transform(image).unsqueeze(0).to(self.device)
        self.current_hint = torch.zeros(1, 4, self.current_image.shape[2], self.current_image.shape[3]).float().to(self.device)

    def update_hint(self, hint, mask):
        '''Install a user color hint.

        Args:
            hint: numpy.ndarray with shape (self.current_image.shape[2], self.current_image.shape[3], 3)
            mask: numpy.ndarray with shape (self.current_image.shape[2], self.current_image.shape[3])
        '''
        if issubclass(hint.dtype.type, np.integer):
            hint = hint.astype('float32') / 255
        # Scale hint colors from [0, 1] to [-1, 1] to match the generator's
        # tanh output range.
        hint = (hint - 0.5) / 0.5
        hint = torch.FloatTensor(hint).permute(2, 0, 1)
        mask = torch.FloatTensor(np.expand_dims(mask, 0))
        # Channels: masked RGB hint (3) + mask itself (1).
        self.current_hint = torch.cat([hint * mask, mask], 0).unsqueeze(0).to(self.device)

    def colorize(self):
        """Run the generator and return an HxWx3 float numpy image in [0, 1]."""
        with torch.no_grad():
            fake_color, _ = self.colorizer(torch.cat([self.current_image, self.current_hint], 1))
            fake_color = fake_color.detach()
        # Map tanh output [-1, 1] back to [0, 1].
        result = fake_color[0].detach().cpu().permute(1, 2, 0) * 0.5 + 0.5
        # Crop off the padding added by resize_pad (bottom, then right).
        if self.current_pad[0] != 0:
            result = result[:-self.current_pad[0]]
        if self.current_pad[1] != 0:
            result = result[:, :-self.current_pad[1]]
        return result.numpy()
================================================ FILE: denoising/denoiser.py ================================================
"""
Denoise an image with the FFDNet denoising method

Copyright (C) 2018, Matias Tassano

This program is free software: you can use, modify and/or redistribute it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version. You should have received a copy of this license along
this program. If not, see .
""" import os import argparse import time import numpy as np import cv2 import torch import torch.nn as nn from torch.autograd import Variable from .models import FFDNet from .utils import normalize, variable_to_cv2_image, remove_dataparallel_wrapper, is_rgb class FFDNetDenoiser: def __init__(self, _device, _sigma = 25, _weights_dir = 'denoising/models/', _in_ch = 3): self.sigma = _sigma / 255 self.weights_dir = _weights_dir self.channels = _in_ch self.device = _device self.model = FFDNet(num_input_channels = _in_ch) self.load_weights() self.model.eval() def load_weights(self): weights_name = 'net_rgb.pth' if self.channels == 3 else 'net_gray.pth' weights_path = os.path.join(self.weights_dir, weights_name) if self.device == 'cuda': state_dict = torch.load(weights_path, map_location=torch.device('cpu')) device_ids = [0] self.model = nn.DataParallel(self.model, device_ids=device_ids).cuda() else: state_dict = torch.load(weights_path, map_location='cpu') # CPU mode: remove the DataParallel wrapper state_dict = remove_dataparallel_wrapper(state_dict) self.model.load_state_dict(state_dict) def get_denoised_image(self, imorig, sigma = None): if sigma is not None: cur_sigma = sigma / 255 else: cur_sigma = self.sigma if len(imorig.shape) < 3 or imorig.shape[2] == 1: imorig = np.repeat(np.expand_dims(imorig, 2), 3, 2) imorig = imorig[..., :3] if (max(imorig.shape[0], imorig.shape[1]) > 1200): ratio = max(imorig.shape[0], imorig.shape[1]) / 1200 imorig = cv2.resize(imorig, (int(imorig.shape[1] / ratio), int(imorig.shape[0] / ratio)), interpolation = cv2.INTER_AREA) imorig = imorig.transpose(2, 0, 1) if (imorig.max() > 1.2): imorig = normalize(imorig) imorig = np.expand_dims(imorig, 0) # Handle odd sizes expanded_h = False expanded_w = False sh_im = imorig.shape if sh_im[2]%2 == 1: expanded_h = True imorig = np.concatenate((imorig, imorig[:, :, -1, :][:, :, np.newaxis, :]), axis=2) if sh_im[3]%2 == 1: expanded_w = True imorig = np.concatenate((imorig, imorig[:, :, :, -1][:, 
:, :, np.newaxis]), axis=3) imorig = torch.Tensor(imorig) # Sets data type according to CPU or GPU modes if self.device == 'cuda': dtype = torch.cuda.FloatTensor else: dtype = torch.FloatTensor imnoisy = imorig.clone() with torch.no_grad(): imorig, imnoisy = imorig.type(dtype), imnoisy.type(dtype) nsigma = torch.FloatTensor([cur_sigma]).type(dtype) # Estimate noise and subtract it to the input image im_noise_estim = self.model(imnoisy, nsigma) outim = torch.clamp(imnoisy-im_noise_estim, 0., 1.) if expanded_h: imorig = imorig[:, :, :-1, :] outim = outim[:, :, :-1, :] imnoisy = imnoisy[:, :, :-1, :] if expanded_w: imorig = imorig[:, :, :, :-1] outim = outim[:, :, :, :-1] imnoisy = imnoisy[:, :, :, :-1] return variable_to_cv2_image(outim) ================================================ FILE: denoising/functions.py ================================================ """ Functions implementing custom NN layers Copyright (C) 2018, Matias Tassano This program is free software: you can use, modify and/or redistribute it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. You should have received a copy of this license along this program. If not, see . """ import torch from torch.autograd import Function, Variable def concatenate_input_noise_map(input, noise_sigma): r"""Implements the first layer of FFDNet. This function returns a torch.autograd.Variable composed of the concatenation of the downsampled input image and the noise map. Each image of the batch of size CxHxW gets converted to an array of size 4*CxH/2xW/2. Each of the pixels of the non-overlapped 2x2 patches of the input image are placed in the new array along the first dimension. 
    Args:
        input: batch containing CxHxW images
        noise_sigma: the value of the pixels of the CxH/2xW/2 noise map
    """
    # noise_sigma is a list of length batch_size
    N, C, H, W = input.size()
    dtype = input.type()
    sca = 2
    sca2 = sca*sca
    Cout = sca2*C
    Hout = H//sca
    Wout = W//sca
    # The four offsets of a 2x2 patch (row, col).
    idxL = [[0, 0], [0, 1], [1, 0], [1, 1]]

    # Fill the downsampled image with zeros
    if 'cuda' in dtype:
        downsampledfeatures = torch.cuda.FloatTensor(N, Cout, Hout, Wout).fill_(0)
    else:
        downsampledfeatures = torch.FloatTensor(N, Cout, Hout, Wout).fill_(0)

    # Build the CxH/2xW/2 noise map
    noise_map = noise_sigma.view(N, 1, 1, 1).repeat(1, C, Hout, Wout)

    # Populate output: de-interleave each 2x2 patch into 4 channels.
    for idx in range(sca2):
        downsampledfeatures[:, idx:Cout:sca2, :, :] = \
            input[:, :, idxL[idx][0]::sca, idxL[idx][1]::sca]

    # concatenate de-interleaved mosaic with noise map
    return torch.cat((noise_map, downsampledfeatures), 1)

class UpSampleFeaturesFunction(Function):
    r"""Extends PyTorch's modules by implementing a torch.autograd.Function.
    This class implements the forward and backward methods of the last layer
    of FFDNet. It basically performs the inverse of
    concatenate_input_noise_map(): it converts each of the images of a
    batch of size CxH/2xW/2 to images of size C/4xHxW
    """
    @staticmethod
    def forward(ctx, input):
        N, Cin, Hin, Win = input.size()
        dtype = input.type()
        sca = 2
        sca2 = sca*sca
        Cout = Cin//sca2
        Hout = Hin*sca
        Wout = Win*sca
        idxL = [[0, 0], [0, 1], [1, 0], [1, 1]]

        assert (Cin%sca2 == 0), 'Invalid input dimensions: number of channels should be divisible by 4'

        # Re-interleave groups of 4 channels back into 2x2 spatial patches.
        result = torch.zeros((N, Cout, Hout, Wout)).type(dtype)
        for idx in range(sca2):
            result[:, :, idxL[idx][0]::sca, idxL[idx][1]::sca] = input[:, idx:Cin:sca2, :, :]

        return result

    @staticmethod
    def backward(ctx, grad_output):
        N, Cg_out, Hg_out, Wg_out = grad_output.size()
        dtype = grad_output.data.type()
        sca = 2
        sca2 = sca*sca
        Cg_in = sca2*Cg_out
        Hg_in = Hg_out//sca
        Wg_in = Wg_out//sca
        idxL = [[0, 0], [0, 1], [1, 0], [1, 1]]

        # Build output
        grad_input = torch.zeros((N, Cg_in, Hg_in, Wg_in)).type(dtype)
        # Populate output: exact inverse of the forward re-interleaving.
        for idx in range(sca2):
            grad_input[:, idx:Cg_in:sca2, :, :] = grad_output.data[:, :, idxL[idx][0]::sca, idxL[idx][1]::sca]

        return Variable(grad_input)

# Alias functions
upsamplefeatures = UpSampleFeaturesFunction.apply
================================================ FILE: denoising/models.py ================================================
"""
Definition of the FFDNet model and its custom layers

Copyright (C) 2018, Matias Tassano

This program is free software: you can use, modify and/or redistribute it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version. You should have received a copy of this license along
this program. If not, see .
""" import torch.nn as nn from torch.autograd import Variable import denoising.functions as functions class UpSampleFeatures(nn.Module): r"""Implements the last layer of FFDNet """ def __init__(self): super(UpSampleFeatures, self).__init__() def forward(self, x): return functions.upsamplefeatures(x) class IntermediateDnCNN(nn.Module): r"""Implements the middel part of the FFDNet architecture, which is basically a DnCNN net """ def __init__(self, input_features, middle_features, num_conv_layers): super(IntermediateDnCNN, self).__init__() self.kernel_size = 3 self.padding = 1 self.input_features = input_features self.num_conv_layers = num_conv_layers self.middle_features = middle_features if self.input_features == 5: self.output_features = 4 #Grayscale image elif self.input_features == 15: self.output_features = 12 #RGB image else: raise Exception('Invalid number of input features') layers = [] layers.append(nn.Conv2d(in_channels=self.input_features,\ out_channels=self.middle_features,\ kernel_size=self.kernel_size,\ padding=self.padding,\ bias=False)) layers.append(nn.ReLU(inplace=True)) for _ in range(self.num_conv_layers-2): layers.append(nn.Conv2d(in_channels=self.middle_features,\ out_channels=self.middle_features,\ kernel_size=self.kernel_size,\ padding=self.padding,\ bias=False)) layers.append(nn.BatchNorm2d(self.middle_features)) layers.append(nn.ReLU(inplace=True)) layers.append(nn.Conv2d(in_channels=self.middle_features,\ out_channels=self.output_features,\ kernel_size=self.kernel_size,\ padding=self.padding,\ bias=False)) self.itermediate_dncnn = nn.Sequential(*layers) def forward(self, x): out = self.itermediate_dncnn(x) return out class FFDNet(nn.Module): r"""Implements the FFDNet architecture """ def __init__(self, num_input_channels): super(FFDNet, self).__init__() self.num_input_channels = num_input_channels if self.num_input_channels == 1: # Grayscale image self.num_feature_maps = 64 self.num_conv_layers = 15 self.downsampled_channels = 5 
self.output_features = 4 elif self.num_input_channels == 3: # RGB image self.num_feature_maps = 96 self.num_conv_layers = 12 self.downsampled_channels = 15 self.output_features = 12 else: raise Exception('Invalid number of input features') self.intermediate_dncnn = IntermediateDnCNN(\ input_features=self.downsampled_channels,\ middle_features=self.num_feature_maps,\ num_conv_layers=self.num_conv_layers) self.upsamplefeatures = UpSampleFeatures() def forward(self, x, noise_sigma): concat_noise_x = functions.concatenate_input_noise_map(x.data, noise_sigma.data) concat_noise_x = Variable(concat_noise_x) h_dncnn = self.intermediate_dncnn(concat_noise_x) pred_noise = self.upsamplefeatures(h_dncnn) return pred_noise ================================================ FILE: denoising/utils.py ================================================ """ Different utilities such as orthogonalization of weights, initialization of loggers, etc Copyright (C) 2018, Matias Tassano This program is free software: you can use, modify and/or redistribute it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. You should have received a copy of this license along this program. If not, see . """ import numpy as np import cv2 def variable_to_cv2_image(varim): r"""Converts a torch.autograd.Variable to an OpenCV image Args: varim: a torch.autograd.Variable """ nchannels = varim.size()[1] if nchannels == 1: res = (varim.data.cpu().numpy()[0, 0, :]*255.).clip(0, 255).astype(np.uint8) elif nchannels == 3: res = varim.data.cpu().numpy()[0] res = cv2.cvtColor(res.transpose(1, 2, 0), cv2.COLOR_RGB2BGR) res = (res*255.).clip(0, 255).astype(np.uint8) else: raise Exception('Number of color channels not supported') return res def normalize(data): return np.float32(data/255.) def remove_dataparallel_wrapper(state_dict): r"""Converts a DataParallel model to a normal one by removing the "module." 
    wrapper in the module dictionary

    Args:
        state_dict: a torch.nn.DataParallel state dictionary
    """
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, vl in state_dict.items():
        name = k[7:] # remove 'module.' of DataParallel
        new_state_dict[name] = vl
    return new_state_dict

def is_rgb(im_path):
    r""" Returns True if the image in im_path is an RGB image
    """
    from skimage.io import imread
    rgb = False
    im = imread(im_path)
    if (len(im.shape) == 3):
        # Grayscale saved as 3 channels has all channels (nearly) equal.
        if not(np.allclose(im[...,0], im[...,1]) and np.allclose(im[...,2], im[...,1])):
            rgb = True
    print("rgb: {}".format(rgb))
    print("im shape: {}".format(im.shape))
    return rgb
================================================ FILE: inference.py ================================================
import os
import argparse
import sys
import numpy as np
import matplotlib.pyplot as plt
from colorizator import MangaColorizator

def process_image(image, colorizator, args):
    # Stage the image (with CLI-selected denoising/size) and colorize it.
    colorizator.set_image(image, args.size, args.denoiser, args.denoiser_sigma)
    return colorizator.colorize()

def colorize_single_image(image_path, save_path, colorizator, args):
    """Read one image, colorize it and write the result to save_path."""
    image = plt.imread(image_path)
    colorization = process_image(image, colorizator, args)
    plt.imsave(save_path, colorization)
    return True

def colorize_images(target_path, colorizator, args):
    """Colorize every file directly inside args.path, saving as .png."""
    images = os.listdir(args.path)
    for image_name in images:
        file_path = os.path.join(args.path, image_name)
        if os.path.isdir(file_path):
            continue
        name, ext = os.path.splitext(image_name)
        # Always save results with a .png extension.
        if (ext != '.png'):
            image_name = name + '.png'
        print(file_path)
        save_path = os.path.join(target_path, image_name)
        colorize_single_image(file_path, save_path, colorizator, args)

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--path", required=True)
    parser.add_argument("-gen", "--generator", default = 'networks/generator.zip')
    parser.add_argument("-ext", "--extractor", default = 'networks/extractor.pth')
    parser.add_argument('-g', '--gpu', dest = 'gpu', action = 'store_true')
    parser.add_argument('-nd', '--no_denoise', dest = 'denoiser', action = 'store_false')
    parser.add_argument("-ds", "--denoiser_sigma", type = int, default = 25)
    parser.add_argument("-s", "--size", type = int, default = 576)
    parser.set_defaults(gpu = False)
    parser.set_defaults(denoiser = True)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = parse_args()

    if args.gpu:
        device = 'cuda'
    else:
        device = 'cpu'

    colorizer = MangaColorizator(device, args.generator, args.extractor)

    if os.path.isdir(args.path):
        # Directory input: results go into a 'colorization' subfolder.
        colorization_path = os.path.join(args.path, 'colorization')
        if not os.path.exists(colorization_path):
            os.makedirs(colorization_path)
        colorize_images(colorization_path, colorizer, args)
    elif os.path.isfile(args.path):
        split = os.path.splitext(args.path)
        if split[1].lower() in ('.jpg', '.png', '.jpeg'):
            new_image_path = split[0] + '_colorized' + '.png'
            colorize_single_image(args.path, new_image_path, colorizer, args)
        else:
            print('Wrong format')
    else:
        print('Wrong path')
================================================ FILE: networks/extractor.py ================================================
import torch
import torch.nn as nn
import math

'''https://github.com/blandocs/Tag2Pix/blob/master/model/pretrained.py'''

# Pretrained version
class Selayer(nn.Module):
    # Squeeze-and-excitation channel attention (reduction factor 16).
    def __init__(self, inplanes):
        super(Selayer, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.global_avgpool(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.sigmoid(out)
        # Rescale input channels by the learned attention weights.
        return x * out

class BottleneckX_Origin(nn.Module):
    expansion = 4
    def __init__(self, inplanes, planes, cardinality, stride=1, downsample=None):
        super(BottleneckX_Origin, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(planes * 2)
        # Grouped 3x3 conv carries the ResNeXt cardinality.
        self.conv2 = nn.Conv2d(planes * 2, planes * 2, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(planes * 2)
        self.conv3 = nn.Conv2d(planes * 2, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.selayer = Selayer(planes * 4)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out = self.selayer(out)

        # Match residual shape when the block changes stride/width.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class SEResNeXt_Origin(nn.Module):
    # SE-ResNeXt feature extractor truncated after layer3; forward() returns
    # the four intermediate feature maps used by the colorization generator.
    def __init__(self, block, layers, input_channels=3, cardinality=32, num_classes=1000):
        super(SEResNeXt_Origin, self).__init__()
        self.cardinality = cardinality
        self.inplanes = 64
        self.input_channels = input_channels

        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)

        # He-style initialization for convs; BN starts at identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, self.cardinality, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, self.cardinality))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x1 = self.relu(x)

        x2 = self.layer1(x1)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)

        return x1, x2, x3, x4
================================================ FILE: networks/models.py ================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as M
import math
from torch import Tensor
from torch.nn import Parameter
from .extractor import SEResNeXt_Origin, BottleneckX_Origin

'''https://github.com/orashi/AlacGAN/blob/master/models/standard.py'''

def l2normalize(v, eps=1e-12):
    # Normalize a vector to unit L2 norm; eps guards against division by zero.
    return v / (v.norm() + eps)

class SpectralNorm(nn.Module):
    # Spectral normalization wrapper (Miyato et al. style): estimates the
    # largest singular value of the wrapped module's weight by power
    # iteration and divides the weight by it on every forward pass.
    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        height = w.data.shape[0]
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))

        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data,
        #         v.data))
        sigma = u.dot(w.view(height, -1).mv(v))
        # Replace the module's weight with its spectrally-normalized version.
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        # True when the u/v/weight_bar parameters were already registered.
        try:
            u = getattr(self.module, self.name + "_u")
            v = getattr(self.module, self.name + "_v")
            w = getattr(self.module, self.name + "_bar")
            return True
        except AttributeError:
            return False

    def _make_params(self):
        w = getattr(self.module, self.name)

        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]

        # Power-iteration vectors; not trained by gradient descent.
        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)

        # Replace the original weight with the _bar copy plus u/v vectors.
        del self.module._parameters[self.name]

        self.module.register_parameter(self.name + "_u", u)
        self.module.register_parameter(self.name + "_v", v)
        self.module.register_parameter(self.name + "_bar", w_bar)

    def forward(self, *args):
        self._update_u_v()
        return self.module.forward(*args)

class Selayer(nn.Module):
    # Squeeze-and-excitation channel attention (duplicated from
    # networks/extractor.py for this module's local use).
    def __init__(self, inplanes):
        super(Selayer, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.global_avgpool(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.sigmoid(out)

        return x * out

class SelayerSpectr(nn.Module):
    # SE layer variant whose 1x1 convs are spectrally normalized.
    def __init__(self, inplanes):
        super(SelayerSpectr, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = SpectralNorm(nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1))
        self.conv2 = SpectralNorm(nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1))
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.global_avgpool(x)
        out = self.conv1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.sigmoid(out)

        return x * out

class ResNeXtBottleneck(nn.Module):
    # Dilated ResNeXt bottleneck with SE attention; stride>1 downsamples the
    # shortcut with average pooling instead of a conv.
    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
        super(ResNeXtBottleneck, self).__init__()
        D = out_channels // 2
        self.out_channels = out_channels
        self.conv_reduce = nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv_conv = nn.Conv2d(D, D, kernel_size=2 + stride, stride=stride, padding=dilate,
                                   dilation=dilate, groups=cardinality, bias=False)
        self.conv_expand = nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.shortcut = nn.Sequential()
        if stride != 1:
            self.shortcut.add_module('shortcut', nn.AvgPool2d(2, stride=2))
        self.selayer = Selayer(out_channels)

    def forward(self, x):
        bottleneck = self.conv_reduce.forward(x)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        bottleneck = self.conv_conv.forward(bottleneck)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        bottleneck = self.conv_expand.forward(bottleneck)
        bottleneck = self.selayer(bottleneck)

        x = self.shortcut.forward(x)
        return x + bottleneck

class SpectrResNeXtBottleneck(nn.Module):
    # Same block as ResNeXtBottleneck but with spectral norm on every conv.
    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
        super(SpectrResNeXtBottleneck, self).__init__()
        D = out_channels // 2
        self.out_channels = out_channels
        self.conv_reduce = SpectralNorm(nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False))
        self.conv_conv = SpectralNorm(nn.Conv2d(D, D, kernel_size=2 + stride, stride=stride, padding=dilate,
                                                dilation=dilate, groups=cardinality, bias=False))
        self.conv_expand = SpectralNorm(nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False))
        self.shortcut = nn.Sequential()
        if stride != 1:
            self.shortcut.add_module('shortcut', nn.AvgPool2d(2, stride=2))
        self.selayer = SelayerSpectr(out_channels)

    def forward(self, x):
        bottleneck = self.conv_reduce.forward(x)
        bottleneck = F.leaky_relu(bottleneck, 0.2, True)
        bottleneck = self.conv_conv.forward(bottleneck)
        bottleneck = F.leaky_relu(bottleneck, 0.2,
                                  True)
        bottleneck = self.conv_expand.forward(bottleneck)
        bottleneck = self.selayer(bottleneck)

        x = self.shortcut.forward(x)
        return x + bottleneck

class FeatureConv(nn.Module):
    # Small conv stack used to project feature maps; BN is compiled out
    # because no_bn is hard-coded True.
    def __init__(self, input_dim=512, output_dim=512):
        super(FeatureConv, self).__init__()

        no_bn = True

        seq = []
        seq.append(nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False))
        if not no_bn:
            seq.append(nn.BatchNorm2d(output_dim))
        seq.append(nn.ReLU(inplace=True))
        seq.append(nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=2, padding=1, bias=False))
        if not no_bn:
            seq.append(nn.BatchNorm2d(output_dim))
        seq.append(nn.ReLU(inplace=True))
        seq.append(nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False))
        seq.append(nn.ReLU(inplace=True))

        self.network = nn.Sequential(*seq)

    def forward(self, x):
        return self.network(x)

class Generator(nn.Module):
    # Colorization generator: a 5-channel sketch+hint encoder (to0..to4), an
    # SE-ResNeXt sketch feature extractor, and a stack of dilated ResNeXt
    # "tunnels" with PixelShuffle upsampling back to full resolution.
    def __init__(self, ngf=64):
        super(Generator, self).__init__()

        self.encoder = SEResNeXt_Origin(BottleneckX_Origin, [3, 4, 6, 3], num_classes= 370, input_channels=1)

        # Hint-aware encoder: input is 5 channels (1 sketch + 4 hint/mask).
        self.to0 = self._make_encoder_block_first(5, 32)
        self.to1 = self._make_encoder_block(32, 64)
        self.to2 = self._make_encoder_block(64, 92)
        self.to3 = self._make_encoder_block(92, 128)
        self.to4 = self._make_encoder_block(128, 256)

        self.deconv_for_decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1), # output is 64 * 64
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1), # output is 128 * 128
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(64, 32, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(32, 3, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
            nn.Tanh(),
        )

        tunnel4 = nn.Sequential(*[ResNeXtBottleneck(512, 512, cardinality=32, dilate=1) for _ in range(20)])

        self.tunnel4 = nn.Sequential(nn.Conv2d(1024 + 128, 512, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel4,
                                     nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )  # 64

        depth = 2
        # Dilation pattern 1,1,2,2,4,4,2,1 widens the receptive field.
        tunnel = [ResNeXtBottleneck(256, 256, cardinality=32, dilate=1) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=2) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=4) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=2),
                   ResNeXtBottleneck(256, 256, cardinality=32, dilate=1)]
        tunnel3 = nn.Sequential(*tunnel)

        self.tunnel3 = nn.Sequential(nn.Conv2d(512 + 256, 256, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel3,
                                     nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )  # 128

        tunnel = [ResNeXtBottleneck(128, 128, cardinality=32, dilate=1) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=2) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=4) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=2),
                   ResNeXtBottleneck(128, 128, cardinality=32, dilate=1)]
        tunnel2 = nn.Sequential(*tunnel)

        self.tunnel2 = nn.Sequential(nn.Conv2d(128 + 256 + 64, 128, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel2,
                                     nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )

        tunnel = [ResNeXtBottleneck(64, 64, cardinality=16, dilate=1)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=2)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=4)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=2),
                   ResNeXtBottleneck(64, 64, cardinality=16, dilate=1)]
        tunnel1 = nn.Sequential(*tunnel)

        self.tunnel1 = nn.Sequential(nn.Conv2d(64 + 32, 64, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel1,
                                     nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )

        self.exit =
                    nn.Sequential(nn.Conv2d(64 + 32, 32, kernel_size=3, stride=1, padding=1),
                                  nn.LeakyReLU(0.2, True),
                                  nn.Conv2d(32, 3, kernel_size= 1, stride = 1, padding = 0))

    def _make_encoder_block(self, inplanes, planes):
        # Strided conv pair: halves spatial size, changes channel count.
        return nn.Sequential(
            nn.Conv2d(inplanes, planes, 3, 2, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(planes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
        )

    def _make_encoder_block_first(self, inplanes, planes):
        # Same as _make_encoder_block but without downsampling (stride 1).
        return nn.Sequential(
            nn.Conv2d(inplanes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(planes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
        )

    def forward(self, sketch):
        # sketch is 5-channel: grayscale image + 4-channel hint/mask.
        x0 = self.to0(sketch)
        aux_out = self.to1(x0)
        aux_out = self.to2(aux_out)
        aux_out = self.to3(aux_out)

        # The pretrained extractor sees only the grayscale channel.
        x1, x2, x3, x4 = self.encoder(sketch[:, 0:1])

        out = self.tunnel4(torch.cat([x4, aux_out], 1))

        x = self.tunnel3(torch.cat([out, x3], 1))
        x = self.tunnel2(torch.cat([x, x2, x1], 1))
        x = torch.tanh(self.exit(torch.cat([x, x0], 1)))

        # Secondary low-resolution decoding branch (training guide output).
        decoder_output = self.deconv_for_decoder(out)

        return x, decoder_output

class Colorizer(nn.Module):
    # Thin wrapper exposing the Generator under a stable interface.
    def __init__(self):
        super(Colorizer, self).__init__()
        self.generator = Generator()

    def forward(self, x, extractor_grad = False):
        fake, guide = self.generator(x)
        return fake, guide
================================================ FILE: readme.md ================================================
## **UPD!!!**
**A demo of Manga Colorization v2.5 is now available [link](https://mangacol.com). Feel free to check it out!**

# Automatic colorization
1. Download [generator](https://drive.google.com/file/d/1qmxUEKADkEM4iYLp1fpPLLKnfZ6tcF-t/view?usp=sharing) and [denoiser](https://drive.google.com/file/d/161oyQcYpdkVdw8gKz_MA8RD-Wtg9XDp3/view?usp=sharing) weights. Put generator and extractor weights in `networks` and denoiser weights in `denoising/models`.
2. To colorize image or folder of images, use the following command:
```
$ python inference.py -p "path to file or folder"
```

| Original | Colorization |
|------------|-------------|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
================================================ FILE: requirements.txt ================================================
torch
torchvision
opencv-python
matplotlib
================================================ FILE: start_kr.md ================================================
# requirements
- 모델 다운 -> [모델](https://drive.google.com/file/d/161oyQcYpdkVdw8gKz_MA8RD-Wtg9XDp3/view)
- 다운 받은 모델 `denoising/models`에 넣기
- generator 다운 -> [generator](https://drive.google.com/file/d/1qmxUEKADkEM4iYLp1fpPLLKnfZ6tcF-t/view)
- 다운 받은 generator.zip `networks/`에 넣기

# start
```
$ python3 -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
$ python inference.py -p <이미지 폴더 주소>
```
================================================ FILE: utils/utils.py ================================================
import numpy as np
import cv2

def resize_pad(img, size = 256):
    """Resize img so its short side matches the target, pad to a
    32-divisible size, and return (single-channel image, (pad_h, pad_w)).

    The caller (MangaColorizator.colorize) crops the pad back off using the
    returned tuple.
    """
    # Normalize to HxWx3: expand grayscale, drop alpha.
    if len(img.shape) == 2:
        img = np.expand_dims(img, 2)

    if img.shape[2] == 1:
        img = np.repeat(img, 3, 2)

    if img.shape[2] == 4:
        img = img[:, :, :3]

    pad = None

    if (img.shape[0] < img.shape[1]):
        # Landscape: fix height at size * 1.5, pad width up to multiple of 32.
        height = img.shape[0]
        ratio = height / (size * 1.5)
        width = int(np.ceil(img.shape[1] / ratio))
        img = cv2.resize(img, (width, int(size * 1.5)), interpolation = cv2.INTER_AREA)

        # NOTE(review): when width % 32 == 0 this still adds a full 32 px of
        # padding; harmless (it is cropped after colorization) but wasteful.
        new_width = width + (32 - width % 32)

        pad = (0, new_width - width)

        img = np.pad(img, ((0, 0), (0, pad[1]), (0, 0)), 'maximum')
    else:
        # Portrait/square: fix width at size, pad height up to multiple of 32.
        width = img.shape[1]
        ratio = width / size
        height = int(np.ceil(img.shape[0] / ratio))
        img = cv2.resize(img, (size, height), interpolation = cv2.INTER_AREA)

        new_height = height + (32 - height % 32)

        pad = (new_height - height, 0)

        img = np.pad(img, ((0, pad[0]), (0, 0), (0, 0)), 'maximum')

    if (img.dtype == 'float32'):
        np.clip(img, 0, 1, out = img)

    # Only the first channel is returned: the colorizer takes grayscale input.
    return img[:, :, :1], pad