Repository: qweasdd/manga-colorization-v2
Branch: master
Commit: a0d0e4482e5e
Files: 13
Total size: 38.5 KB
Directory structure:
gitextract_ul_19dwz/
├── .gitignore
├── colorizator.py
├── denoising/
│ ├── denoiser.py
│ ├── functions.py
│ ├── models.py
│ └── utils.py
├── inference.py
├── networks/
│ ├── extractor.py
│ └── models.py
├── readme.md
├── requirements.txt
├── start_kr.md
└── utils/
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.ipynb
*.pth
*.zip
__pycache__/
temp_colorization/
static/temp_images/
================================================
FILE: colorizator.py
================================================
import torch
from torchvision.transforms import ToTensor
import numpy as np
from networks.models import Colorizer
from denoising.denoiser import FFDNetDenoiser
from utils.utils import resize_pad
class MangaColorizator:
    """End-to-end manga colorization pipeline: denoise, resize/pad, colorize.

    Holds the generator network, the FFDNet denoiser, and per-image state
    (input tensor, hint channels, padding) for the image being processed.
    """

    def __init__(self, device, generator_path = 'networks/generator.zip', extractor_path = 'networks/extractor.pth'):
        # Build the generator and load its pretrained weights onto `device`.
        self.colorizer = Colorizer().to(device)
        weights = torch.load(generator_path, map_location = device)
        self.colorizer.generator.load_state_dict(weights)
        self.colorizer = self.colorizer.eval()
        self.denoiser = FFDNetDenoiser(device)
        # Per-image state, populated by set_image().
        self.current_image = None
        self.current_hint = None
        self.current_pad = None
        self.device = device

    def set_image(self, image, size = 576, apply_denoise = True, denoise_sigma = 25, transform = ToTensor()):
        """Prepare `image` for colorization: optional denoise, then resize and pad to `size`."""
        if size % 32 != 0:
            raise RuntimeError("size is not divisible by 32")
        if apply_denoise:
            image = self.denoiser.get_denoised_image(image, sigma = denoise_sigma)
        image, self.current_pad = resize_pad(image, size)
        self.current_image = transform(image).unsqueeze(0).to(self.device)
        # Empty hint: 3 color channels + 1 mask channel, all zeros.
        _, _, height, width = self.current_image.shape
        self.current_hint = torch.zeros(1, 4, height, width).float().to(self.device)

    def update_hint(self, hint, mask):
        '''
        Args:
            hint: numpy.ndarray with shape (self.current_image.shape[2], self.current_image.shape[3], 3)
            mask: numpy.ndarray with shape (self.current_image.shape[2], self.current_image.shape[3])
        '''
        # Integer hints are assumed to be on the 0..255 scale; map to [0, 1].
        if issubclass(hint.dtype.type, np.integer):
            hint = hint.astype('float32') / 255
        # Shift to [-1, 1], the range the generator works in.
        hint = (hint - 0.5) / 0.5
        hint_tensor = torch.FloatTensor(hint).permute(2, 0, 1)
        mask_tensor = torch.FloatTensor(np.expand_dims(mask, 0))
        # Zero out hint pixels outside the mask, then append the mask itself
        # as the fourth channel.
        combined = torch.cat([hint_tensor * mask_tensor, mask_tensor], 0)
        self.current_hint = combined.unsqueeze(0).to(self.device)

    def colorize(self):
        """Run the generator on the prepared image and return an HxWx3 numpy array in [0, 1]."""
        with torch.no_grad():
            network_input = torch.cat([self.current_image, self.current_hint], 1)
            fake_color, _ = self.colorizer(network_input)
        # (C, H, W) -> (H, W, C); rescale from [-1, 1] to [0, 1].
        result = fake_color[0].detach().cpu().permute(1, 2, 0) * 0.5 + 0.5
        # Crop away the padding added by resize_pad.
        pad_h, pad_w = self.current_pad[0], self.current_pad[1]
        if pad_h != 0:
            result = result[:-pad_h]
        if pad_w != 0:
            result = result[:, :-pad_w]
        return result.numpy()
================================================
FILE: denoising/denoiser.py
================================================
"""
Denoise an image with the FFDNet denoising method
Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
This program is free software: you can use, modify and/or
redistribute it under the terms of the GNU General Public
License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later
version. You should have received a copy of this license along
this program. If not, see <http://www.gnu.org/licenses/>.
"""
import os
import argparse
import time
import numpy as np
import cv2
import torch
import torch.nn as nn
from torch.autograd import Variable
from .models import FFDNet
from .utils import normalize, variable_to_cv2_image, remove_dataparallel_wrapper, is_rgb
class FFDNetDenoiser:
    """Denoises images with a pretrained FFDNet model.

    Handles weight loading for CPU/CUDA, grayscale-to-RGB expansion, alpha
    removal, downscaling of very large inputs, [0, 1] normalization, and
    padding of odd-sized images before running the network.
    """

    def __init__(self, _device, _sigma = 25, _weights_dir = 'denoising/models/', _in_ch = 3):
        # Noise level is stored normalized to [0, 1], as the model expects.
        self.sigma = _sigma / 255
        self.weights_dir = _weights_dir
        self.channels = _in_ch
        self.device = _device
        self.model = FFDNet(num_input_channels = _in_ch)
        self.load_weights()
        self.model.eval()

    def load_weights(self):
        """Load the pretrained weights matching the configured channel count."""
        weights_name = 'net_rgb.pth' if self.channels == 3 else 'net_gray.pth'
        weights_path = os.path.join(self.weights_dir, weights_name)
        if self.device == 'cuda':
            # The checkpoint was saved from a DataParallel model, so wrap the
            # model the same way before loading the state dict.
            state_dict = torch.load(weights_path, map_location=torch.device('cpu'))
            device_ids = [0]
            self.model = nn.DataParallel(self.model, device_ids=device_ids).cuda()
        else:
            state_dict = torch.load(weights_path, map_location='cpu')
            # CPU mode: remove the DataParallel wrapper
            state_dict = remove_dataparallel_wrapper(state_dict)
        self.model.load_state_dict(state_dict)

    def get_denoised_image(self, imorig, sigma = None):
        """Denoise `imorig` and return the result as a uint8 OpenCV image.

        Args:
            imorig: numpy image, HxW or HxWxC; grayscale is expanded to three
                channels and any alpha channel is dropped.
            sigma: optional noise-level override on the 0..255 scale; falls
                back to the level set at construction time.
        """
        if sigma is not None:
            cur_sigma = sigma / 255
        else:
            cur_sigma = self.sigma
        # Expand grayscale (HxW or HxWx1) to three channels.
        if len(imorig.shape) < 3 or imorig.shape[2] == 1:
            imorig = np.repeat(np.expand_dims(imorig, 2), 3, 2)
        # Drop an alpha channel, if present.
        imorig = imorig[..., :3]
        # Downscale so the longest side is at most 1200 px.
        if (max(imorig.shape[0], imorig.shape[1]) > 1200):
            ratio = max(imorig.shape[0], imorig.shape[1]) / 1200
            imorig = cv2.resize(imorig, (int(imorig.shape[1] / ratio), int(imorig.shape[0] / ratio)), interpolation = cv2.INTER_AREA)
        imorig = imorig.transpose(2, 0, 1)
        # Heuristic: values above 1.2 mean the image is on the 0..255 scale.
        if (imorig.max() > 1.2):
            imorig = normalize(imorig)
        imorig = np.expand_dims(imorig, 0)
        # Handle odd sizes
        # (FFDNet downsamples 2x2 patches, so both spatial dims must be even;
        # duplicate the last row/column and crop it off again afterwards.)
        expanded_h = False
        expanded_w = False
        sh_im = imorig.shape
        if sh_im[2]%2 == 1:
            expanded_h = True
            imorig = np.concatenate((imorig, imorig[:, :, -1, :][:, :, np.newaxis, :]), axis=2)
        if sh_im[3]%2 == 1:
            expanded_w = True
            imorig = np.concatenate((imorig, imorig[:, :, :, -1][:, :, :, np.newaxis]), axis=3)
        imorig = torch.Tensor(imorig)
        # Sets data type according to CPU or GPU modes
        if self.device == 'cuda':
            dtype = torch.cuda.FloatTensor
        else:
            dtype = torch.FloatTensor
        imnoisy = imorig.clone()
        with torch.no_grad():
            imorig, imnoisy = imorig.type(dtype), imnoisy.type(dtype)
            nsigma = torch.FloatTensor([cur_sigma]).type(dtype)
            # Estimate noise and subtract it to the input image
            im_noise_estim = self.model(imnoisy, nsigma)
            outim = torch.clamp(imnoisy-im_noise_estim, 0., 1.)
        # Undo the even-size padding.
        if expanded_h:
            imorig = imorig[:, :, :-1, :]
            outim = outim[:, :, :-1, :]
            imnoisy = imnoisy[:, :, :-1, :]
        if expanded_w:
            imorig = imorig[:, :, :, :-1]
            outim = outim[:, :, :, :-1]
            imnoisy = imnoisy[:, :, :, :-1]
        return variable_to_cv2_image(outim)
================================================
FILE: denoising/functions.py
================================================
"""
Functions implementing custom NN layers
Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
This program is free software: you can use, modify and/or
redistribute it under the terms of the GNU General Public
License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later
version. You should have received a copy of this license along
this program. If not, see <http://www.gnu.org/licenses/>.
"""
import torch
from torch.autograd import Function, Variable
def concatenate_input_noise_map(input, noise_sigma):
    r"""First layer of FFDNet: space-to-depth plus a noise map.

    Each NxCxHxW batch is de-interleaved into non-overlapping 2x2 patches,
    producing Nx(4C)x(H/2)x(W/2) features, which are concatenated behind a
    constant per-image noise map of shape NxCx(H/2)x(W/2).

    Args:
        input: batch containing CxHxW images
        noise_sigma: per-image noise level (one value per batch element)
    """
    N, C, H, W = input.size()
    dtype = input.type()
    stride = 2
    patch_size = stride * stride       # 4 pixels per 2x2 patch
    out_channels = patch_size * C
    out_h = H // stride
    out_w = W // stride
    offsets = ((0, 0), (0, 1), (1, 0), (1, 1))
    # Allocate the downsampled mosaic on the same device type as the input.
    if 'cuda' in dtype:
        mosaic = torch.cuda.FloatTensor(N, out_channels, out_h, out_w).fill_(0)
    else:
        mosaic = torch.FloatTensor(N, out_channels, out_h, out_w).fill_(0)
    # Constant noise map, one value per batch element, broadcast over C channels.
    noise_map = noise_sigma.view(N, 1, 1, 1).repeat(1, C, out_h, out_w)
    # De-interleave: patch position k goes to every k-th output channel.
    for k, (row, col) in enumerate(offsets):
        mosaic[:, k:out_channels:patch_size, :, :] = input[:, :, row::stride, col::stride]
    return torch.cat((noise_map, mosaic), 1)
class UpSampleFeaturesFunction(Function):
    r"""Custom autograd Function implementing the last layer of FFDNet.

    Performs the inverse of concatenate_input_noise_map's mosaic step:
    every batch image of size CxH/2xW/2 becomes C/4xHxW by scattering
    channel groups back into 2x2 spatial patches.
    """

    @staticmethod
    def forward(ctx, input):
        N, Cin, Hin, Win = input.size()
        dtype = input.type()
        stride = 2
        group = stride * stride
        assert (Cin % group == 0), 'Invalid input dimensions: number of channels should be divisible by 4'
        result = torch.zeros((N, Cin // group, Hin * stride, Win * stride)).type(dtype)
        offsets = ((0, 0), (0, 1), (1, 0), (1, 1))
        # Channel group k fills patch position (row, col).
        for k, (row, col) in enumerate(offsets):
            result[:, :, row::stride, col::stride] = input[:, k:Cin:group, :, :]
        return result

    @staticmethod
    def backward(ctx, grad_output):
        N, Cg_out, Hg_out, Wg_out = grad_output.size()
        dtype = grad_output.data.type()
        stride = 2
        group = stride * stride
        # Gradient of the scatter is the corresponding gather.
        grad_input = torch.zeros((N, group * Cg_out, Hg_out // stride, Wg_out // stride)).type(dtype)
        offsets = ((0, 0), (0, 1), (1, 0), (1, 1))
        for k, (row, col) in enumerate(offsets):
            grad_input[:, k:group * Cg_out:group, :, :] = grad_output.data[:, :, row::stride, col::stride]
        return Variable(grad_input)

# Alias functions
upsamplefeatures = UpSampleFeaturesFunction.apply
================================================
FILE: denoising/models.py
================================================
"""
Definition of the FFDNet model and its custom layers
Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
This program is free software: you can use, modify and/or
redistribute it under the terms of the GNU General Public
License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later
version. You should have received a copy of this license along
this program. If not, see <http://www.gnu.org/licenses/>.
"""
import torch.nn as nn
from torch.autograd import Variable
import denoising.functions as functions
class UpSampleFeatures(nn.Module):
    r"""nn.Module wrapper around the custom upsampling autograd Function
    (the last layer of FFDNet). Has no learnable parameters.
    """

    def __init__(self):
        super(UpSampleFeatures, self).__init__()

    def forward(self, x):
        # Delegate straight to the autograd Function alias.
        return functions.upsamplefeatures(x)
class IntermediateDnCNN(nn.Module):
    r"""The DnCNN-style trunk of FFDNet: Conv-ReLU, a stack of
    Conv-BatchNorm-ReLU layers, and a final bias-free Conv.
    """

    def __init__(self, input_features, middle_features, num_conv_layers):
        super(IntermediateDnCNN, self).__init__()
        self.kernel_size = 3
        self.padding = 1
        self.input_features = input_features
        self.num_conv_layers = num_conv_layers
        self.middle_features = middle_features
        # Output channel count is tied to the two supported input layouts.
        if self.input_features == 5:
            self.output_features = 4  # Grayscale image
        elif self.input_features == 15:
            self.output_features = 12  # RGB image
        else:
            raise Exception('Invalid number of input features')
        conv_args = dict(kernel_size=self.kernel_size, padding=self.padding, bias=False)
        layers = [
            nn.Conv2d(self.input_features, self.middle_features, **conv_args),
            nn.ReLU(inplace=True),
        ]
        for _ in range(self.num_conv_layers - 2):
            layers += [
                nn.Conv2d(self.middle_features, self.middle_features, **conv_args),
                nn.BatchNorm2d(self.middle_features),
                nn.ReLU(inplace=True),
            ]
        layers.append(nn.Conv2d(self.middle_features, self.output_features, **conv_args))
        # NOTE: the attribute keeps the original (misspelled) name so that
        # pretrained state dicts still load.
        self.itermediate_dncnn = nn.Sequential(*layers)

    def forward(self, x):
        return self.itermediate_dncnn(x)
class FFDNet(nn.Module):
    r"""FFDNet noise predictor: space-to-depth plus noise map, a DnCNN trunk,
    then depth-to-space back to the input resolution.
    """

    # Per-channel-count configuration:
    # (feature maps, conv layers, downsampled channels, output features)
    _CONFIGS = {
        1: (64, 15, 5, 4),    # grayscale
        3: (96, 12, 15, 12),  # RGB
    }

    def __init__(self, num_input_channels):
        super(FFDNet, self).__init__()
        self.num_input_channels = num_input_channels
        if self.num_input_channels not in self._CONFIGS:
            raise Exception('Invalid number of input features')
        (self.num_feature_maps, self.num_conv_layers,
         self.downsampled_channels, self.output_features) = self._CONFIGS[self.num_input_channels]
        self.intermediate_dncnn = IntermediateDnCNN(
            input_features=self.downsampled_channels,
            middle_features=self.num_feature_maps,
            num_conv_layers=self.num_conv_layers)
        self.upsamplefeatures = UpSampleFeatures()

    def forward(self, x, noise_sigma):
        """Predict the noise present in `x` given a per-image noise level."""
        concat_noise_x = functions.concatenate_input_noise_map(x.data, noise_sigma.data)
        concat_noise_x = Variable(concat_noise_x)
        h_dncnn = self.intermediate_dncnn(concat_noise_x)
        pred_noise = self.upsamplefeatures(h_dncnn)
        return pred_noise
================================================
FILE: denoising/utils.py
================================================
"""
Different utilities such as orthogonalization of weights, initialization of
loggers, etc
Copyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>
This program is free software: you can use, modify and/or
redistribute it under the terms of the GNU General Public
License as published by the Free Software Foundation, either
version 3 of the License, or (at your option) any later
version. You should have received a copy of this license along
this program. If not, see <http://www.gnu.org/licenses/>.
"""
import numpy as np
import cv2
def variable_to_cv2_image(varim):
    r"""Convert a 1x{1,3}xHxW torch Variable/tensor in [0, 1] to a uint8 image.

    Grayscale input yields a single-channel image; 3-channel input is
    converted from RGB to OpenCV's BGR order.

    Args:
        varim: a torch.autograd.Variable (or tensor)
    """
    nchannels = varim.size()[1]
    if nchannels == 1:
        return (varim.data.cpu().numpy()[0, 0, :] * 255.).clip(0, 255).astype(np.uint8)
    if nchannels == 3:
        res = varim.data.cpu().numpy()[0]
        res = cv2.cvtColor(res.transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
        return (res * 255.).clip(0, 255).astype(np.uint8)
    raise Exception('Number of color channels not supported')
def normalize(data):
    r"""Scale pixel data from the 0..255 range to float32 in [0, 1]."""
    scaled = data / 255.
    return np.float32(scaled)
def remove_dataparallel_wrapper(state_dict):
    r"""Return a copy of `state_dict` with the DataParallel 'module.' prefix
    stripped from each key.

    Keys without the prefix are kept unchanged. (The previous version
    unconditionally dropped the first seven characters of every key, which
    mangled state dicts that were never wrapped in nn.DataParallel.)

    Args:
        state_dict: a torch.nn.DataParallel state dictionary
    """
    from collections import OrderedDict

    prefix = 'module.'
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        name = key[len(prefix):] if key.startswith(prefix) else key
        new_state_dict[name] = value
    return new_state_dict
def is_rgb(im_path):
    r"""Return True if the image at `im_path` has genuinely distinct color
    channels (i.e. it is not grayscale stored as three identical channels).
    """
    from skimage.io import imread
    im = imread(im_path)
    rgb = False
    if len(im.shape) == 3:
        # Grayscale-as-RGB has all three channels (nearly) equal.
        grayish = np.allclose(im[..., 0], im[..., 1]) and np.allclose(im[..., 2], im[..., 1])
        rgb = not grayish
    print("rgb: {}".format(rgb))
    print("im shape: {}".format(im.shape))
    return rgb
================================================
FILE: inference.py
================================================
import os
import argparse
import sys
import numpy as np
import matplotlib.pyplot as plt
from colorizator import MangaColorizator
def process_image(image, colorizator, args):
    """Run the full pipeline on one in-memory image and return the colorized result."""
    colorizator.set_image(image, args.size, args.denoiser, args.denoiser_sigma)
    return colorizator.colorize()
def colorize_single_image(image_path, save_path, colorizator, args):
    """Colorize the image at `image_path` and write the result to `save_path`."""
    source = plt.imread(image_path)
    colored = process_image(source, colorizator, args)
    plt.imsave(save_path, colored)
    return True
def colorize_images(target_path, colorizator, args):
    """Colorize every regular file directly inside args.path, saving PNGs into target_path."""
    for image_name in os.listdir(args.path):
        file_path = os.path.join(args.path, image_name)
        # Skip subdirectories (including the output folder itself).
        if os.path.isdir(file_path):
            continue
        name, ext = os.path.splitext(image_name)
        # Results are always saved as PNG regardless of the input extension.
        if ext != '.png':
            image_name = name + '.png'
        print(file_path)
        save_path = os.path.join(target_path, image_name)
        colorize_single_image(file_path, save_path, colorizator, args)
def parse_args():
    """Define and parse the command-line interface for the colorization script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--path", required=True)
    parser.add_argument("-gen", "--generator", default='networks/generator.zip')
    parser.add_argument("-ext", "--extractor", default='networks/extractor.pth')
    parser.add_argument('-g', '--gpu', dest='gpu', action='store_true')
    parser.add_argument('-nd', '--no_denoise', dest='denoiser', action='store_false')
    parser.add_argument("-ds", "--denoiser_sigma", type=int, default=25)
    parser.add_argument("-s", "--size", type=int, default=576)
    parser.set_defaults(gpu=False, denoiser=True)
    return parser.parse_args()
if __name__ == "__main__":
    args = parse_args()

    # Select the inference device from the CLI flag.
    device = 'cuda' if args.gpu else 'cpu'
    colorizer = MangaColorizator(device, args.generator, args.extractor)

    if os.path.isdir(args.path):
        # Folder input: results go into a 'colorization' subfolder.
        colorization_path = os.path.join(args.path, 'colorization')
        if not os.path.exists(colorization_path):
            os.makedirs(colorization_path)
        colorize_images(colorization_path, colorizer, args)
    elif os.path.isfile(args.path):
        root, ext = os.path.splitext(args.path)
        if ext.lower() in ('.jpg', '.png', '.jpeg'):
            new_image_path = root + '_colorized' + '.png'
            colorize_single_image(args.path, new_image_path, colorizer, args)
        else:
            print('Wrong format')
    else:
        print('Wrong path')
================================================
FILE: networks/extractor.py
================================================
import torch
import torch.nn as nn
import math
'''https://github.com/blandocs/Tag2Pix/blob/master/model/pretrained.py'''
# Pretrained version
class Selayer(nn.Module):
    """Squeeze-and-Excitation layer: re-weights channels using a
    bottlenecked (1/16) pair of 1x1 convolutions over globally pooled features."""

    def __init__(self, inplanes):
        super(Selayer, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Squeeze to 1x1, excite through the bottleneck, gate the input.
        weights = self.global_avgpool(x)
        weights = self.relu(self.conv1(weights))
        weights = self.sigmoid(self.conv2(weights))
        return x * weights
class BottleneckX_Origin(nn.Module):
    """SE-ResNeXt bottleneck: 1x1 reduce, 3x3 grouped conv, 1x1 expand,
    SE gating, then the residual addition."""
    expansion = 4

    def __init__(self, inplanes, planes, cardinality, stride=1, downsample=None):
        super(BottleneckX_Origin, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes * 2)
        self.conv2 = nn.Conv2d(planes * 2, planes * 2, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(planes * 2)
        self.conv3 = nn.Conv2d(planes * 2, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.selayer = Selayer(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.selayer(self.bn3(self.conv3(out)))
        # Project the identity branch when the shape changes.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class SEResNeXt_Origin(nn.Module):
    """Truncated SE-ResNeXt encoder (stem + three stages) that returns the
    intermediate feature maps at four scales."""

    def __init__(self, block, layers, input_channels=3, cardinality=32, num_classes=1000):
        super(SEResNeXt_Origin, self).__init__()
        self.cardinality = cardinality
        self.inplanes = 64
        self.input_channels = input_channels
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # He-style init for convolutions, identity init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack `blocks` bottleneck units; the first gets a projection
        shortcut when the spatial size or channel count changes."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, self.cardinality, stride, downsample)]
        self.inplanes = planes * block.expansion
        layers.extend(block(self.inplanes, planes, self.cardinality)
                      for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        x1 = self.relu(self.bn1(self.conv1(x)))
        x2 = self.layer1(x1)
        x3 = self.layer2(x2)
        x4 = self.layer3(x3)
        return x1, x2, x3, x4
================================================
FILE: networks/models.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as M
import math
from torch import Tensor
from torch.nn import Parameter
from .extractor import SEResNeXt_Origin, BottleneckX_Origin
'''https://github.com/orashi/AlacGAN/blob/master/models/standard.py'''
def l2normalize(v, eps=1e-12):
    """Scale `v` to unit L2 norm; `eps` guards against division by zero."""
    return v / (eps + v.norm())
class SpectralNorm(nn.Module):
    """Spectral normalization wrapper for a module's weight parameter.

    Stores the raw weight as `<name>_bar` plus power-iteration vectors
    `<name>_u` / `<name>_v` on the wrapped module, and on every forward pass
    rebuilds `module.<name> = weight_bar / sigma`, where sigma is the largest
    singular value estimated by power iteration.
    """

    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        # Only set up u/v/weight_bar once; they may already exist (e.g. when
        # this wrapper is rebuilt around a module that was prepared before).
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        """Run power iteration and write the normalized weight back onto the module."""
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")
        # Treat the weight as a (height x rest) matrix for the iteration.
        height = w.data.shape[0]
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))
        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
        # sigma approximates the spectral norm (largest singular value) of w.
        sigma = u.dot(w.view(height, -1).mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        """Return True if the u/v/bar parameters were already registered."""
        try:
            u = getattr(self.module, self.name + "_u")
            v = getattr(self.module, self.name + "_v")
            w = getattr(self.module, self.name + "_bar")
            return True
        except AttributeError:
            return False

    def _make_params(self):
        """Replace `module.<name>` with `<name>_bar` and register the u/v vectors."""
        w = getattr(self.module, self.name)
        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]
        # u and v are not trained by gradient descent; they are updated
        # in place by the power iteration above.
        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)
        # Remove the original parameter so the normalized weight can be
        # assigned as a plain attribute on each forward pass.
        del self.module._parameters[self.name]
        self.module.register_parameter(self.name + "_u", u)
        self.module.register_parameter(self.name + "_v", v)
        self.module.register_parameter(self.name + "_bar", w_bar)

    def forward(self, *args):
        self._update_u_v()
        return self.module.forward(*args)
class Selayer(nn.Module):
    """Squeeze-and-Excitation gate: global average pool, a 1/16 channel
    bottleneck of two 1x1 convolutions, and sigmoid channel re-weighting."""

    def __init__(self, inplanes):
        super(Selayer, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        gate = self.relu(self.conv1(self.global_avgpool(x)))
        gate = self.sigmoid(self.conv2(gate))
        return x * gate
class SelayerSpectr(nn.Module):
    """Squeeze-and-Excitation gate whose 1x1 convolutions are wrapped in
    spectral normalization."""

    def __init__(self, inplanes):
        super(SelayerSpectr, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = SpectralNorm(nn.Conv2d(inplanes, inplanes // 16, kernel_size=1, stride=1))
        self.conv2 = SpectralNorm(nn.Conv2d(inplanes // 16, inplanes, kernel_size=1, stride=1))
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        gate = self.relu(self.conv1(self.global_avgpool(x)))
        gate = self.sigmoid(self.conv2(gate))
        return x * gate
class ResNeXtBottleneck(nn.Module):
    """Dilated ResNeXt bottleneck with SE gating; the shortcut average-pools
    when the block is strided."""

    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
        super(ResNeXtBottleneck, self).__init__()
        D = out_channels // 2
        self.out_channels = out_channels
        self.conv_reduce = nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv_conv = nn.Conv2d(D, D, kernel_size=2 + stride, stride=stride,
                                   padding=dilate, dilation=dilate,
                                   groups=cardinality, bias=False)
        self.conv_expand = nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.shortcut = nn.Sequential()
        if stride != 1:
            self.shortcut.add_module('shortcut', nn.AvgPool2d(2, stride=2))
        self.selayer = Selayer(out_channels)

    def forward(self, x):
        h = F.leaky_relu(self.conv_reduce(x), 0.2, True)
        h = F.leaky_relu(self.conv_conv(h), 0.2, True)
        h = self.selayer(self.conv_expand(h))
        return self.shortcut(x) + h
class SpectrResNeXtBottleneck(nn.Module):
    """Spectrally normalized variant of ResNeXtBottleneck: same topology,
    with every convolution wrapped in SpectralNorm."""

    def __init__(self, in_channels=256, out_channels=256, stride=1, cardinality=32, dilate=1):
        super(SpectrResNeXtBottleneck, self).__init__()
        D = out_channels // 2
        self.out_channels = out_channels
        self.conv_reduce = SpectralNorm(nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False))
        self.conv_conv = SpectralNorm(nn.Conv2d(D, D, kernel_size=2 + stride, stride=stride,
                                                padding=dilate, dilation=dilate,
                                                groups=cardinality, bias=False))
        self.conv_expand = SpectralNorm(nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False))
        self.shortcut = nn.Sequential()
        if stride != 1:
            self.shortcut.add_module('shortcut', nn.AvgPool2d(2, stride=2))
        self.selayer = SelayerSpectr(out_channels)

    def forward(self, x):
        h = F.leaky_relu(self.conv_reduce(x), 0.2, True)
        h = F.leaky_relu(self.conv_conv(h), 0.2, True)
        h = self.selayer(self.conv_expand(h))
        return self.shortcut(x) + h
class FeatureConv(nn.Module):
    """Three bias-free 3x3 convolutions with ReLUs; the middle one is strided,
    halving the spatial resolution.

    The original code had an optional BatchNorm path that was permanently
    disabled (no_bn = True), so only the conv/ReLU layers remain; the
    Sequential indices are unchanged.
    """

    def __init__(self, input_dim=512, output_dim=512):
        super(FeatureConv, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=2, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.network(x)
class Generator(nn.Module):
    """AlacGAN-style colorization generator
    (https://github.com/orashi/AlacGAN/blob/master/models/standard.py).

    Two encoder paths — a trainable conv path over the full 5-channel input
    (image channel + 3 hint colors + hint mask, see MangaColorizator) and an
    SE-ResNeXt encoder over the first channel alone — are fused by dilated
    ResNeXt "tunnel" stages with PixelShuffle upsampling into the final
    3-channel image; an auxiliary decoder output is produced from the
    deepest fused features.
    """

    def __init__(self, ngf=64):
        super(Generator, self).__init__()
        # Sketch-only feature extractor; takes 1 input channel (see forward()).
        self.encoder = SEResNeXt_Origin(BottleneckX_Origin, [3, 4, 6, 3], num_classes= 370, input_channels=1)
        # Downsampling path over the full 5-channel input.
        self.to0 = self._make_encoder_block_first(5, 32)
        self.to1 = self._make_encoder_block(32, 64)
        self.to2 = self._make_encoder_block(64, 92)
        self.to3 = self._make_encoder_block(92, 128)
        # NOTE(review): to4 is constructed but never used in forward().
        self.to4 = self._make_encoder_block(128, 256)
        # Auxiliary decoder applied to the deepest fused features.
        self.deconv_for_decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1), # output is 64 * 64
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1), # output is 128 * 128
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(64, 32, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
            nn.LeakyReLU(0.2),
            nn.ConvTranspose2d(32, 3, 3, stride=1, padding=1, output_padding=0), # output is 256 * 256
            nn.Tanh(),
        )
        # Deepest tunnel: fuses encoder stage x4 with the hint path, then
        # upsamples 2x via PixelShuffle.
        tunnel4 = nn.Sequential(*[ResNeXtBottleneck(512, 512, cardinality=32, dilate=1) for _ in range(20)])
        self.tunnel4 = nn.Sequential(nn.Conv2d(1024 + 128, 512, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel4,
                                     nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     ) # 64
        # Tunnels 3 and 2 use a pyramid of dilation rates (1,1,2,2,4,4,2,1).
        depth = 2
        tunnel = [ResNeXtBottleneck(256, 256, cardinality=32, dilate=1) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=2) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=4) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(256, 256, cardinality=32, dilate=2),
                   ResNeXtBottleneck(256, 256, cardinality=32, dilate=1)]
        tunnel3 = nn.Sequential(*tunnel)
        self.tunnel3 = nn.Sequential(nn.Conv2d(512 + 256, 256, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel3,
                                     nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     ) # 128
        tunnel = [ResNeXtBottleneck(128, 128, cardinality=32, dilate=1) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=2) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=4) for _ in range(depth)]
        tunnel += [ResNeXtBottleneck(128, 128, cardinality=32, dilate=2),
                   ResNeXtBottleneck(128, 128, cardinality=32, dilate=1)]
        tunnel2 = nn.Sequential(*tunnel)
        self.tunnel2 = nn.Sequential(nn.Conv2d(128 + 256 + 64, 128, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel2,
                                     nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )
        # NOTE(review): tunnel1 is constructed but never used in forward();
        # the final image is produced by `exit` instead.
        tunnel = [ResNeXtBottleneck(64, 64, cardinality=16, dilate=1)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=2)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=4)]
        tunnel += [ResNeXtBottleneck(64, 64, cardinality=16, dilate=2),
                   ResNeXtBottleneck(64, 64, cardinality=16, dilate=1)]
        tunnel1 = nn.Sequential(*tunnel)
        self.tunnel1 = nn.Sequential(nn.Conv2d(64 + 32, 64, kernel_size=3, stride=1, padding=1),
                                     nn.LeakyReLU(0.2, True),
                                     tunnel1,
                                     nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
                                     nn.PixelShuffle(2),
                                     nn.LeakyReLU(0.2, True)
                                     )
        # Final projection down to 3 channels (tanh is applied in forward()).
        self.exit = nn.Sequential(nn.Conv2d(64 + 32, 32, kernel_size=3, stride=1, padding=1),
                                  nn.LeakyReLU(0.2, True),
                                  nn.Conv2d(32, 3, kernel_size= 1, stride = 1, padding = 0))

    def _make_encoder_block(self, inplanes, planes):
        # Strided first conv halves the spatial resolution.
        return nn.Sequential(
            nn.Conv2d(inplanes, planes, 3, 2, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(planes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
        )

    def _make_encoder_block_first(self, inplanes, planes):
        # First block keeps the input resolution (stride 1).
        return nn.Sequential(
            nn.Conv2d(inplanes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(planes, planes, 3, 1, 1),
            nn.LeakyReLU(0.2),
        )

    def forward(self, sketch):
        """Run the generator.

        Args:
            sketch: (N, 5, H, W) tensor — image channel plus 4 hint channels
                (3 hint colors + mask); assumed normalized to the ranges
                produced by MangaColorizator — TODO confirm for other callers.

        Returns:
            (image, decoder_output): the colorized image in [-1, 1] (tanh)
            and the auxiliary decoder output.
        """
        # Hint-aware path over the full 5-channel input.
        x0 = self.to0(sketch)
        aux_out = self.to1(x0)
        aux_out = self.to2(aux_out)
        aux_out = self.to3(aux_out)
        # Sketch-only path: only the first input channel goes to the encoder.
        x1, x2, x3, x4 = self.encoder(sketch[:, 0:1])
        out = self.tunnel4(torch.cat([x4, aux_out], 1))
        x = self.tunnel3(torch.cat([out, x3], 1))
        x = self.tunnel2(torch.cat([x, x2, x1], 1))
        x = torch.tanh(self.exit(torch.cat([x, x0], 1)))
        # Auxiliary output decoded from the deepest fused features.
        decoder_output = self.deconv_for_decoder(out)
        return x, decoder_output
class Colorizer(nn.Module):
    """Thin wrapper exposing the Generator under the interface used by
    MangaColorizator."""

    def __init__(self):
        super(Colorizer, self).__init__()
        self.generator = Generator()

    def forward(self, x, extractor_grad = False):
        # `extractor_grad` is accepted for interface compatibility but unused here.
        fake, guide = self.generator(x)
        return fake, guide
================================================
FILE: readme.md
================================================
## **UPD!!!** **A demo of Manga Colorization v2.5 is now available [link](https://mangacol.com). Feel free to check it out!**
# Automatic colorization
1. Download [generator](https://drive.google.com/file/d/1qmxUEKADkEM4iYLp1fpPLLKnfZ6tcF-t/view?usp=sharing) and [denoiser](https://drive.google.com/file/d/161oyQcYpdkVdw8gKz_MA8RD-Wtg9XDp3/view?usp=sharing) weights. Put generator and extractor weights in `networks` and denoiser weights in `denoising/models`.
2. To colorize an image or a folder of images, use the following command:
```
$ python inference.py -p "path to file or folder"
```
| Original | Colorization |
|------------|-------------|
| <img src="figures/bw1.jpg" width="512"> | <img src="figures/color1.png" width="512"> |
| <img src="figures/bw2.jpg" width="512"> | <img src="figures/color2.png" width="512"> |
| <img src="figures/bw3.jpg" width="512"> | <img src="figures/color3.png" width="512"> |
| <img src="figures/bw4.jpg" width="512"> | <img src="figures/color4.png" width="512"> |
| <img src="figures/bw5.jpg" width="512"> | <img src="figures/color5.png" width="512"> |
| <img src="figures/bw6.jpg" width="512"> | <img src="figures/color6.png" width="512"> |
================================================
FILE: requirements.txt
================================================
torch
torchvision
opencv-python
matplotlib
================================================
FILE: start_kr.md
================================================
# requirements
- 모델 다운 -> [모델](https://drive.google.com/file/d/161oyQcYpdkVdw8gKz_MA8RD-Wtg9XDp3/view)
- 다운 받은 모델 `denoising/models`에 넣기
- generator 다운 -> [generator](https://drive.google.com/file/d/1qmxUEKADkEM4iYLp1fpPLLKnfZ6tcF-t/view)
- 다운 받은 generator.zip `networks/`에 넣기
# start
```
$ python3 -m venv venv
$ source venv/bin/activate
$ pip install -r requirements.txt
$ python inference.py -p <이미지 폴더 주소>
```
================================================
FILE: utils/utils.py
================================================
import numpy as np
import cv2
def resize_pad(img, size = 256):
    """Resize an image for the colorizer and pad it to a multiple of 32.

    Wide images (H < W) are resized so the height equals 1.5 * size and the
    width is padded; tall/square images are resized so the width equals
    `size` and the height is padded.

    Args:
        img: numpy image of shape HxW, HxWx1, HxWx3, or HxWx4 (alpha is
            dropped); uint8 or float32.
        size: base target dimension; the caller requires it to be a
            multiple of 32.

    Returns:
        Tuple of (single-channel HxWx1 image whose dimensions are multiples
        of 32, (bottom_pad, right_pad)) — the pad amounts let the caller
        crop the model output back to the resized image size.
    """
    # Normalize input to a 3-channel HxWx3 array.
    if len(img.shape) == 2:
        img = np.expand_dims(img, 2)
    if img.shape[2] == 1:
        img = np.repeat(img, 3, 2)
    if img.shape[2] == 4:
        img = img[:, :, :3]  # drop alpha channel

    if img.shape[0] < img.shape[1]:
        # Wide image: fix height at 1.5 * size, pad width up to 32-multiple.
        height = img.shape[0]
        ratio = height / (size * 1.5)
        width = int(np.ceil(img.shape[1] / ratio))
        img = cv2.resize(img, (width, int(size * 1.5)), interpolation = cv2.INTER_AREA)

        # BUGFIX: `width + (32 - width % 32)` padded a full extra 32 pixels
        # when width was already divisible by 32; `-width % 32` pads 0 then.
        pad_w = -width % 32
        pad = (0, pad_w)
        img = np.pad(img, ((0, 0), (0, pad_w), (0, 0)), 'maximum')
    else:
        # Tall/square image: fix width at size, pad height up to 32-multiple.
        width = img.shape[1]
        ratio = width / size
        height = int(np.ceil(img.shape[0] / ratio))
        img = cv2.resize(img, (size, height), interpolation = cv2.INTER_AREA)

        pad_h = -height % 32
        pad = (pad_h, 0)
        img = np.pad(img, ((0, pad_h), (0, 0), (0, 0)), 'maximum')

    # Float images are assumed to be in [0, 1]; clip interpolation overshoot.
    if img.dtype == 'float32':
        np.clip(img, 0, 1, out = img)

    # The model consumes a single grayscale channel.
    return img[:, :, :1], pad
gitextract_ul_19dwz/
├── .gitignore
├── colorizator.py
├── denoising/
│ ├── denoiser.py
│ ├── functions.py
│ ├── models.py
│ └── utils.py
├── inference.py
├── networks/
│ ├── extractor.py
│ └── models.py
├── readme.md
├── requirements.txt
├── start_kr.md
└── utils/
└── utils.py
SYMBOL INDEX (71 symbols across 9 files)
FILE: colorizator.py
class MangaColorizator (line 9) | class MangaColorizator:
method __init__ (line 10) | def __init__(self, device, generator_path = 'networks/generator.zip', ...
method set_image (line 23) | def set_image(self, image, size = 576, apply_denoise = True, denoise_s...
method update_hint (line 34) | def update_hint(self, hint, mask):
method colorize (line 50) | def colorize(self):
FILE: denoising/denoiser.py
class FFDNetDenoiser (line 26) | class FFDNetDenoiser:
method __init__ (line 27) | def __init__(self, _device, _sigma = 25, _weights_dir = 'denoising/mod...
method load_weights (line 38) | def load_weights(self):
method get_denoised_image (line 51) | def get_denoised_image(self, imorig, sigma = None):
FILE: denoising/functions.py
function concatenate_input_noise_map (line 16) | def concatenate_input_noise_map(input, noise_sigma):
class UpSampleFeaturesFunction (line 55) | class UpSampleFeaturesFunction(Function):
method forward (line 63) | def forward(ctx, input):
method backward (line 82) | def backward(ctx, grad_output):
FILE: denoising/models.py
class UpSampleFeatures (line 17) | class UpSampleFeatures(nn.Module):
method __init__ (line 20) | def __init__(self):
method forward (line 22) | def forward(self, x):
class IntermediateDnCNN (line 25) | class IntermediateDnCNN(nn.Module):
method __init__ (line 29) | def __init__(self, input_features, middle_features, num_conv_layers):
method forward (line 64) | def forward(self, x):
class FFDNet (line 68) | class FFDNet(nn.Module):
method __init__ (line 71) | def __init__(self, num_input_channels):
method forward (line 95) | def forward(self, x, noise_sigma):
FILE: denoising/utils.py
function variable_to_cv2_image (line 18) | def variable_to_cv2_image(varim):
function normalize (line 36) | def normalize(data):
function remove_dataparallel_wrapper (line 39) | def remove_dataparallel_wrapper(state_dict):
function is_rgb (line 55) | def is_rgb(im_path):
FILE: inference.py
function process_image (line 10) | def process_image(image, colorizator, args):
function colorize_single_image (line 15) | def colorize_single_image(image_path, save_path, colorizator, args):
function colorize_images (line 26) | def colorize_images(target_path, colorizator, args):
function parse_args (line 44) | def parse_args():
FILE: networks/extractor.py
class Selayer (line 8) | class Selayer(nn.Module):
method __init__ (line 9) | def __init__(self, inplanes):
method forward (line 17) | def forward(self, x):
class BottleneckX_Origin (line 27) | class BottleneckX_Origin(nn.Module):
method __init__ (line 30) | def __init__(self, inplanes, planes, cardinality, stride=1, downsample...
method forward (line 48) | def forward(self, x):
class SEResNeXt_Origin (line 72) | class SEResNeXt_Origin(nn.Module):
method __init__ (line 73) | def __init__(self, block, layers, input_channels=3, cardinality=32, nu...
method _make_layer (line 98) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 115) | def forward(self, x):
FILE: networks/models.py
function l2normalize (line 13) | def l2normalize(v, eps=1e-12):
class SpectralNorm (line 17) | class SpectralNorm(nn.Module):
method __init__ (line 18) | def __init__(self, module, name='weight', power_iterations=1):
method _update_u_v (line 26) | def _update_u_v(self):
method _made_params (line 40) | def _made_params(self):
method _make_params (line 50) | def _make_params(self):
method forward (line 68) | def forward(self, *args):
class Selayer (line 72) | class Selayer(nn.Module):
method __init__ (line 73) | def __init__(self, inplanes):
method forward (line 81) | def forward(self, x):
class SelayerSpectr (line 90) | class SelayerSpectr(nn.Module):
method __init__ (line 91) | def __init__(self, inplanes):
method forward (line 99) | def forward(self, x):
class ResNeXtBottleneck (line 108) | class ResNeXtBottleneck(nn.Module):
method __init__ (line 109) | def __init__(self, in_channels=256, out_channels=256, stride=1, cardin...
method forward (line 125) | def forward(self, x):
class SpectrResNeXtBottleneck (line 136) | class SpectrResNeXtBottleneck(nn.Module):
method __init__ (line 137) | def __init__(self, in_channels=256, out_channels=256, stride=1, cardin...
method forward (line 153) | def forward(self, x):
class FeatureConv (line 164) | class FeatureConv(nn.Module):
method __init__ (line 165) | def __init__(self, input_dim=512, output_dim=512):
method forward (line 182) | def forward(self, x):
class Generator (line 185) | class Generator(nn.Module):
method __init__ (line 186) | def __init__(self, ngf=64):
method _make_encoder_block (line 270) | def _make_encoder_block(self, inplanes, planes):
method _make_encoder_block_first (line 278) | def _make_encoder_block_first(self, inplanes, planes):
method forward (line 286) | def forward(self, sketch):
class Colorizer (line 311) | class Colorizer(nn.Module):
method __init__ (line 312) | def __init__(self):
method forward (line 317) | def forward(self, x, extractor_grad = False):
FILE: utils/utils.py
function resize_pad (line 4) | def resize_pad(img, size = 256):
Condensed preview — 13 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (41K chars).
[
{
"path": ".gitignore",
"chars": 74,
"preview": "*.ipynb\n*.pth\n*.zip\n\n__pycache__/\ntemp_colorization/\n\nstatic/temp_images/\n"
},
{
"path": "colorizator.py",
"chars": 2480,
"preview": "import torch\nfrom torchvision.transforms import ToTensor\nimport numpy as np\n\nfrom networks.models import Colorizer\nfrom "
},
{
"path": "denoising/denoiser.py",
"chars": 3944,
"preview": "\"\"\"\nDenoise an image with the FFDNet denoising method\n\nCopyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes"
},
{
"path": "denoising/functions.py",
"chars": 3659,
"preview": "\"\"\"\nFunctions implementing custom NN layers\n\nCopyright (C) 2018, Matias Tassano <matias.tassano@parisdescartes.fr>\n\nThis"
},
{
"path": "denoising/models.py",
"chars": 4153,
"preview": "\"\"\"\nDefinition of the FFDNet model and its custom layers\n\nCopyright (C) 2018, Matias Tassano <matias.tassano@parisdescar"
},
{
"path": "denoising/utils.py",
"chars": 2062,
"preview": "\"\"\"\nDifferent utilities such as orthogonalization of weights, initialization of\nloggers, etc\n\nCopyright (C) 2018, Matias"
},
{
"path": "inference.py",
"chars": 2742,
"preview": "import os\nimport argparse\nimport sys\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom colorizator import MangaC"
},
{
"path": "networks/extractor.py",
"chars": 4116,
"preview": "import torch\nimport torch.nn as nn\nimport math\n\n'''https://github.com/blandocs/Tag2Pix/blob/master/model/pretrained.py''"
},
{
"path": "networks/models.py",
"chars": 13263,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torchvision.models as M\nimport math\nfrom torch"
},
{
"path": "readme.md",
"chars": 1193,
"preview": "## **UPD!!!** **A demo of Manga Colorization v2.5 is now available [link](https://mangacol.com). Feel free to check it o"
},
{
"path": "requirements.txt",
"chars": 42,
"preview": "torch\ntorchvision\nopencv-python\nmatplotlib"
},
{
"path": "start_kr.md",
"chars": 430,
"preview": "# requirements\n- 모델 다운 -> [모델](https://drive.google.com/file/d/161oyQcYpdkVdw8gKz_MA8RD-Wtg9XDp3/view)\n- 다운 받은 모델 `denoi"
},
{
"path": "utils/utils.py",
"chars": 1228,
"preview": "import numpy as np\nimport cv2\n\ndef resize_pad(img, size = 256):\n \n if len(img.shape) == 2:\n img = n"
}
]
About this extraction
This page contains the full source code of the qweasdd/manga-colorization-v2 GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 13 files (38.5 KB), approximately 10.6k tokens, and a symbol index with 71 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.