Repository: RollingPlain/IVIF_ZOO
Branch: main
Commit: 53b4c12ccfc9
Files: 6
Total size: 102.6 KB

Directory structure:
gitextract_r6h374q9/

├── Metric/
│   ├── Metric_torch.py
│   ├── Nabf.py
│   ├── Qabf.py
│   ├── eval_torch.py
│   └── ssim.py
└── README.md

================================================
FILE CONTENTS
================================================

================================================
FILE: Metric/Metric_torch.py
================================================
import numpy as np
from scipy.signal import convolve2d
from Qabf import get_Qabf
from Nabf import get_Nabf
import math
import torch
import torch.nn.functional as F
import torch.fft
from ssim import ssim, ms_ssim
from sklearn.metrics import normalized_mutual_info_score

def EN_function(image_tensor):
    """Return the Shannon entropy (in bits) of the image's grey-level histogram.

    The histogram uses 256 bins over [0, 255]. A small constant (1e-7) is added
    inside the logarithm so that empty bins do not evaluate log2(0).

    Returns:
        A 0-dim tensor holding the entropy.
    """
    counts = torch.histc(image_tensor, bins=256, min=0, max=255)
    probabilities = counts / counts.sum()
    log_probabilities = torch.log2(probabilities + 1e-7)
    return -(probabilities * log_probabilities).sum()

def CE_function(ir_img_tensor, vi_img_tensor, f_img_tensor):
    """Binary cross-entropy between the fused image and the mean of the sources.

    All three inputs are first squashed with a sigmoid. The fused result is the
    "prediction"; the average of the two squashed sources is the "target".
    Both are clamped to [1e-7, 1 - 1e-7] before the loss is evaluated.

    Returns:
        A 0-dim tensor with the mean binary cross-entropy.
    """
    eps = 1e-7
    ir_prob = torch.sigmoid(ir_img_tensor)
    vi_prob = torch.sigmoid(vi_img_tensor)
    fused_prob = torch.sigmoid(f_img_tensor)

    prediction = fused_prob.clamp(eps, 1.0 - eps)
    target = ((ir_prob + vi_prob) / 2).clamp(eps, 1.0 - eps)
    return F.binary_cross_entropy(prediction, target)

def QNCIE_function(ir_img_tensor, vi_img_tensor, f_img_tensor):
    """Compute the Q_NCIE score from the pairwise correlations of three images.

    Each image is min-max normalised, the three pairwise normalised
    cross-correlations are assembled into a symmetric 3x3 matrix R, and the
    score is ``1 - H`` where ``H`` is the entropy of R's eigenvalues
    (each divided by K=3), expressed in base b=256.

    Args:
        ir_img_tensor, vi_img_tensor: source images (tensors of equal shape).
        f_img_tensor: fused image (same shape).

    Returns:
        The score as a Python float.
    """
    def normalize1(img_tensor):
        img_min = img_tensor.min()
        img_max = img_tensor.max()
        return (img_tensor - img_min) / (img_max - img_min)

    def NCC(img1, img2):
        mean1 = torch.mean(img1)
        mean2 = torch.mean(img2)
        numerator = torch.sum((img1 - mean1) * (img2 - mean2))
        denominator = torch.sqrt(torch.sum((img1 - mean1) ** 2) * torch.sum((img2 - mean2) ** 2))
        return numerator / (denominator + 1e-10)

    ir_img_tensor = normalize1(ir_img_tensor)
    vi_img_tensor = normalize1(vi_img_tensor)
    f_img_tensor = normalize1(f_img_tensor)

    NCCxy = NCC(ir_img_tensor, vi_img_tensor).item()
    NCCxf = NCC(ir_img_tensor, f_img_tensor).item()
    NCCyf = NCC(vi_img_tensor, f_img_tensor).item()
    R = torch.tensor([[1.0, NCCxy, NCCxf],
                      [NCCxy, 1.0, NCCyf],
                      [NCCxf, NCCyf, 1.0]], dtype=torch.float32)

    r = torch.linalg.eigvals(R).real
    K = 3
    b = 256

    # When the images are strongly correlated R is (nearly) singular, so some
    # eigenvalues are exactly zero or slightly negative from round-off.
    # Evaluating r * log2(r / K) there yields NaN; use the entropy convention
    # 0 * log(0) = 0 and let those eigenvalues contribute nothing.
    positive = r > 0
    safe_r = torch.where(positive, r, torch.ones_like(r))
    terms = torch.where(positive, r * torch.log2(safe_r / K) / K, torch.zeros_like(r))

    HR = torch.sum(terms)
    HR = -HR / np.log2(b)
    QNCIE = 1 - HR.item()
    return QNCIE


def TE_function(ir_img_tensor, vi_img_tensor, f_img_tensor, q=1, ksize=256):
    """Combine per-image histogram entropies as ``E(ir) + E(vi) - E(fused)``.

    Args:
        ir_img_tensor, vi_img_tensor, f_img_tensor: image tensors.
        q: entropy order. ``q == 1`` uses the Shannon form (log base 2);
            any other value uses ``(1 - sum(p ** q)) / (q - 1)``.
        ksize: number of histogram bins; the histogram spans [0, ksize - 1].

    Returns:
        The combined value as a Python float.
    """
    def _histogram_entropy(image):
        flat = image.view(-1).float()
        counts = torch.histc(flat, bins=ksize, min=0, max=ksize - 1)
        probs = counts / counts.sum()
        if q == 1:
            value = -(probs * torch.log2(probs + 1e-10)).sum()
        else:
            value = (1 / (q - 1)) * (1 - (probs ** q).sum())
        return value.item()

    entropy_ir = _histogram_entropy(ir_img_tensor)
    entropy_vi = _histogram_entropy(vi_img_tensor)
    entropy_fused = _histogram_entropy(f_img_tensor)
    return entropy_ir + entropy_vi - entropy_fused


def EI_function(f_img_tensor):
    """Return the mean Sobel gradient magnitude of a 2-D image as a float.

    The image is filtered with 3x3 horizontal and vertical Sobel kernels
    (zero padding of 1, so the output keeps the input size) and the mean of
    ``sqrt(gx^2 + gy^2)`` is returned.
    """
    target_device = f_img_tensor.device

    kernel_x = torch.tensor([[-1., 0., 1.],
                             [-2., 0., 2.],
                             [-1., 0., 1.]]).to(target_device).view(1, 1, 3, 3)
    kernel_y = torch.tensor([[-1., -2., -1.],
                             [0., 0., 0.],
                             [1., 2., 1.]]).to(target_device).view(1, 1, 3, 3)

    # conv2d expects (batch, channel, H, W); add the two leading axes once.
    batched = f_img_tensor.unsqueeze(0).unsqueeze(0)
    grad_x = F.conv2d(batched, kernel_x, padding=1)
    grad_y = F.conv2d(batched, kernel_y, padding=1)

    magnitude = torch.sqrt(grad_x ** 2 + grad_y ** 2)
    return torch.mean(magnitude).item()


def SF_function(image_tensor):
    """Return the spatial frequency of a 2-D image as a 0-dim tensor.

    Computed as ``sqrt(RF^2 + CF^2)`` where RF and CF are the RMS of the
    first differences along axis 0 and axis 1 respectively.
    """
    diff_axis0 = image_tensor[1:, :] - image_tensor[:-1, :]
    diff_axis1 = image_tensor[:, 1:] - image_tensor[:, :-1]

    rms_axis0 = diff_axis0.pow(2).mean().sqrt()
    rms_axis1 = diff_axis1.pow(2).mean().sqrt()

    return (rms_axis0.pow(2) + rms_axis1.pow(2)).sqrt()


def SD_function(image_tensor):
    """Return the population standard deviation of a 2-D image (0-dim tensor)."""
    rows, cols = image_tensor.shape
    centred = image_tensor - image_tensor.mean()
    variance = centred.pow(2).sum() / (rows * cols)
    return variance.sqrt()

def PSNR_function(A, B, F):
    """Return the PSNR (dB) of fused image F against the two sources A and B.

    Inputs are divided by 255 before comparison, the two mean-squared errors
    are averaged with equal weight, and the result is
    ``20 * log10(1 / sqrt(MSE))``.

    Returns:
        A 0-dim tensor.
    """
    source_a, source_b, fused = (image.float() / 255.0 for image in (A, B, F))

    # Unpacking into two names keeps the requirement that F is 2-D.
    _rows, _cols = fused.shape

    err_a = ((fused - source_a) ** 2).mean()
    err_b = ((fused - source_b) ** 2).mean()
    mean_sq_err = 0.5 * err_a + 0.5 * err_b

    return 20 * torch.log10(1 / torch.sqrt(mean_sq_err))


def MSE_function(A, B, F):
    """Return the equally weighted mean of MSE(F, A) and MSE(F, B).

    All inputs are divided by 255 before the errors are computed.

    Returns:
        A 0-dim tensor.
    """
    source_a, source_b, fused = (image.float() / 255.0 for image in (A, B, F))

    # Unpacking into two names keeps the requirement that F is 2-D.
    _rows, _cols = fused.shape

    err_a = ((fused - source_a) ** 2).mean()
    err_b = ((fused - source_b) ** 2).mean()
    return 0.5 * err_a + 0.5 * err_b

def fspecial_gaussian(shape, sigma):
    """Build a normalised, zero-centred 2-D Gaussian kernel as a NumPy array.

    NOTE: this module defines another ``fspecial_gaussian`` (torch-based)
    right after this one, which rebinds the name at import time.

    Args:
        shape: ``(rows, cols)`` of the kernel.
        sigma: standard deviation of the Gaussian.

    Returns:
        Array of the requested shape; entries below machine epsilon times the
        peak are zeroed, and the kernel sums to 1 unless it is all zeros.
    """
    half_rows, half_cols = ((extent - 1.) / 2. for extent in shape)
    row_grid, col_grid = np.ogrid[-half_rows:half_rows + 1, -half_cols:half_cols + 1]

    kernel = np.exp(-(col_grid * col_grid + row_grid * row_grid) / (2. * sigma * sigma))
    negligible = kernel < np.finfo(kernel.dtype).eps * kernel.max()
    kernel[negligible] = 0

    total = kernel.sum()
    if total != 0:
        kernel /= total
    return kernel

def fspecial_gaussian(size, sigma):
    """Build a normalised 2-D Gaussian kernel centred on the window.

    Args:
        size: ``(rows, cols)`` of the kernel.
        sigma: standard deviation of the Gaussian.

    Returns:
        A float32 CPU tensor of shape ``size`` that sums to 1.
    """
    # Sample positions are offsets from the window centre, i.e.
    # -(n - 1) / 2 ... (n - 1) / 2. Note that ``-n // 2`` parses as
    # ``(-n) // 2`` (e.g. -9 for n = 17), which would shift the grid off
    # centre and make the kernel asymmetric.
    x = torch.arange(size[0], dtype=torch.float32) - (size[0] - 1) / 2
    y = torch.arange(size[1], dtype=torch.float32) - (size[1] - 1) / 2
    x, y = torch.meshgrid(x, y, indexing='ij')
    g = torch.exp(-(x**2 + y**2) / (2 * sigma**2))
    return g / g.sum()

def convolve2d(input, kernel):
    """Filter a 2-D tensor with a 2-D kernel via ``F.conv2d`` and zero padding.

    The kernel is moved to ``input``'s device. Padding is half the kernel
    height on every side, so an odd square kernel yields an output of the
    same size as the input. ``F.conv2d`` does not flip the kernel.

    Returns:
        The filtered 2-D tensor.
    """
    weight = kernel[None, None].to(input.device)  # -> (1, 1, kh, kw)
    batched = input[None, None]                   # -> (1, 1, H, W)
    pad = weight.shape[2] // 2
    return F.conv2d(batched, weight, padding=pad)[0, 0]

def vifp_mscale(ref, dist):
    """Accumulate a four-scale ratio of log terms between ``ref`` and ``dist``.

    For each of four scales a Gaussian window (sizes 17, 9, 5, 3 with
    sigma = N / 5) is used to estimate local means, variances and covariance.
    From scale 2 onwards both images are first blurred with the current
    window and then subsampled by 2 in each dimension.

    Args:
        ref: reference image; indexed as a 2-D tensor (``ref[::2, ::2]``).
        dist: image compared against ``ref``; same layout.

    Returns:
        ``num / den`` as a tensor, where both are summed over all scales.
        NOTE(review): ``den`` is zero if ``ref`` has no local variance at any
        scale, in which case the division is undefined.
    """
    sigma_nsq = 2  # constant added to the variance terms inside both logs
    num = 0
    den = 0
    for scale in range(1, 5):
        # Window size per scale: 17, 9, 5, 3.
        N = 2 ** (4 - scale + 1) + 1
        win = fspecial_gaussian((N, N), N / 5)

        if scale > 1:
            # Blur with the current window, then keep every second row/column.
            ref = convolve2d(ref, win)
            dist = convolve2d(dist, win)
            ref = ref[::2, ::2]
            dist = dist[::2, ::2]

        # Local first and second moments under the window.
        mu1 = convolve2d(ref, win)
        mu2 = convolve2d(dist, win)
        mu1_sq = mu1 * mu1
        mu2_sq = mu2 * mu2
        mu1_mu2 = mu1 * mu2
        sigma1_sq = convolve2d(ref * ref, win) - mu1_sq
        sigma2_sq = convolve2d(dist * dist, win) - mu2_sq
        sigma12 = convolve2d(ref * dist, win) - mu1_mu2
        # Variances can dip below zero from round-off; clip them.
        sigma1_sq[sigma1_sq < 0] = 0
        sigma2_sq[sigma2_sq < 0] = 0

        # g: local gain of dist relative to ref; sv_sq: residual variance.
        g = sigma12 / (sigma1_sq + 1e-10)
        sv_sq = sigma2_sq - g * sigma12

        # The masks below are applied in place and in this order; later masks
        # see the values written by earlier ones.
        # Where ref is locally flat: no gain, residual is all of dist's variance.
        g[sigma1_sq < 1e-10] = 0
        sv_sq[sigma1_sq < 1e-10] = sigma2_sq[sigma1_sq < 1e-10]
        sigma1_sq[sigma1_sq < 1e-10] = 0

        # Where dist is locally flat: no gain and no residual.
        g[sigma2_sq < 1e-10] = 0
        sv_sq[sigma2_sq < 1e-10] = 0

        # Negative gain is treated as zero gain with full residual variance.
        sv_sq[g < 0] = sigma2_sq[g < 0]
        g[g < 0] = 0
        # Floor the residual variance so it stays strictly positive.
        sv_sq[sv_sq <= 1e-10] = 1e-10

        num += torch.sum(torch.log10(1 + g**2 * sigma1_sq / (sv_sq + sigma_nsq)))
        den += torch.sum(torch.log10(1 + sigma1_sq / sigma_nsq))

    vifp = num / den
    return vifp

def VIF_function(A, B, F):
    """Return ``vifp_mscale(A, F) + vifp_mscale(B, F)`` for fused image F."""
    score_a = vifp_mscale(A, F)
    score_b = vifp_mscale(B, F)
    return score_a + score_b


def CC_function(A, B, F):
    """Return the mean of the Pearson correlations corr(A, F) and corr(B, F).

    Returns:
        A 0-dim tensor created via ``torch.tensor([...])`` from the two
        correlation values.
    """
    def _pearson(source):
        source_dev = source - torch.mean(source)
        fused_dev = F - torch.mean(F)
        covariance = torch.sum(source_dev * fused_dev)
        scale = torch.sqrt(torch.sum(source_dev ** 2) * torch.sum(fused_dev ** 2))
        return covariance / scale

    correlations = [_pearson(A), _pearson(B)]
    return torch.mean(torch.tensor(correlations))

def corr2(a, b):
    """Return the Pearson correlation coefficient of two tensors (0-dim tensor)."""
    a_centred = a - a.mean()
    b_centred = b - b.mean()
    covariance = (a_centred * b_centred).sum()
    scale = ((a_centred * a_centred).sum() * (b_centred * b_centred).sum()).sqrt()
    return covariance / scale

def SCD_function(A, B, F):
    """Return ``corr2(F - B, A) + corr2(F - A, B)``.

    Each difference image (fused minus one source) is correlated with the
    other source, and the two correlations are summed.
    """
    corr_with_a = corr2(F - B, A)
    corr_with_b = corr2(F - A, B)
    return corr_with_a + corr_with_b

def Qabf_function(A, B, F):
    """Thin wrapper: forward sources A, B and fused image F to ``get_Qabf``."""
    score = get_Qabf(A, B, F)
    return score

def Nabf_function(A, B, F):
    """Thin wrapper: forward sources A, B and fused image F to ``get_Nabf``."""
    # Delegate to get_Nabf; calling Nabf_function from here would make the
    # wrapper invoke itself and recurse without bound.
    return get_Nabf(A, B, F)


def Hab(im1, im2, gray_level):
    """Return the mutual information (in bits) between two grey-level images.

    The joint histogram of co-located pixel values is built in one vectorised
    pass and ``MI = sum(p_xy log2 p_xy) - sum(p_x log2 p_x) - sum(p_y log2 p_y)``
    is evaluated over the non-zero probabilities.

    Args:
        im1: 2-D array of grey levels in ``[0, gray_level)``.
        im2: 2-D array covering at least ``im1``'s shape; only the region
            overlapping ``im1`` is used.
        gray_level: number of grey levels (histogram size per axis).

    Returns:
        The mutual information as a float; 0.0 for an empty image.

    Raises:
        IndexError: if ``im2`` is smaller than ``im1`` or any value lies
            outside ``[0, gray_level)``.
    """
    N = gray_level
    im1 = np.asarray(im1)
    im2 = np.asarray(im2)
    hang, lie = im1.shape

    im2 = im2[:hang, :lie]
    if im2.shape != im1.shape:
        raise IndexError("im2 must cover the full extent of im1")

    idx1 = im1.astype(np.intp, copy=False).ravel()
    idx2 = im2.astype(np.intp, copy=False).ravel()
    if idx1.size == 0:
        return 0.0
    if min(idx1.min(), idx2.min()) < 0 or max(idx1.max(), idx2.max()) >= N:
        raise IndexError("pixel values must lie in [0, gray_level)")

    # Flatten each (v1, v2) pair to one bin index so a single bincount call
    # builds the whole N x N joint histogram.
    joint = np.bincount(idx1 * N + idx2, minlength=N * N).reshape(N, N).astype(np.float64)
    joint /= joint.sum()

    def _sum_p_log2_p(p):
        # Zero-probability bins contribute nothing (0 * log 0 := 0).
        p = p[p > 0]
        return float(np.sum(p * np.log2(p)))

    H_xy = _sum_p_log2_p(joint)
    H_x = _sum_p_log2_p(joint.sum(axis=1))  # marginal over im1's grey levels
    H_y = _sum_p_log2_p(joint.sum(axis=0))  # marginal over im2's grey levels
    return H_xy - H_x - H_y

def MI_function(A, B, F, gray_level=256):
    """Return ``Hab(A, F) + Hab(B, F)``: the summed mutual information
    between each source image and the fused image F."""
    mi_a = Hab(A, F, gray_level)
    mi_b = Hab(B, F, gray_level)
    return mi_a + mi_b

def entropy(im, gray_level=256):
    """Return the Shannon entropy (bits) of an image's grey-level histogram.

    The histogram has ``gray_level`` bins over ``[0, gray_level]`` and is
    computed with ``density=True``; 1e-10 is added inside the logarithm so
    empty bins do not evaluate log2(0).
    """
    density, _edges = np.histogram(im, bins=gray_level, range=(0, gray_level), density=True)
    weighted_logs = density * np.log2(density + 1e-10)
    return -weighted_logs.sum()

def NMI_function(A, B, F, gray_level=256):
    """Return ``2 * (Hab(A, F) + Hab(B, F)) / (entropy(A) + entropy(B))``.

    A 1e-10 term in the denominator guards against division by zero when
    both source entropies vanish.
    """
    mutual_info_total = Hab(A, F, gray_level) + Hab(B, F, gray_level)
    source_entropy_total = entropy(A, gray_level) + entropy(B, gray_level)
    return 2 * mutual_info_total / (source_entropy_total + 1e-10)

def AG_function(image_tensor):
    """Return the average gradient of a 2-D image as a 0-dim tensor.

    Per pixel the value ``sqrt((gx^2 + gy^2) / 2)`` is formed from the two
    outputs of ``torch.gradient``; the sum is divided by the pixel count.
    """
    grad_axis0, grad_axis1 = torch.gradient(image_tensor)
    magnitude = ((grad_axis1 ** 2 + grad_axis0 ** 2) / 2).sqrt()
    pixel_count = image_tensor.shape[0] * image_tensor.shape[1]
    return magnitude.sum() / pixel_count

def SSIM_function(A, B, F):
    """Return the mean of ``ssim(A, F)`` and ``ssim(B, F)`` via ``.item()``."""
    score_a, score_b = (ssim(source, F) for source in (A, B))
    averaged = (score_a + score_b) / 2
    return averaged.item()

def MS_SSIM_function(A, B, F):
    """Return the mean of ``ms_ssim(A, F)`` and ``ms_ssim(B, F)`` via ``.item()``."""
    score_a, score_b = (ms_ssim(source, F) for source in (A, B))
    averaged = (score_a + score_b) / 2
    return averaged.item()

def Nabf_function(A, B, F):
    """Thin wrapper: return ``get_Nabf(A, B, F)`` for sources A, B and fused F."""
    return get_Nabf(A, B, F)

def Qy_function(ir_img_tensor, vi_img_tensor, f_img_tensor):
    """Compute the SSIM-based Qy fusion score as a Python float.

    Local SSIM maps are computed between the two sources and between each
    source and the fused image, using a 7x7 Gaussian window (sigma 1.5).
    Where the sources are locally similar (SSIM >= 0.75) the score is a
    variance-weighted mix of the two source-vs-fused SSIM values; elsewhere
    it is their maximum. The mean over all pixels is returned.

    Args:
        ir_img_tensor, vi_img_tensor: 2-D source images.
        f_img_tensor: 2-D fused image of the same shape.
    """
    window_size = 7
    sigma = 1.5
    C1 = 0.01**2
    C2 = 0.03**2
    pad = window_size // 2

    def gaussian_window(size, std):
        coords = torch.arange(size, dtype=torch.double, device=ir_img_tensor.device) - size // 2
        gauss = torch.exp(-coords ** 2 / (2 * std ** 2))
        gauss = gauss / gauss.sum()
        # The 2-D window is the outer product of the normalised 1-D profile,
        # so it sums to 1. Tiling the 1-D profile across rows instead would
        # give a window that sums to `size` and mis-scales every local mean
        # and variance.
        return (gauss.unsqueeze(1) * gauss.unsqueeze(0)).view(1, 1, size, size)

    window = gaussian_window(window_size, sigma)

    def ssim_yang(img1, img2):
        mu1 = F.conv2d(img1, window, stride=1, padding=pad)
        mu2 = F.conv2d(img2, window, stride=1, padding=pad)
        mu1_sq = mu1.pow(2)
        mu2_sq = mu2.pow(2)
        mu1_mu2 = mu1 * mu2
        sigma1_sq = F.conv2d(img1.pow(2), window, padding=pad) - mu1_sq
        sigma2_sq = F.conv2d(img2.pow(2), window, padding=pad) - mu2_sq
        sigma12 = F.conv2d(img1 * img2, window, padding=pad) - mu1_mu2
        ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
        return ssim_map, sigma1_sq, sigma2_sq

    # conv2d expects (batch, channel, H, W); work in double precision.
    ir_img_tensor = ir_img_tensor.unsqueeze(0).unsqueeze(0).double()
    vi_img_tensor = vi_img_tensor.unsqueeze(0).unsqueeze(0).double()
    f_img_tensor = f_img_tensor.unsqueeze(0).unsqueeze(0).double()

    ssim_map1, sigma1_sq1, sigma2_sq1 = ssim_yang(ir_img_tensor, vi_img_tensor)
    ssim_map2, _, _ = ssim_yang(ir_img_tensor, f_img_tensor)
    ssim_map3, _, _ = ssim_yang(vi_img_tensor, f_img_tensor)

    # 1 where the two sources are locally similar, 0 elsewhere.
    bin_map = (ssim_map1 >= 0.75).double()
    # Relative local variance of the first source.
    ramda = sigma1_sq1 / (sigma1_sq1 + sigma2_sq1 + 1e-10)

    Q1 = (ramda * ssim_map2 + (1 - ramda) * ssim_map3) * bin_map
    Q2 = torch.max(ssim_map2, ssim_map3) * (1 - bin_map)
    Qy = (Q1 + Q2).mean().item()

    return Qy

def gaussian2d(n1, n2, sigma, device):
    """Return a 31x31 double-precision 2-D Gaussian sampled on [-15, 15]^2.

    ``n1`` and ``n2`` are accepted but not used; the grid size is fixed.
    The values follow ``exp(-(x^2 + y^2) / (2 sigma^2)) / (2 pi sigma^2)``
    and are not re-normalised to sum to 1.
    """
    offsets = torch.arange(-15, 16, device=device, dtype=torch.double)
    x = offsets.unsqueeze(1)  # varies along axis 0
    y = offsets.unsqueeze(0)  # varies along axis 1
    return torch.exp(-(x**2 + y**2) / (2 * sigma**2)) / (2 * torch.pi * sigma**2)

def contrast(G1, G2, img):
    """Return ``(img * G1) / (img * G2 + 1e-10) - 1`` where ``*`` is ``F.conv2d``.

    ``img``, ``G1`` and ``G2`` are 2-D tensors; each kernel is applied with
    zero padding of half its width. The result keeps the leading batch and
    channel axes added for ``conv2d`` (shape ``(1, 1, H', W')``).
    """
    batched = img.unsqueeze(0).unsqueeze(0)

    def _filter(kernel):
        return F.conv2d(batched, kernel.unsqueeze(0).unsqueeze(0), padding=kernel.shape[-1] // 2)

    filtered_g1 = _filter(G1)
    filtered_g2 = _filter(G2)
    return filtered_g1 / (filtered_g2 + 1e-10) - 1

def Qcb_function(ir_img_tensor, vi_img_tensor, f_img_tensor):
    """Compute the contrast-based Qcb fusion score as a Python float.

    Steps: min-max normalise each image, filter it in the frequency domain
    with a difference-of-Gaussians response ``Sd``, compute a local contrast
    from two Gaussian blurs (sigma 2 and 4), apply a masking non-linearity,
    and combine the per-source contrast-preservation ratios with
    saliency weights. The mean over all pixels is returned.
    """
    device = ir_img_tensor.device

    def _to_unit_range(image):
        image = image.double().to(device)
        return (image - image.min()) / (image.max() - image.min())

    ir_img_tensor = _to_unit_range(ir_img_tensor)
    vi_img_tensor = _to_unit_range(vi_img_tensor)
    f_img_tensor = _to_unit_range(f_img_tensor)

    # Frequency-response parameters.
    f0 = 15.3870
    f1 = 1.3456
    a = 0.7622
    # Masking non-linearity parameters.
    k = 1
    h = 1
    p = 3
    q = 2
    Z = 0.0001
    hang, lie = ir_img_tensor.shape[-2:]

    u, v = torch.meshgrid(torch.fft.fftfreq(hang, device=device), torch.fft.fftfreq(lie, device=device), indexing='ij')
    u = u * (hang / 30)
    v = v * (lie / 30)
    r = torch.sqrt(u**2 + v**2)
    Sd = (torch.exp(-(r / f0)**2) - a * torch.exp(-(r / f1)**2)).to(device)

    def _frequency_filter(image):
        return torch.fft.ifft2(torch.fft.fft2(image) * Sd).real

    fim1 = _frequency_filter(ir_img_tensor)
    fim2 = _frequency_filter(vi_img_tensor)
    ffim = _frequency_filter(f_img_tensor)

    G1 = gaussian2d(hang, lie, 2, device).to(device)
    G2 = gaussian2d(hang, lie, 4, device).to(device)

    def _masked_contrast(filtered):
        magnitude = torch.abs(contrast(G1, G2, filtered))
        return (k * (magnitude**p)) / (h * (magnitude**q) + Z)

    C1P = _masked_contrast(fim1)
    C2P = _masked_contrast(fim2)
    CfP = _masked_contrast(ffim)

    def _preservation(source_contrast):
        # Ratio of the smaller to the larger of (source, fused) contrast.
        mask = (source_contrast < CfP).double()
        return (source_contrast / CfP) * mask + (CfP / source_contrast) * (1 - mask)

    Q1F = _preservation(C1P)
    Q2F = _preservation(C2P)

    ramda1 = (C1P**2) / (C1P**2 + C2P**2 + 1e-10)
    ramda2 = (C2P**2) / (C1P**2 + C2P**2 + 1e-10)
    Q = ramda1 * Q1F + ramda2 * Q2F
    return Q.mean().item()


================================================
FILE: Metric/Nabf.py
================================================
import numpy as np
from scipy.signal import convolve2d
import math
import torch
def sobel_fn(x):
    """Return ``(gv, gh)``: the image filtered with two 3x3 Sobel kernels / 8.

    The image is first border-extended with ``per_extn_im_fn`` so that the
    'valid' convolutions return arrays of the input's shape.
    """
    vertical_kernel = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 8
    horizontal_kernel = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]]) / 8

    window = horizontal_kernel.shape[0]
    extended = per_extn_im_fn(x, window)

    grad_v = convolve2d(extended, vertical_kernel, mode='valid')
    grad_h = convolve2d(extended, horizontal_kernel, mode='valid')
    return grad_v, grad_h


def per_extn_im_fn(x, wsize):
    """Extend a 2-D image by ``(wsize - 1) // 2`` pixels on every side.

    The image is copied into the centre of a zero array. Border rows are
    filled from extended rows 2 and -3 (only when ``wsize - 1`` equals the
    half-window plus one, which holds for ``wsize == 3``); border columns are
    then filled from extended columns 2 and -3, which also fills the corners.

    Returns:
        A float array of shape ``(p + wsize - 1, q + wsize - 1)``.
    """
    half = (wsize - 1) // 2  # half window size, excluding the centre pixel
    rows, cols = x.shape

    extended = np.zeros((rows + wsize - 1, cols + wsize - 1))
    extended[half:rows + half, half:cols + half] = x

    if wsize - 1 == half + 1:
        extended[:half, :] = extended[2, :][np.newaxis, :]
        extended[rows + half:rows + wsize - 1, :] = extended[-3, :][np.newaxis, :]

    # Columns are filled after rows so the corners pick up the row extension.
    extended[:, :half] = extended[:, 2][:, np.newaxis]
    extended[:, cols + half:cols + wsize - 1] = extended[:, -3][:, np.newaxis]

    return extended

def get_Nabf(I1, I2, f):
    """Compute the Nabf score for fused image ``f`` given sources ``I1``, ``I2``.

    Sobel gradients give an edge strength and orientation per pixel for each
    image. Sigmoid-shaped strength and orientation preservation terms are
    combined into per-pixel edge-preservation values QAF and QBF. The score
    sums ``(1 - QAF) * wA + (1 - QBF) * wB`` over pixels whose fused gradient
    exceeds both source gradients, divided by the total weight.

    Args:
        I1, I2: source images, as 2-D NumPy arrays or torch tensors.
        f: fused image of the same shape.

    Returns:
        The score as a NumPy float. It is NaN when no source pixel reaches
        the gradient threshold (total weight of zero).
    """
    Td = 2      # gradient strength below this gets zero weight
    Lg = 1.5    # exponent applied to gradient strength to form the weight
    # Sigmoid parameters for the strength (g) and orientation (a) terms.
    Nrg, kg, sigmag = 0.9999, 19, 0.5
    Nra, ka, sigmaa = 0.9995, 22, 0.5

    I1 = I1.cpu().numpy() if isinstance(I1, torch.Tensor) else I1
    I2 = I2.cpu().numpy() if isinstance(I2, torch.Tensor) else I2
    f = f.cpu().numpy() if isinstance(f, torch.Tensor) else f
    xrcw = f.astype(np.float64)
    x1 = I1.astype(np.float64)
    x2 = I2.astype(np.float64)

    gvA, ghA = sobel_fn(x1)
    gA = np.sqrt(ghA**2 + gvA**2)
    gvB, ghB = sobel_fn(x2)
    gB = np.sqrt(ghB**2 + gvB**2)
    gvF, ghF = sobel_fn(xrcw)
    gF = np.sqrt(ghF**2 + gvF**2)

    def _strength_ratio(g_src, g_fused):
        # min/max ratio of the two strengths; 0 where either one is zero.
        ratio = np.zeros(g_src.shape)
        usable = ~((g_src == 0) | (g_fused == 0))
        with np.errstate(divide='ignore', invalid='ignore'):
            candidates = np.where(g_src > g_fused, g_fused / g_src, g_src / g_fused)
        ratio[usable] = candidates[usable]
        return ratio

    def _orientation(gv, gh):
        # Edge angle; defined as 0 where both gradient components vanish.
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.where((gv == 0) & (gh == 0), 0, np.arctan(gv / gh))

    gAF = _strength_ratio(gA, gF)
    gBF = _strength_ratio(gB, gF)
    aA = _orientation(gvA, ghA)
    aB = _orientation(gvB, ghB)
    aF = _orientation(gvF, ghF)

    aAF = np.abs(np.abs(aA - aF) - np.pi / 2) * 2 / np.pi
    aBF = np.abs(np.abs(aB - aF) - np.pi / 2) * 2 / np.pi

    QgAF = Nrg / (1 + np.exp(-kg * (gAF - sigmag)))
    QaAF = Nra / (1 + np.exp(-ka * (aAF - sigmaa)))
    QAF = np.sqrt(QgAF * QaAF)
    QgBF = Nrg / (1 + np.exp(-kg * (gBF - sigmag)))
    QaBF = Nra / (1 + np.exp(-ka * (aBF - sigmaa)))
    QBF = np.sqrt(QgBF * QaBF)

    # Per-pixel weights: source gradient strength ** Lg above the threshold.
    wtA = np.where(gA >= Td, gA ** Lg, 0)
    wtB = np.where(gB >= Td, gB ** Lg, 0)
    wt_sum = np.sum(wtA + wtB)

    # Count only pixels where the fused gradient exceeds both sources.
    na = np.where((gF > gA) & (gF > gB), 1, 0)
    NABF = np.sum(na * ((1 - QAF) * wtA + (1 - QBF) * wtB)) / wt_sum
    return NABF

================================================
FILE: Metric/Qabf.py
================================================
import numpy as np
import math
from scipy.signal import convolve2d


def sobel_fn(x):
    """Return ``(gv, gh)``: the image filtered with two 3x3 Sobel kernels / 8.

    The input is border-extended with ``per_extn_im_fn`` before the 'valid'
    convolutions, so both outputs have the input's shape.
    """
    kernel_v = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 8
    kernel_h = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]]) / 8

    kernel_rows, _kernel_cols = kernel_h.shape
    padded = per_extn_im_fn(x, kernel_rows)

    return (
        convolve2d(padded, kernel_v, mode='valid'),
        convolve2d(padded, kernel_h, mode='valid'),
    )


def per_extn_im_fn(x, wsize):
    """Extend a 2-D image by ``(wsize - 1) // 2`` pixels on every side.

    The image sits in the centre of a zero array. When ``wsize - 1`` equals
    the half-window plus one (true for ``wsize == 3``) the border rows are
    copied from extended rows 2 and -3. Border columns are then copied from
    extended columns 2 and -3, which fills the corners as well.

    Returns:
        A float array of shape ``(p + wsize - 1, q + wsize - 1)``.
    """
    border = (wsize - 1) // 2
    height, width = x.shape

    canvas = np.zeros((height + wsize - 1, width + wsize - 1))
    canvas[border:height + border, border:width + border] = x

    if wsize - 1 == border + 1:
        canvas[0:border, :] = canvas[2, :][None, :]
        canvas[height + border:height + wsize - 1, :] = canvas[-3, :][None, :]

    # Columns come second so they also cover the freshly filled border rows.
    canvas[:, 0:border] = canvas[:, 2][:, None]
    canvas[:, width + border:width + wsize - 1] = canvas[:, -3][:, None]

    return canvas

def get_Qabf(pA, pB, pF):
    """Compute the gradient-based Qabf score for fused image ``pF``.

    For each image a Sobel edge strength and orientation are computed. For
    each source, strength and orientation preservation relative to the fused
    image are passed through sigmoids and multiplied. The score is the
    average of these values weighted by source edge strength.

    Args:
        pA, pB: 2-D source images (NumPy arrays).
        pF: 2-D fused image of the same shape.

    Returns:
        The weighted score (NaN if both sources have no gradient at all).
    """
    # Sigmoid parameters for the strength (g) and orientation (a) terms.
    Tg = 0.9994
    kg = -15
    Dg = 0.5
    Ta = 0.9879
    ka = -22
    Da = 0.8

    sobel_rows = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).astype(np.float32)
    sobel_cols = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).astype(np.float32)

    def _filter(kernel, image):
        # The kernel is flipped before convolve2d (which flips it again).
        padded = np.pad(image, 1, mode='constant', constant_values=0)
        return convolve2d(padded, np.flip(kernel), mode='valid')

    def _strength_and_angle(image):
        resp_x = _filter(sobel_cols, image)
        resp_y = _filter(sobel_rows, image)
        strength = np.sqrt(resp_x * resp_x + resp_y * resp_y)
        # Angle defaults to pi/2 wherever the x response is exactly zero.
        angle = np.full(image.shape, np.pi / 2)
        has_x = resp_x != 0
        angle[has_x] = np.arctan(resp_y[has_x] / resp_x[has_x])
        return strength, angle

    strength_a, angle_a = _strength_and_angle(pA)
    strength_b, angle_b = _strength_and_angle(pB)
    strength_f, angle_f = _strength_and_angle(pF)

    def _preservation(angle_src, strength_src, angle_fused, strength_fused):
        strength_ratio = np.where(
            strength_src > strength_fused,
            strength_fused / strength_src,
            np.where(strength_src == strength_fused, strength_fused, strength_src / strength_fused),
        )
        angle_agreement = 1 - np.abs(angle_src - angle_fused) / (math.pi / 2)

        q_strength = Tg / (1 + np.exp(kg * (strength_ratio - Dg)))
        q_angle = Ta / (1 + np.exp(ka * (angle_agreement - Da)))
        return q_strength * q_angle

    q_af = _preservation(angle_a, strength_a, angle_f, strength_f)
    q_bf = _preservation(angle_b, strength_b, angle_f, strength_f)

    denominator = np.sum(strength_a + strength_b)
    numerator = np.sum(q_af * strength_a + q_bf * strength_b)
    return numerator / denominator

================================================
FILE: Metric/eval_torch.py
================================================
import numpy as np
from PIL import Image
from Metric_torch import *
from natsort import natsorted
from tqdm import tqdm
import os
import torch
import warnings
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter

warnings.filterwarnings("ignore")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def write_excel(excel_name='metric.xlsx', worksheet_name='VIF', column_index=0, data=None):
    """Write ``data`` down one column of a worksheet and save the workbook.

    The workbook is loaded from ``excel_name`` if it exists, otherwise a new
    one is created. The worksheet is created on first use.

    Args:
        excel_name: path of the ``.xlsx`` file to update.
        worksheet_name: title of the sheet to write into.
        column_index: zero-based column index (0 -> column A).
        data: iterable of cell values, written from row 1 downwards.
    """
    try:
        workbook = load_workbook(excel_name)
    except FileNotFoundError:
        workbook = Workbook()

    if worksheet_name in workbook.sheetnames:
        worksheet = workbook[worksheet_name]
    else:
        worksheet = workbook.create_sheet(title=worksheet_name)

    column = get_column_letter(column_index + 1)
    for row_number, value in enumerate(data, start=1):
        worksheet[column + str(row_number)].value = value

    workbook.save(excel_name)


def evaluation_one(ir_name, vi_name, f_name):
    """Load one (infrared, visible, fused) image triple and compute all metrics.

    Each image is opened as 8-bit greyscale and provided in three forms: a
    float tensor on ``device``, an int32 array and a float32 array. Which
    form each metric receives is fixed below.

    Returns:
        A 21-tuple in the order
        (CE, NMI, QNCIE, TE, EI, Qy, Qcb, EN, MI, SF, AG, SD, CC, SCD, VIF,
        MSE, PSNR, Qabf, Nabf, SSIM, MS_SSIM).
    """
    def _load(path):
        gray = Image.open(path).convert('L')
        as_tensor = torch.tensor(np.array(gray)).float().to(device)
        as_int = np.array(gray).astype(np.int32)
        as_float = np.array(gray).astype(np.float32)
        return as_tensor, as_int, as_float

    f_img_tensor, f_img_int, f_img_double = _load(f_name)
    ir_img_tensor, ir_img_int, ir_img_double = _load(ir_name)
    vi_img_tensor, vi_img_int, vi_img_double = _load(vi_name)

    tensors = (ir_img_tensor, vi_img_tensor, f_img_tensor)
    ints = (ir_img_int, vi_img_int, f_img_int)
    floats = (ir_img_double, vi_img_double, f_img_double)

    CE = CE_function(*tensors)
    NMI = NMI_function(*ints, gray_level=256)
    QNCIE = QNCIE_function(*tensors)
    TE = TE_function(*tensors)
    EI = EI_function(f_img_tensor)
    Qy = Qy_function(*tensors)
    Qcb = Qcb_function(*tensors)
    EN = EN_function(f_img_tensor)
    MI = MI_function(*ints, gray_level=256)
    SF = SF_function(f_img_tensor)
    SD = SD_function(f_img_tensor)
    AG = AG_function(f_img_tensor)
    PSNR = PSNR_function(*tensors)
    MSE = MSE_function(*tensors)
    VIF = VIF_function(*tensors)
    CC = CC_function(*tensors)
    SCD = SCD_function(*tensors)
    Qabf = Qabf_function(*floats)
    Nabf = Nabf_function(*tensors)
    SSIM = SSIM_function(*floats)
    MS_SSIM = MS_SSIM_function(*floats)

    # The return order differs from the computation order; callers unpack
    # positionally, so it must stay as is.
    return (CE, NMI, QNCIE, TE, EI, Qy, Qcb, EN, MI, SF, AG, SD, CC, SCD,
            VIF, MSE, PSNR, Qabf, Nabf, SSIM, MS_SSIM)


if __name__ == '__main__':
    if __name__ == '__main__':
        with_mean = True
        config = {
            'dataroot': '/mnt/disk1/IVIF/',  # Change to your local infrared and visible images path
            'results_root': '/mnt/disk1/IVIF/',  # Change to your local fusion images path
            'dataset': 'M3FD_4200',  # Specify the dataset name
            'save_dir': '/mnt/disk4/test'  # Directory for saving metrics
        }

        ir_dir = os.path.join(config['dataroot'], config['dataset'], 'Ir')  # Infrared images directory
        vi_dir = os.path.join(config['dataroot'], config['dataset'], 'Vis')  # Visible images directory
        f_dir = os.path.join(config['results_root'], config['dataset'])  # Fusion images directory
        os.makedirs(config['save_dir'], exist_ok=True)
        filelist = natsorted(os.listdir(ir_dir))[:300]
        metric_save_name = os.path.join(config['save_dir'], f'metric_{config["dataset"]}.xlsx')  # Metrics file name

        # Change to the directory name of the fusion images you want to evaluate
        Method_list = [
            'BDLFusion', 'CAF', 'CDDFuse', 'CoCoNet', 'DATFuse', 'DDcGAN', 'DDFM',
            'DeFusion', 'Densefuse', 'DIDFuse', 'EMMA', 'FusinDN', 'GANMcC',
            'IF-FILM', 'IGNet', 'IRFS', 'LRRNet', 'MetaFusion', 'MFEIF', 'MRFS',
            'PAIF', 'PMGI', 'PSFusion', 'ReCoNet', 'RFN-Nest', 'SDCFusion',
            'SDNet', 'SeAFusion', 'SegMif', 'SHIP', 'SuperFusion', 'SwinFusion',
            'TarDAL', 'Text-IF', 'TGFuse', 'TIMFusion', 'U2Fusion', 'UMFusion',
            'YDTR', 'FusionGAN', 'DetFusion', 'MoE-Fusion', 'PromptF'
        ]

        # Starting index for the method 'BDLFusion'
        start_index = Method_list.index('BDLFusion')

    for i, Method in enumerate(Method_list[start_index:], start=start_index):
        CE_list = []
        NMI_list = []
        QNCIE_list = []
        TE_list = []
        EI_list = []
        Qy_list = []
        Qcb_list = []
        EN_list = []
        MI_list = []
        SF_list = []
        AG_list = []
        SD_list = []
        CC_list = []
        SCD_list = []
        VIF_list = []
        MSE_list = []
        PSNR_list = []
        Qabf_list = []
        Nabf_list = []
        SSIM_list = []
        MS_SSIM_list = []
        filename_list = ['']
        sub_f_dir = os.path.join(f_dir, Method)
        eval_bar = tqdm(filelist)
        for _, item in enumerate(eval_bar):
            ir_name = os.path.join(ir_dir, item)
            vi_name = os.path.join(vi_dir, item)
            f_name = os.path.join(sub_f_dir, item)

            if os.path.exists(f_name):
                print(ir_name, vi_name, f_name)
                CE, NMI, QNCIE, TE, EI, Qy, Qcb, EN, MI, SF, AG, SD, CC, SCD, VIF, MSE, PSNR, Qabf, Nabf, SSIM, MS_SSIM = evaluation_one(ir_name, vi_name, f_name)
                CE_list.append(CE)
                NMI_list.append(NMI)
                QNCIE_list.append(QNCIE)
                TE_list.append(TE)
                EI_list.append(EI)
                Qy_list.append(Qy)
                Qcb_list.append(Qcb)
                EN_list.append(EN)
                MI_list.append(MI)
                SF_list.append(SF)
                AG_list.append(AG)
                SD_list.append(SD)
                CC_list.append(CC)
                SCD_list.append(SCD)
                VIF_list.append(VIF)
                MSE_list.append(MSE)
                PSNR_list.append(PSNR)
                Qabf_list.append(Qabf)
                Nabf_list.append(Nabf)
                SSIM_list.append(SSIM)
                MS_SSIM_list.append(MS_SSIM)
                filename_list.append(item)
                eval_bar.set_description("{} | {}".format(Method, item))

        if with_mean:
            CE_tensor = torch.tensor(CE_list).mean().item()
            CE_list.append(CE_tensor)
            NMI_tensor = torch.tensor(NMI_list).mean().item()
            NMI_list.append(NMI_tensor)
            QNCIE_tensor = torch.tensor(QNCIE_list).mean().item()
            QNCIE_list.append(QNCIE_tensor)
            TE_tensor = torch.tensor(TE_list).mean().item()
            TE_list.append(TE_tensor)
            EI_tensor = torch.tensor(EI_list).mean().item()
            EI_list.append(EI_tensor)
            Qy_tensor = torch.tensor(Qy_list).mean().item()
            Qy_list.append(Qy_tensor)
            Qcb_tensor = torch.tensor(Qcb_list).mean().item()
            Qcb_list.append(Qcb_tensor)
            EN_tensor = torch.tensor(EN_list).mean().item()
            EN_list.append(EN_tensor)
            MI_tensor = torch.tensor(MI_list).mean().item()
            MI_list.append(MI_tensor)
            SF_tensor = torch.tensor(SF_list).mean().item()
            SF_list.append(SF_tensor)
            AG_tensor = torch.tensor(AG_list).mean().item()
            AG_list.append(AG_tensor)
            SD_tensor = torch.tensor(SD_list).mean().item()
            SD_list.append(SD_tensor)
            CC_tensor = torch.tensor(CC_list).mean().item()
            CC_list.append(CC_tensor)
            SCD_tensor = torch.tensor(SCD_list).mean().item()
            SCD_list.append(SCD_tensor)
            VIF_tensor = torch.tensor(VIF_list).mean().item()
            VIF_list.append(VIF_tensor)
            MSE_tensor = torch.tensor(MSE_list).mean().item()
            MSE_list.append(MSE_tensor)
            PSNR_tensor = torch.tensor(PSNR_list).mean().item()
            PSNR_list.append(PSNR_tensor)
            Qabf_list.append(np.mean(Qabf_list))
            Nabf_tensor = torch.tensor(Nabf_list).mean().item()
            Nabf_list.append(Nabf_tensor)
            SSIM_tensor = torch.tensor(SSIM_list).mean().item()
            SSIM_list.append(SSIM_tensor)
            MS_SSIM_tensor = torch.tensor(MS_SSIM_list).mean().item()
            MS_SSIM_list.append(MS_SSIM_tensor)
            filename_list.append('mean')


        CE_list.insert(0, '{}'.format(Method))
        NMI_list.insert(0, '{}'.format(Method))
        QNCIE_list.insert(0, '{}'.format(Method))
        TE_list.insert(0, '{}'.format(Method))
        EI_list.insert(0, '{}'.format(Method))
        Qy_list.insert(0, '{}'.format(Method))
        Qcb_list.insert(0, '{}'.format(Method))
        EN_list.insert(0, '{}'.format(Method))
        MI_list.insert(0, '{}'.format(Method))
        SF_list.insert(0, '{}'.format(Method))
        AG_list.insert(0, '{}'.format(Method))
        SD_list.insert(0, '{}'.format(Method))
        CC_list.insert(0, '{}'.format(Method))
        SCD_list.insert(0, '{}'.format(Method))
        VIF_list.insert(0, '{}'.format(Method))
        MSE_list.insert(0, '{}'.format(Method))
        PSNR_list.insert(0, '{}'.format(Method))
        Qabf_list.insert(0, '{}'.format(Method))
        Nabf_list.insert(0, '{}'.format(Method))
        SSIM_list.insert(0, '{}'.format(Method))
        MS_SSIM_list.insert(0, '{}'.format(Method))

        if i == start_index:
            write_excel(metric_save_name, 'CE', 0, filename_list)
            write_excel(metric_save_name, 'NMI', 0, filename_list)
            write_excel(metric_save_name, 'QNCIE', 0, filename_list)
            write_excel(metric_save_name, 'TE', 0, filename_list)
            write_excel(metric_save_name, 'EI', 0, filename_list)
            write_excel(metric_save_name, 'Qy', 0, filename_list)
            write_excel(metric_save_name, 'Qcb', 0, filename_list)
            write_excel(metric_save_name, 'EN', 0, filename_list)
            write_excel(metric_save_name, "MI", 0, filename_list)
            write_excel(metric_save_name, "SF", 0, filename_list)
            write_excel(metric_save_name, "AG", 0, filename_list)
            write_excel(metric_save_name, "SD", 0, filename_list)
            write_excel(metric_save_name, "CC", 0, filename_list)
            write_excel(metric_save_name, "SCD", 0, filename_list)
            write_excel(metric_save_name, "VIF", 0, filename_list)
            write_excel(metric_save_name, "MSE", 0, filename_list)
            write_excel(metric_save_name, "PSNR", 0, filename_list)
            write_excel(metric_save_name, "Qabf", 0, filename_list)
            write_excel(metric_save_name, "Nabf", 0, filename_list)
            write_excel(metric_save_name, "SSIM", 0, filename_list)
            write_excel(metric_save_name, "MS_SSIM", 0, filename_list)

        write_excel(metric_save_name, 'CE', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in CE_list])
        write_excel(metric_save_name, 'NMI', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in NMI_list])
        write_excel(metric_save_name, 'QNCIE', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in QNCIE_list])
        write_excel(metric_save_name, 'TE', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in TE_list])
        write_excel(metric_save_name, 'EI', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in EI_list])
        write_excel(metric_save_name, 'Qy', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in Qy_list])
        write_excel(metric_save_name, 'Qcb', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in Qcb_list])
        write_excel(metric_save_name, 'EN', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in EN_list])
        write_excel(metric_save_name, 'MI', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in MI_list])
        write_excel(metric_save_name, 'SF', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in SF_list])
        write_excel(metric_save_name, 'AG', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in AG_list])
        write_excel(metric_save_name, 'SD', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in SD_list])
        write_excel(metric_save_name, 'CC', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in CC_list])
        write_excel(metric_save_name, 'SCD', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in SCD_list])
        write_excel(metric_save_name, 'VIF', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in VIF_list])
        write_excel(metric_save_name, 'MSE', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in MSE_list])
        write_excel(metric_save_name, 'PSNR', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in PSNR_list])
        write_excel(metric_save_name, 'Qabf', i + 1, Qabf_list)
        write_excel(metric_save_name, 'Nabf', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in Nabf_list])
        write_excel(metric_save_name, 'SSIM', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in SSIM_list])
        write_excel(metric_save_name, 'MS_SSIM', i + 1,
                    [x.item() if isinstance(x, torch.Tensor) else float(x) if isinstance(x, (int, float)) else x for x
                     in MS_SSIM_list])


================================================
FILE: Metric/ssim.py
================================================
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
import numpy as np

def _fspecial_gauss_1d(size, sigma):
    """Build a normalized 1-D Gaussian kernel.

    Args:
        size: number of taps in the kernel.
        sigma: standard deviation of the Gaussian.

    Returns:
        A float32 tensor of shape (1, 1, size) whose entries sum to 1.
    """
    # Tap positions measured relative to index ``size // 2``.
    offsets = torch.arange(size, dtype=torch.float32) - size // 2

    kernel = torch.exp(-offsets.pow(2) / (2 * sigma ** 2))
    kernel = kernel / kernel.sum()

    # Prepend two singleton axes so the kernel can be tiled into a conv weight.
    return kernel[None, None, :]


def gaussian_filter(input, win):
    """Blur ``input`` with a 1-D window applied along each spatial axis in turn.

    Args:
        input: 4-d or 5-d tensor; axis 1 is used as the channel count for the
            grouped convolution and axes 2+ are the axes being filtered.
        win: window tensor whose axes between the first and the last all have
            size 1 (asserted); its last axis holds the filter taps.

    Returns:
        The filtered tensor. The convolution uses stride 1 and no padding, so
        each filtered axis shrinks by ``win.shape[-1] - 1``. An axis shorter
        than the window is left unfiltered and a warning is issued.

    Raises:
        NotImplementedError: if ``input`` is neither 4-d nor 5-d.
    """
    assert all(extent == 1 for extent in win.shape[1:-1]), win.shape

    rank = len(input.shape)
    if rank not in (4, 5):
        raise NotImplementedError(input.shape)
    conv = F.conv2d if rank == 4 else F.conv3d

    channels = input.shape[1]
    taps = win.shape[-1]
    blurred = input
    for offset, extent in enumerate(input.shape[2:]):
        if extent < taps:
            warnings.warn(
                f"Skipping Gaussian Smoothing at dimension 2+{offset} for input: {input.shape} and win size: {win.shape[-1]}"
            )
            continue

        # Swap the tap axis of the window with the axis currently being filtered.
        axis = 2 + offset
        order = list(range(win.ndim))
        order[axis], order[-1] = order[-1], order[axis]
        blurred = conv(blurred, weight=win.permute(order), stride=1, padding=0, groups=channels)

    return blurred


def _ssim(X, Y, data_range, win, K=(0.01, 0.03)):
    """Compute per-channel SSIM and contrast-structure values for two tensors.

    Args:
        X, Y: tensors passed to ``gaussian_filter`` (so 4-d or 5-d).
        data_range: value range of the inputs, used to scale the constants.
        win: Gaussian window; converted to the type of ``X`` before use.
        K: pair ``(K1, K2)`` of stabilising constants.

    Returns:
        ``(ssim_per_channel, cs)``: the SSIM map and the contrast-structure map,
        each flattened from axis 2 onward and averaged over that flattened axis.
    """
    k1, k2 = K
    c1 = (k1 * data_range) ** 2
    c2 = (k2 * data_range) ** 2
    compensation = 1.0

    kernel = win.type_as(X)

    def smooth(tensor):
        return gaussian_filter(tensor, kernel)

    # Local means.
    mean_x = smooth(X)
    mean_y = smooth(Y)
    mean_x_sq = mean_x.pow(2)
    mean_y_sq = mean_y.pow(2)
    mean_xy = mean_x * mean_y

    # Local variances and covariance: E[ab] - E[a]E[b].
    var_x = compensation * (smooth(X * X) - mean_x_sq)
    var_y = compensation * (smooth(Y * Y) - mean_y_sq)
    cov_xy = compensation * (smooth(X * Y) - mean_xy)

    contrast_map = (2 * cov_xy + c2) / (var_x + var_y + c2)
    luminance_map = (2 * mean_xy + c1) / (mean_x_sq + mean_y_sq + c1)
    similarity_map = luminance_map * contrast_map

    ssim_per_channel = torch.flatten(similarity_map, 2).mean(-1)
    cs = torch.flatten(contrast_map, 2).mean(-1)
    return ssim_per_channel, cs

def ssim(
        X,
        Y,
        data_range=255,
        size_average=True,
        win_size=11,
        win_sigma=1.5,
        win=None,
        K=(0.01, 0.03),
        nonnegative_ssim=False):
    """Compute the SSIM index between two images.

    Both inputs are converted with ``TF.to_tensor``, given two leading singleton
    axes, and then have singleton axes at positions >= 2 squeezed away.

    NOTE(review): ``to_tensor`` may rescale some inputs (e.g. uint8 arrays) to
    [0, 1], which would not match the default ``data_range=255`` -- confirm
    against the callers' input dtype.

    Args:
        X, Y: images accepted by ``TF.to_tensor``; must convert to equal shapes.
        data_range: value range of the images.
        size_average: if true return the mean over everything, otherwise the
            mean over axis 1 only.
        win_size: number of taps of the Gaussian window (must be odd); ignored
            when ``win`` is supplied.
        win_sigma: standard deviation of the Gaussian window.
        win: optional precomputed window; its last axis defines ``win_size``.
        K: pair ``(K1, K2)`` of stabilising constants.
        nonnegative_ssim: if true, clamp negative per-channel values to zero.

    Returns:
        A tensor holding the SSIM value(s).

    Raises:
        ValueError: on mismatched shapes or dtypes, a tensor rank other than
            4 or 5 after squeezing, or an even window size.
    """
    X = TF.to_tensor(X)[None, None]
    Y = TF.to_tensor(Y)[None, None]
    if X.shape != Y.shape:
        raise ValueError("Input images should have the same dimensions.")

    # Walk from the last axis down to axis 2, dropping singleton axes.
    axis = len(X.shape) - 1
    while axis > 1:
        X = torch.squeeze(X, dim=axis)
        Y = torch.squeeze(Y, dim=axis)
        axis -= 1

    rank = len(X.shape)
    if rank != 4 and rank != 5:
        raise ValueError(f"Input images should be 4-d or 5-d tensors, but got {X.shape}")

    if X.dtype != Y.dtype:
        raise ValueError("Input images should have the same dtype.")

    window_given = win is not None
    if window_given:
        # A caller-supplied window dictates the window size.
        win_size = win.shape[-1]

    if win_size % 2 != 1:
        raise ValueError("Window size should be odd.")

    if not window_given:
        base_kernel = _fspecial_gauss_1d(win_size, win_sigma)
        win = base_kernel.repeat([X.shape[1]] + [1] * (len(X.shape) - 1))

    per_channel, _ = _ssim(X, Y, data_range=data_range, win=win, K=K)
    if nonnegative_ssim:
        per_channel = F.relu(per_channel)

    if not size_average:
        return per_channel.mean(dim=1)
    return per_channel.mean()


def ms_ssim(
        X,
        Y,
        data_range=255,
        size_average=True,
        win_size=11,
        win_sigma=1.5,
        win=None,
        weights=None,
        K=(0.01, 0.03)
    ):
    """Compute the multi-scale SSIM index between two images.

    Both inputs are converted with ``TF.to_tensor``, given two leading singleton
    axes, and then have singleton axes at positions >= 2 squeezed away. SSIM is
    evaluated once per weight; between scales both images are halved with
    average pooling.

    Args:
        X, Y: images accepted by ``TF.to_tensor``; must convert to equal shapes.
        data_range: value range of the images.
        size_average: if true return the mean over everything, otherwise the
            mean over axis 1 only.
        win_size: number of taps of the Gaussian window (must be odd); ignored
            when ``win`` is supplied.
        win_sigma: standard deviation of the Gaussian window.
        win: optional precomputed window; its last axis defines ``win_size``.
        weights: per-scale exponents; defaults to five built-in values.
        K: pair ``(K1, K2)`` of stabilising constants.

    Returns:
        A tensor holding the MS-SSIM value(s).

    Raises:
        ValueError: on mismatched shapes or dtypes, a tensor rank other than
            4 or 5 after squeezing, or an even window size.
        AssertionError: if the smaller of the last two axes is not larger than
            ``(win_size - 1) * 2 ** 4``.
    """
    X = TF.to_tensor(X)[None, None]
    Y = TF.to_tensor(Y)[None, None]
    if X.shape != Y.shape:
        raise ValueError("Input images should have the same dimensions.")

    # Walk from the last axis down to axis 2, dropping singleton axes.
    axis = len(X.shape) - 1
    while axis > 1:
        X = X.squeeze(dim=axis)
        Y = Y.squeeze(dim=axis)
        axis -= 1

    if X.dtype != Y.dtype:
        raise ValueError("Input images should have the same dtype.")

    rank = len(X.shape)
    if rank not in (4, 5):
        raise ValueError(f"Input images should be 4-d or 5-d tensors, but got {X.shape}")
    avg_pool = F.avg_pool2d if rank == 4 else F.avg_pool3d

    window_given = win is not None
    if window_given:
        # A caller-supplied window dictates the window size.
        win_size = win.shape[-1]

    if win_size % 2 != 1:
        raise ValueError("Window size should be odd.")

    # Four halvings must still leave room for the window.
    min_side_bound = (win_size - 1) * (2 ** 4)
    assert min(X.shape[-2:]) > min_side_bound, (
        "Image size should be larger than %d due to the 4 downsamplings in ms-ssim" % min_side_bound
    )

    if weights is None:
        weights = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]
    weights = torch.tensor(weights, dtype=X.dtype)

    if not window_given:
        base_kernel = _fspecial_gauss_1d(win_size, win_sigma)
        win = base_kernel.repeat([X.shape[1]] + [1] * (len(X.shape) - 1))

    scale_count = weights.shape[0]
    contrast_terms = []
    for scale in range(scale_count):
        ssim_per_channel, contrast = _ssim(X, Y, win=win, data_range=data_range, K=K)

        if scale == scale_count - 1:
            # The coarsest scale contributes its full SSIM value instead.
            break

        contrast_terms.append(F.relu(contrast))
        # Pad odd-sized axes so the 2x pooling covers every pixel.
        pad = [extent % 2 for extent in X.shape[2:]]
        X = avg_pool(X, kernel_size=2, padding=pad)
        Y = avg_pool(Y, kernel_size=2, padding=pad)

    ssim_per_channel = F.relu(ssim_per_channel)
    stacked = torch.stack(contrast_terms + [ssim_per_channel], dim=0)
    exponents = weights.reshape((-1, 1, 1))
    ms_ssim_val = torch.prod(stacked ** exponents, dim=0)

    if not size_average:
        return ms_ssim_val.mean(dim=1)
    return ms_ssim_val.mean()

class SSIM(nn.Module):
    """Module wrapper around ``ssim`` that stores its settings and window."""

    def __init__(
        self,
        data_range=255,
        size_average=True,
        win_size=11,
        win_sigma=1.5,
        channel=3,
        spatial_dims=2,
        K=(0.01, 0.03),
        nonnegative_ssim=False,
    ):
        """Store the SSIM settings and precompute the Gaussian window.

        Args:
            data_range: value range of the images.
            size_average: forwarded to ``ssim``.
            win_size: number of taps of the Gaussian window.
            win_sigma: standard deviation of the Gaussian window.
            channel: number of copies of the window along its first axis.
            spatial_dims: number of trailing singleton tiling factors.
            K: pair ``(K1, K2)`` of stabilising constants.
            nonnegative_ssim: forwarded to ``ssim``.
        """
        super().__init__()
        self.data_range = data_range
        self.size_average = size_average
        self.win_size = win_size
        self.K = K
        self.nonnegative_ssim = nonnegative_ssim

        # Kept as a plain tensor attribute, one window copy per channel.
        tiling = [channel, 1] + [1] * spatial_dims
        self.win = _fspecial_gauss_1d(win_size, win_sigma).tile(tiling)

    def forward(self, X, Y):
        """Return the SSIM of ``X`` and ``Y`` as a Python number."""
        score = ssim(
            X,
            Y,
            data_range=self.data_range,
            size_average=self.size_average,
            win=self.win,
            K=self.K,
            nonnegative_ssim=self.nonnegative_ssim,
        )
        return score.item()


class MS_SSIM(nn.Module):
    """Module wrapper around ``ms_ssim`` that stores its settings and window."""

    def __init__(
        self,
        data_range=255,
        size_average=True,
        win_size=11,
        win_sigma=1.5,
        channel=3,
        spatial_dims=2,
        weights=None,
        K=(0.01, 0.03),
    ):
        """Store the MS-SSIM settings and precompute the Gaussian window.

        Args:
            data_range: value range of the images.
            size_average: forwarded to ``ms_ssim``.
            win_size: number of taps of the Gaussian window.
            win_sigma: standard deviation of the Gaussian window.
            channel: number of copies of the window along its first axis.
            spatial_dims: number of trailing singleton tiling factors.
            weights: per-scale exponents forwarded to ``ms_ssim``.
            K: pair ``(K1, K2)`` of stabilising constants.
        """
        super().__init__()
        self.data_range = data_range
        self.size_average = size_average
        self.win_size = win_size
        self.weights = weights
        self.K = K

        # Kept as a plain tensor attribute, one window copy per channel.
        tiling = [channel, 1] + [1] * spatial_dims
        self.win = _fspecial_gauss_1d(win_size, win_sigma).tile(tiling)

    def forward(self, X, Y):
        """Return the MS-SSIM of ``X`` and ``Y`` as a Python number."""
        score = ms_ssim(
            X,
            Y,
            data_range=self.data_range,
            size_average=self.size_average,
            win=self.win,
            weights=self.weights,
            K=self.K,
        )
        return score.item()


================================================
FILE: README.md
================================================

## Latest News 🔥🔥
[2024-12-12] Our survey paper [__Infrared and Visible Image Fusion: From Data Compatibility to Task Adaption.__] has been accepted by IEEE Transactions on Pattern Analysis and Machine Intelligence!
([Paper](https://ieeexplore.ieee.org/abstract/document/10812907))([中文版](https://pan.baidu.com/s/1EIRYSULa-pd2FRmIdG693g?pwd=aiey))

[2026-04-15] We have updated the repository with state-of-the-art methods for both Image Fusion and Video Fusion.

# IVIF Zoo
Welcome to IVIF Zoo, a comprehensive repository dedicated to Infrared and Visible Image Fusion (IVIF). Based on our survey paper [__Infrared and Visible Image Fusion: From Data Compatibility to Task Adaption.__ *Jinyuan Liu, Guanyao Wu, Zhu Liu, Di Wang, Zhiying Jiang, Long Ma, Wei Zhong, Xin Fan, Risheng Liu**], this repository aims to serve as a central hub for researchers, engineers, and enthusiasts in the field of IVIF. Here, you'll find a wide array of resources, tools, and datasets, curated to accelerate advancements and foster collaboration in infrared-visible image fusion technologies.

***

![preview](assets/light2.png)
<sub>A detailed spectrogram depicting almost all wavelength and frequency ranges, particularly expanding the range of the human visual system and annotating corresponding computer vision and image fusion datasets.</sub>

![preview](assets/pipeline1.png)
The diagram of infrared and visible image fusion for practical applications. Existing image fusion methods mainly focus on the design of architectures and training strategies for visual enhancement, with few considering adaptation to downstream visual perception tasks. From the data compatibility perspective, pixel misalignment and adversarial attacks are two major challenges for image fusion. Additionally, integrating comprehensive semantic information for tasks like semantic segmentation, object detection, and salient object detection remains underexplored, posing a critical obstacle in image fusion.

![preview](assets/sankey1.png)
A classification Sankey diagram containing typical fusion methods.

***

## 导航(Navigation)

- [数据集 (Datasets)](#数据集datasets)
- [方法集 (Method Set)](#方法集method-set)
  - [纯融合方法 (Fusion for Visual Enhancement)](#纯融合方法fusion-for-visual-enhancement)
  - [数据兼容方法 (Data Compatible)](#数据兼容方法data-compatible)
  - [面向应用方法 (Application-oriented)](#面向应用方法application-oriented)
- [评价指标 (Evaluation Metric)](#评价指标evaluation-metric)
###  [🔥🚀资源库 (Resource Library)](#资源库resource-library)  
`It covers all results of our survey paper, available for download from Baidu Cloud.`
  - 💥[融合 (Fusion)](#融合fusion) 
  - ✂️[分割 (Segmentation)](#分割segmentation) `Based on SegFormer`
  - 🔍[检测 (Detection)](#检测detection) `Based on YOLO-v5`
  - [计算效率 (Computational Efficiency)](#计算效率computational-efficiency)
# 数据集(Datasets)
## 图像数据集（Image Datasets）
<table>
    <thead>
        <tr>
            <th>Dataset</th>
            <th>Img pairs</th>
            <th>Resolution</th>
            <th>Color</th>
            <th>Obj/Cats</th>
            <th>Cha-Sc</th>
            <th>Anno</th>
            <th>Download</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>TNO</td>
            <td>261</td>
            <td>768×576</td>
            <td>❌</td>
            <td>few</td>
            <td>✔</td>
            <td>❌</td>
            <td><a href="https://figshare.com/articles/dataset/TNO_Image_Fusion_Dataset/1008029">Link</a></td>
        </tr>
        <tr>
            <td>RoadScene 🔥</td>
            <td>221</td>
            <td>Various</td>
            <td>✔</td>
            <td>medium</td>
            <td>❌</td>
            <td>❌</td>
            <td><a href="https://github.com/hanna-xu/RoadScene">Link</a></td>
        </tr>
        <tr>
            <td>VIFB</td>
            <td>21</td>
            <td>Various</td>
            <td>Various</td>
            <td>few</td>
            <td>❌</td>
            <td>❌</td>
            <td><a href="https://github.com/xingchenzhang/Visible-infrared-image-fusion-benchmark">Link</a></td>
        </tr>
        <tr>
            <td>MS</td>
            <td>2999</td>
            <td>768×576</td>
            <td>✔</td>
            <td>14146 / 6</td>
            <td>❌</td>
            <td>✔</td>
            <td><a href="https://www.mi.t.u-tokyo.ac.jp/projects/mil_multispectral/index.html">Link</a></td>
        </tr>
        <tr>
            <td>LLVIP</td>
            <td>16836</td>
            <td>1280×720</td>
            <td>✔</td>
            <td>pedestrian / 1</td>
            <td>❌</td>
            <td>✔</td>
            <td><a href="https://bupt-ai-cz.github.io/LLVIP/">Link</a></td>
        </tr>
        <tr>
            <td>M<sup>3</sup>FD 🔥</td>
            <td>4200</td>
            <td>1024×768</td>
            <td>✔</td>
            <td>33603 / 6</td>
            <td>✔</td>
            <td>✔</td>
            <td><a href="https://github.com/JinyuanLiu-CV/TarDAL">Link</a></td>
        </tr>
        <tr>
            <td>MFNet</td>
            <td>1569</td>
            <td>640×480</td>
            <td>✔</td>
            <td>abundant / 8</td>
            <td>❌</td>
            <td>✔</td>
            <td><a href="https://www.mi.t.u-tokyo.ac.jp/static/projects/mil_multispectral/">Link</a></td>
        </tr>
        <tr>
            <td>FMB 🔥</td>
            <td>1500</td>
            <td>800×600</td>
            <td>✔</td>
            <td>abundant / 14</td>
            <td>❌</td>
            <td>✔</td>
            <td><a href="https://github.com/JinyuanLiu-CV/SegMiF">Link</a></td>
        </tr>
    </tbody>
</table>

## 视频数据集（Video Datasets）

<table>
    <thead>
        <tr>
            <th>Dataset</th>
            <th>Video Count</th>
            <th>Total Frames</th>
            <th>Resolution</th>
            <th>Download</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>VF-Bench</td>
            <td>797</td>
            <td>Over 200,000</td>
            <td>2K/540p/480p</td>
            <td><a href="https://share.phys.ethz.ch/~pf/zixiangdata/vfbench/">Link</a></td>
        </tr>
    </tbody>
    <tbody>
        <tr>
            <td>HDO</td>
            <td>24</td>
            <td>7,500</td>
            <td>640×480</td>
            <td><a href="https://github.com/xiehousheng/HDO">Link</a></td>
        </tr>
    </tbody>
    <tbody>
        <tr>
            <td>M3SVD</td>
            <td>220</td>
            <td>153,797</td>
            <td>640×480</td>
            <td><a href="https://github.com/Linfeng-Tang/M3SVD">Link</a></td>
        </tr>
    </tbody>
</table>


If the M<sup>3</sup>FD and FMB datasets are helpful to you, please cite the following paper:

```
@inproceedings{liu2022target,
  title={Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection},
  author={Liu, Jinyuan and Fan, Xin and Huang, Zhanbo and Wu, Guanyao and Liu, Risheng and Zhong, Wei and Luo, Zhongxuan},
  booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
  pages={5802--5811},
  year={2022}
}
```

```
@inproceedings{liu2023multi,
  title={Multi-interactive feature learning and a full-time multi-modality benchmark for image fusion and segmentation},
  author={Liu, Jinyuan and Liu, Zhu and Wu, Guanyao and Ma, Long and Liu, Risheng and Zhong, Wei and Luo, Zhongxuan and Fan, Xin},
  booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
  pages={8115--8124},
  year={2023}
}
```

# 方法集(Method Set)
## 纯融合方法(Fusion for Visual Enhancement)
<table>
    <thead>
        <tr>
            <th>Aspects<br>(分类)</th>
            <th>Methods<br>(方法)</th>
            <th>Title<br>(标题)</th>
            <th>Venue<br>(发表场所)</th>
            <th>Source<br>(资源)</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Auto-Encoder</td>
            <td>DenseFuse</td>
            <td>Densefuse: A fusion approach to infrared and visible images</td>
            <td>TIP '18</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/8580578/">Paper</a>/<a href="https://github.com/hli1221/imagefusion_densefuse">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>SEDRFuse</td>
            <td>Sedrfuse: A symmetric encoder–decoder with residual block network for infrared and visible image fusion</td>
            <td>TIM '20</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9187663/">Paper</a>/<a href="https://github.com/jianlihua123/SEDRFuse">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>DIDFuse</td>
            <td>Didfuse: Deep image decomposition for infrared and visible image fusion</td>
            <td>IJCAI '20</td>
            <td><a href="https://arxiv.org/abs/2003.09210">Paper</a>/<a href="https://github.com/Zhaozixiang1228/IVIF-DIDFuse">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>MFEIF</td>
            <td>Learning a deep multi-scale feature ensemble and an edge-attention guidance for image fusion</td>
            <td>TCSVT '21</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9349250/">Paper</a>/<a href="https://github.com/JinyuanLiu-CV/MFEIF">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>RFN-Nest</td>
            <td>Rfn-nest: An end-to-end residual fusion network for infrared and visible images</td>
            <td>InfFus '21</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253521000440">Paper</a>/<a href="https://github.com/hli1221/imagefusion-rfn-nest">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>SFAFuse</td>
            <td>Self-supervised feature adaption for infrared and visible image fusion</td>
            <td>InfFus '21</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253521001287">Paper</a>/<a href="https://github.com/zhoafan/SFA-Fuse">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>SMoA</td>
            <td>Smoa: Searching a modality-oriented architecture for infrared and visible image fusion</td>
            <td>SPL '21</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9528046/">Paper</a>/<a href="https://github.com/JinyuanLiu-CV/SMoA">Code</a></td>
        </tr>
        <tr>
            <td>Auto-Encoder</td>
            <td>Res2Fusion</td>
            <td>Res2fusion: Infrared and visible image fusion based on dense res2net and double nonlocal attention models</td>
            <td>TIM '22</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9670874/">Paper</a>/<a href="https://github.com/Zhishe-Wang/Res2Fusion">Code</a></td>
        </tr>
             <tr>
        <td>Auto-Encoder</td>
        <td>RPFNet</td>
        <td>Residual Prior-driven Frequency-aware Network for Image Fusion</td>
        <td>ACM MM '25</td>
        <td><a href="https://arxiv.org/abs/2507.06735">Paper</a>/<a href="https://github.com/wang-x-1997/RPFNet">Code</a></td>
    </tr>
    <tr>
        <td>Auto-Encoder</td>
        <td>TTD</td>
        <td>Test-Time Dynamic Image Fusion</td>
        <td>NeurIPS '24</td>
        <td><a href="https://nips.cc/virtual/2024/poster/95415">Paper</a>/<a href="https://github.com/Yinan-Xia/TTD">Code</a></td>
    </tr>
        <tr>
            <td>GAN</td>
            <td>FusionGAN</td>
            <td>Fusiongan: A generative adversarial network for infrared and visible image fusion</td>
            <td>InfFus '19</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253518301143">Paper</a>/<a href="https://github.com/jiayi-ma/FusionGAN">Code</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>DDcGAN</td>
            <td>Learning a generative model for fusing infrared and visible images via conditional generative adversarial network with dual discriminators</td>
            <td>TIP '19</td>
            <td><a href="https://www.ijcai.org/proceedings/2019/0549.pdf">Paper</a>/<a href="https://github.com/hanna-xu/DDcGAN">Code</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>AtFGAN</td>
            <td>Attentionfgan: Infrared and visible image fusion using attention-based generative adversarial networks</td>
            <td>TMM '20</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9103116">Paper</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>DPAL</td>
            <td>Infrared and visible image fusion via detail preserving adversarial learning</td>
            <td>InfFus '20</td>
            <td><a href="https://www.sciencedirect.com/science/article/abs/pii/S1566253519300314">Paper</a>/<a href="https://github.com/StaRainJ/ResNetFusion">Code</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>D2WGAN</td>
            <td>Infrared and visible image fusion using dual discriminators generative adversarial networks with wasserstein distance</td>
            <td>InfSci '20</td>
            <td><a href="https://www.sciencedirect.com/science/article/abs/pii/S0020025520303431">Paper</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>GANMcC</td>
            <td>Ganmcc: A generative adversarial network with multiclassification constraints for infrared and visible image fusion</td>
            <td>TIM '20</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9274337/">Paper</a>/<a href="https://github.com/HaoZhang1018/GANMcC">Code</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>ICAFusion</td>
            <td>Infrared and visible image fusion via interactive compensatory attention adversarial learning</td>
            <td>TMM '22</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9982426/">Paper</a>/<a href="https://github.com/Zhishe-Wang/ICAFusion">Code</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>TCGAN</td>
            <td>Transformer based conditional gan for multimodal image fusion</td>
            <td>TMM '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10041783/">Paper</a>/<a href="https://github.com/jinxiqinghuan/TCGAN">Code</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>DCFusion</td>
            <td>DCFusion: A Dual-Frequency Cross-Enhanced Fusion Network for Infrared and Visible Image Fusion</td>
            <td>TIM '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10102546">Paper</a></td>
        </tr>
        <tr>
            <td>GAN</td>
            <td>FreqGAN</td>
            <td>Freqgan: Infrared and visible image fusion via unified frequency adversarial learning</td>
            <td>TCSVT '24</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10680110/">Paper</a>/<a href="https://github.com/Zhishe-Wang/FreqGAN">Code</a></td>
        </tr>
     <tr>
        <td>GAN</td>
        <td>DDBF</td>
        <td>Dispel Darkness for Better Fusion: A Controllable Visual Enhancer based on Cross-modal Conditional Adversarial Learning</td>
        <td>CVPR '24</td>
        <td><a href="https://openaccess.thecvf.com/content/CVPR2024/html/Zhang_Dispel_Darkness_for_Better_Fusion_A_Controllable_Visual_Enhancer_based_CVPR_2024_paper.html">Paper</a>/<a href="https://github.com/HaoZhang1018/DDBF">Code</a></td>
    </tr>
    <tr>
        <td>GAN</td>
        <td>CCF</td>
        <td>Conditional Controllable Image Fusion</td>
        <td>NeurIPS '24</td>
        <td><a href="https://proceedings.neurips.cc/paper_files/paper/2024/file/d99e8e80a6c41e148db686918dd7eab3-Paper-Conference.pdf">Paper</a>/<a href="https://github.com/jehovahxu/CCF">Code</a></td>
    </tr>
        <tr>
            <td>CNN</td>
            <td>BIMDL</td>
            <td>A bilevel integrated model with data-driven layer ensemble for multi-modality image fusion</td>
            <td>TIP '20</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9293146">Paper</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>MgAN-Fuse</td>
            <td>Multigrained attention network for infrared and visible image fusion</td>
            <td>TIM '20</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9216075">Paper</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>AUIF</td>
            <td>Efficient and model-based infrared and visible image fusion via algorithm unrolling</td>
            <td>TCSVT '21</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9416456">Paper</a>/<a href="https://github.com/Zhaozixiang1228/IVIF-AUIF-Net">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>RXDNFuse</td>
            <td>Rxdnfuse: A aggregated residual dense network for infrared and visible image fusion</td>
            <td>InfFus '21</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253520304152">Paper</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>STDFusionNet</td>
            <td>Stdfusionnet: An infrared and visible image fusion network based on salient target detection</td>
            <td>TIM '21</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9416507">Paper</a>/<a href="https://github.com/jiayi-ma/STDFusionNet">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>CUFD</td>
            <td>Cufd: An encoder–decoder network for visible and infrared image fusion based on common and unique feature decomposition</td>
            <td>CVIU '22</td>
            <td><a href="https://www.sciencedirect.com/science/article/abs/pii/S1077314222000352">Paper</a>/<a href="https://github.com/Meiqi-Gong/CUFD">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>Dif-Fusion</td>
            <td>Dif-fusion: Towards high color fidelity in infrared and visible image fusion with diffusion models</td>
            <td>TIP '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10286359/">Paper</a>/<a href="https://github.com/GeoVectorMatrix/Dif-Fusion">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>L2Net</td>
            <td>L2Net: Infrared and Visible Image Fusion Using Lightweight Large Kernel Convolution Network</td>
            <td>TIP '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10301581">Paper</a>/<a href="https://github.com/chang-le-11/L2Net">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>IGNet</td>
            <td>Learning a graph neural network with cross modality interaction for image fusion</td>
            <td>ACMMM '23</td>
            <td><a href="https://dl.acm.org/doi/abs/10.1145/3581783.3612135">Paper</a>/<a href="https://github.com/lok-18/IGNet">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>LRRNet</td>
            <td>Lrrnet: A novel representation learning guided fusion network for infrared and visible images</td>
            <td>TPAMI '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10105495/">Paper</a>/<a href="https://github.com/hli1221/imagefusion-LRRNet">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>MetaFusion</td>
            <td>Metafusion: Infrared and visible image fusion via meta-feature embedding from object detection</td>
            <td>CVPR '23</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2023/html/Zhao_MetaFusion_Infrared_and_Visible_Image_Fusion_via_Meta-Feature_Embedding_From_CVPR_2023_paper.html">Paper</a>/<a href="https://github.com/wdzhao123/MetaFusion">Code</a></td>
        </tr>
        <tr>
            <td>CNN</td>
            <td>PSFusion</td>
            <td>Rethinking the necessity of image fusion in high-level vision tasks: A practical infrared and visible image fusion network based on progressive semantic injection and scene fidelity</td>
            <td>InfFus '23</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253523001860">Paper</a>/<a href="https://github.com/Linfeng-Tang/PSFusion">Code</a></td>
        </tr>
         <tr>
        <td>CNN</td>
        <td>LUT-Fuse</td>
        <td>LUT-Fuse: Towards Extremely Fast Infrared and Visible Image Fusion via Distillation to Learnable Look-Up Tables</td>
        <td>ICCV '25</td>
        <td><a href="https://arxiv.org/abs/2509.00346">Paper</a>/<a href="https://github.com/zyb5/LUT-Fuse">Code</a></td>
    </tr>
        <tr>
        <td>CNN</td>
        <td>PMAINet</td>
        <td>Progressive Modality-Adaptive Interactive Network for Multi-Modality Image Fusion</td>
        <td>IJCAI '25</td>
        <td><a href="https://ijcai-preprints.s3.us-west-1.amazonaws.com/2025/1791.pdf">Paper</a></td>
    </tr>
        <tr>
            <td>Transformer</td>
            <td>SwinFusion</td>
            <td>Swinfusion: Cross-domain long-range learning for general image fusion via swin transformer</td>
            <td>JAS '22</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9812535">Paper</a>/<a href="https://github.com/Linfeng-Tang/SwinFusion">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>YDTR</td>
            <td>Ydtr: Infrared and visible image fusion via y-shape dynamic transformer</td>
            <td>TMM '22</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9834137">Paper</a>/<a href="https://github.com/tthinking/YDTR">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>IFT</td>
            <td>Image fusion transformer</td>
            <td>ICIP '22</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9897280">Paper</a>/<a href="https://github.com/Vibashan/Image-Fusion-Transformer">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>CDDFuse</td>
            <td>Cddfuse: Correlation-driven dual-branch feature decomposition for multi-modality image fusion</td>
            <td>CVPR '23</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2023/html/Zhao_CDDFuse_Correlation-Driven_Dual-Branch_Feature_Decomposition_for_Multi-Modality_Image_Fusion_CVPR_2023_paper.html">Paper</a>/<a href="https://github.com/Zhaozixiang1228/MMIF-CDDFuse">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>TGFuse</td>
            <td>Tgfuse: An infrared and visible image fusion approach based on transformer and generative adversarial network</td>
            <td>TIP '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10122870">Paper</a>/<a href="https://github.com/dongyuya/TGFuse">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>CMTFusion</td>
            <td>Cross-modal transformers for infrared and visible image fusion</td>
            <td>TCSVT '23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10163247">Paper</a>/<a href="https://github.com/seonghyun0108/CMTFusion">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>Text-IF</td>
            <td>Text-if: Leveraging semantic text guidance for degradation-aware and interactive image fusion</td>
            <td>CVPR '24</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2024/html/Yi_Text-IF_Leveraging_Semantic_Text_Guidance_for_Degradation-Aware_and_Interactive_Image_CVPR_2024_paper.html">Paper</a>/<a href="https://github.com/XunpengYi/Text-IF">Code</a></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>PromptF</td>
            <td>Promptfusion: Harmonized semantic prompt learning for infrared and visible image fusion</td>
            <td>JAS '24</td>
            <td></td>
        </tr>
        <tr>
            <td>Transformer</td>
            <td>MaeFuse</td>
            <td>MaeFuse: Transferring Omni Features With Pretrained Masked Autoencoders for Infrared and Visible Image Fusion via Guided Training</td>
            <td>TIP '25</td>
            <td><a href="https://arxiv.org/pdf/2404.11016">Paper</a>/<a href="https://github.com/Henry-Lee-real/MaeFuse">Code</a></td>
        </tr>
    <tr>
        <td>Transformer</td>
        <td>Fusion with Language-driven</td>
        <td>Infrared and Visible Image Fusion with Language-Driven Loss in CLIP Embedding Space</td>
        <td>ACM MM '24</td>
        <td><a href="https://arxiv.org/abs/2402.16267">Paper</a></td>
    </tr>
    </tbody>
</table>

## 数据兼容方法(Data Compatible)
<table>
            <thead>
                <tr>
                    <th>Aspects<br>(分类)</th>
                    <th>Methods<br>(方法)</th>
                    <th>Title<br>(标题)</th>
                    <th>Venue<br>(发表场所)</th>
                    <th>Source<br>(资源)</th>
                </tr>
            </thead>
    <tbody>
        <tr>
            <td>Registration</td>
            <td>UMIR</td>
            <td>Unsupervised multi-modal image registration via geometry preserving image-to-image translation</td>
            <td>CVPR ‘20</td>
            <td><a href="https://openaccess.thecvf.com/content_CVPR_2020/html/Arar_Unsupervised_Multi-Modal_Image_Registration_via_Geometry_Preserving_Image-to-Image_Translation_CVPR_2020_paper.html">Paper</a>/<a href="https://github.com/moabarar/nemar">Code</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>ReCoNet</td>
            <td>Reconet: Recurrent correction network for fast and efficient multi-modality image fusion</td>
            <td>ECCV ‘22</td>
            <td><a href="https://link.springer.com/chapter/10.1007/978-3-031-19797-0_31">Paper</a>/<a href="https://github.com/dlut-dimt/ReCoNet">Code</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>SuperFusion</td>
            <td>Superfusion: A versatile image registration and fusion network with semantic awareness</td>
            <td>JAS ‘22</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9970457">Paper</a>/<a href="https://github.com/Linfeng-Tang/SuperFusion">Code</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>UMFusion</td>
            <td>Unsupervised misaligned infrared and visible image fusion via cross-modality image generation and registration</td>
            <td>IJCAI ‘22</td>
            <td><a href="https://arxiv.org/abs/2205.11876">Paper</a>/<a href="https://github.com/wdhudiekou/UMF-CMGR">Code</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>GCRF</td>
            <td>General cross-modality registration framework for visible and infrared UAV target image registration</td>
            <td>SR ‘23</td>
            <td><a href="https://www.nature.com/articles/s41598-023-39863-3">Paper</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>MURF</td>
            <td>MURF: mutually reinforcing multi-modal image registration and fusion</td>
            <td>TPAMI ‘23</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10145843">Paper</a>/<a href="https://github.com/hanna-xu/MURF">Code</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>SemLA</td>
            <td>Semantics lead all: Towards unified image registration and fusion from a semantic perspective</td>
            <td>InfFus ‘23</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253523001513">Paper</a>/<a href="https://github.com/xiehousheng/SemLA">Code</a></td>
        </tr>
        <tr>
            <td>Registration</td>
            <td>-</td>
            <td>A Deep Learning Framework for Infrared and Visible Image Fusion Without Strict Registration</td>
            <td>IJCV ‘23</td>
            <td><a href="https://link.springer.com/article/10.1007/s11263-023-01948-x">Paper</a></td>
        </tr>
        <tr>
            <td>Attack</td>
            <td>PAIFusion</td>
            <td>PAIF: Perception-aware infrared-visible image fusion for attack-tolerant semantic segmentation</td>
            <td>ACMMM ‘23</td>
            <td><a href="https://dl.acm.org/doi/abs/10.1145/3581783.3611928">Paper</a>/<a href="https://github.com/LiuZhu-CV/PAIF">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>FusionDN</td>
            <td>FusionDN: A unified densely connected network for image fusion</td>
            <td>AAAI ‘20</td>
            <td><a href="https://aaai.org/ojs/index.php/AAAI/article/view/6936">Paper</a>/<a href="https://github.com/hanna-xu/FusionDN">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>IFCNN</td>
            <td>IFCNN: A general image fusion framework based on convolutional neural network</td>
            <td>InfFus ‘20</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253518305505">Paper</a>/<a href="https://github.com/uzeful/IFCNN">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>PMGI</td>
            <td>Rethinking the image fusion: A fast unified image fusion network based on proportional maintenance of gradient and intensity</td>
            <td>AAAI ‘20</td>
            <td><a href="https://ojs.aaai.org/index.php/AAAI/article/view/6975">Paper</a>/<a href="https://github.com/HaoZhang1018/PMGI_AAAI2020">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>U2Fusion</td>
            <td>U2Fusion: A unified unsupervised image fusion network</td>
            <td>TPAMI ‘20</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/9151265">Paper</a>/<a href="https://github.com/hanna-xu/U2Fusion">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>SDNet</td>
            <td>SDNet: A versatile squeeze-and-decomposition network for real-time image fusion</td>
            <td>IJCV ‘21</td>
            <td><a href="https://link.springer.com/article/10.1007/s11263-021-01501-8">Paper</a>/<a href="https://github.com/HaoZhang1018/SDNet">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>CoCoNet</td>
            <td>CoCoNet: Coupled contrastive learning network with multi-level feature ensemble for multi-modality image fusion</td>
            <td>IJCV ‘23</td>
            <td><a href="https://link.springer.com/article/10.1007/s11263-023-01952-1">Paper</a>/<a href="https://github.com/runjia0124/CoCoNet">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>DDFM</td>
            <td>DDFM: Denoising diffusion model for multi-modality image fusion</td>
            <td>ICCV ‘23</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2023/html/Zhao_DDFM_Denoising_Diffusion_Model_for_Multi-Modality_Image_Fusion_ICCV_2023_paper.html">Paper</a>/<a href="https://github.com/Zhaozixiang1228/MMIF-DDFM">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>EMMA</td>
            <td>Equivariant multi-modality image fusion</td>
            <td>CVPR ‘24</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2024/html/Zhao_Equivariant_Multi-Modality_Image_Fusion_CVPR_2024_paper.html">Paper</a>/<a href="https://github.com/Zhaozixiang1228/MMIF-EMMA">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>FILM</td>
            <td>Image fusion via vision-language model</td>
            <td>ICML ‘24</td>
            <td><a href="https://arxiv.org/abs/2402.02235">Paper</a>/<a href="https://github.com/Zhaozixiang1228/IF-FILM">Code</a></td>
        </tr>
        <tr>
            <td>General</td>
            <td>VDMUFusion</td>
            <td>VDMUFusion: A Versatile Diffusion Model-Based Unsupervised Framework for Image Fusion</td>
            <td>TIP ‘24</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10794610">Paper</a>/<a href="https://github.com/yuliu316316/VDMUFusion">Code</a></td>
        </tr>
             <tr>
           <td>General</td>
           <td>TC-MoA</td>
           <td>Task-Customized Mixture of Adapters for General Image Fusion</td>
           <td>CVPR '24</td>
           <td><a href="https://openaccess.thecvf.com/content/CVPR2024/html/Zhu_Task-Customized_Mixture_of_Adapters_for_General_Image_Fusion_CVPR_2024_paper.html">Paper</a>/<a href="https://github.com/YangSun22/TC-MoA">Code</a></td>
        </tr>
        <tr>
           <td>General</td>
           <td>SHIP</td>
           <td>Probing Synergistic High-Order Interaction in Infrared and Visible Image Fusion</td>
           <td>CVPR '24</td>
           <td><a href="https://openaccess.thecvf.com/content/CVPR2024/papers/Zheng_Probing_Synergistic_High-Order_Interaction_in_Infrared_and_Visible_Image_Fusion_CVPR_2024_paper.pdf">Paper</a>/<a href="https://github.com/zheng980629/SHIP">Code</a></td>
        </tr>
              <tr>
        <td>General</td>
        <td>GIFNet</td>
        <td>One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion</td>
        <td>CVPR '25</td>
        <td><a href="https://openaccess.thecvf.com/content/CVPR2025/html/Cheng_One_Model_for_ALL_Low-Level_Task_Interaction_Is_a_Key_CVPR_2025_paper.html">Paper</a>/<a href="https://github.com/AWCXV/GIFNet">Code</a></td>
    </tr>
    </tbody>
</table>

## 面向应用方法(Application-oriented)
<table>
    <thead>
        <tr>
            <th>Aspects<br>(分类)</th>
            <th>Methods<br>(方法)</th>
            <th>Title<br>(标题)</th>
            <th>Venue<br>(发表场所)</th>
            <th>Source<br>(资源)</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Perception</td>
            <td>DetFusion</td>
            <td>A detection-driven infrared and visible image fusion network</td>
            <td>ACMMM ‘22</td>
            <td><a href="https://dl.acm.org/doi/abs/10.1145/3503161.3547902">Paper</a>/<a href="https://github.com/SunYM2020/DetFusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>SeAFusion</td>
            <td>Image fusion in the loop of high-level vision tasks: A semantic-aware real-time infrared and visible image fusion network</td>
            <td>InfFus ‘22</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253521002542">Paper</a>/<a href="https://github.com/Linfeng-Tang/SeAFusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>TarDAL</td>
            <td>Target-aware dual adversarial learning and a multi-scenario multimodality benchmark to fuse infrared and visible for object detection</td>
            <td>CVPR ‘22</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2022/html/Liu_Target-Aware_Dual_Adversarial_Learning_and_a_Multi-Scenario_Multi-Modality_Benchmark_To_CVPR_2022_paper.html">Paper</a>/<a href="https://github.com/dlut-dimt/TarDAL">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>BDLFusion</td>
            <td>Bi-level dynamic learning for jointly multi-modality image fusion and beyond</td>
            <td>IJCAI ‘23</td>
            <td><a href="https://arxiv.org/abs/2305.06720">Paper</a>/<a href="https://github.com/LiuZhu-CV/BDLFusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>IRFS</td>
            <td>An interactively reinforced paradigm for joint infrared-visible image fusion and saliency object detection</td>
            <td>InfFus ‘23</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253523001446">Paper</a>/<a href="https://github.com/wdhudiekou/IRFS">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>MetaFusion</td>
            <td>Metafusion: Infrared and visible image fusion via meta-feature embedding from object detection</td>
            <td>CVPR ‘23</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2023/html/Zhao_MetaFusion_Infrared_and_Visible_Image_Fusion_via_Meta-Feature_Embedding_From_CVPR_2023_paper.html">Paper</a>/<a href="https://github.com/wdzhao123/MetaFusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>MoE-Fusion</td>
            <td>Multi-modal gated mixture of local-to-global experts for dynamic image fusion</td>
            <td>ICCV ‘23</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2023/html/Cao_Multi-Modal_Gated_Mixture_of_Local-to-Global_Experts_for_Dynamic_Image_Fusion_ICCV_2023_paper.html">Paper</a>/<a href="https://github.com/SunYM2020/MoE-Fusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>SegMiF</td>
            <td>Multi-interactive feature learning and a full-time multimodality benchmark for image fusion and segmentation</td>
            <td>ICCV ‘23</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2023/html/Liu_Multi-interactive_Feature_Learning_and_a_Full-time_Multi-modality_Benchmark_for_Image_ICCV_2023_paper.html">Paper</a>/<a href="https://github.com/JinyuanLiu-CV/SegMiF">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>CAF</td>
            <td>Where elegance meets precision: Towards a compact, automatic, and flexible framework for multi-modality image fusion and applications</td>
            <td>IJCAI ‘24</td>
            <td><a href="https://www.ijcai.org/proceedings/2024/0123.pdf">Paper</a>/<a href="https://github.com/RollingPlain/CAF_IVIF">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>MRFS</td>
            <td>Mrfs: Mutually reinforcing image fusion and segmentation</td>
            <td>CVPR ‘24</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2024/html/Zhang_MRFS_Mutually_Reinforcing_Image_Fusion_and_Segmentation_CVPR_2024_paper.html">Paper</a>/<a href="https://github.com/HaoZhang1018/MRFS">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>TIMFusion</td>
            <td>A task-guided, implicitly searched and meta-initialized deep model for image fusion</td>
            <td>TPAMI ‘24</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10480582">Paper</a>/<a href="https://github.com/LiuZhu-CV/TIMFusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>SAGE</td>
            <td>Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond</td>
            <td>CVPR ‘25</td>
            <td><a href="https://arxiv.org/pdf/2503.01210">Paper</a>/<a href="https://github.com/RollingPlain/SAGE_IVIF">Code</a></td>
        </tr>
             <tr>
            <td>Perception</td>
            <td>DCEvo</td>
            <td>DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion</td>
            <td>CVPR ‘25</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2025/html/Liu_DCEvo_Discriminative_Cross-Dimensional_Evolutionary_Learning_for_Infrared_and_Visible_Image_CVPR_2025_paper.html">Paper</a>/<a href="https://github.com/Beate-Suy-Zhang/DCEvo">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>TDFusion</td>
            <td>Task-driven Image Fusion with Learnable Fusion Loss</td>
            <td>CVPR ‘25</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2025/html/Bai_Task-driven_Image_Fusion_with_Learnable_Fusion_Loss_CVPR_2025_paper.html">Paper</a>/<a href="https://github.com/HaowenBai/TDFusion">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>EVIF</td>
            <td>Event-based Visible and Infrared Fusion via Multi-task Collaboration</td>
            <td>CVPR ‘24</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2024/html/Geng_Event-based_Visible_and_Infrared_Fusion_via_Multi-task_Collaboration_CVPR_2024_paper.html">Paper</a>/<a href="https://github.com/NetaPanda/EVIF">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>MMAIF</td>
            <td>MMAIF: Multi-task and Multi-degradation All-in-One for Image Fusion with Language Guidance</td>
            <td>ICCV ‘25</td>
            <td><a href="https://arxiv.org/abs/2503.14944">Paper</a>/<a href="https://github.com/294coder/MMAIF">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>CMFS</td>
            <td>CMFS: CLIP-Guided Modality Interaction for Mitigating Noise in Multi-Modal Image Fusion and Segmentation</td>
            <td>IJCAI ‘25</td>
            <td><a href="https://ijcai-preprints.s3.us-west-1.amazonaws.com/2025/2440.pdf">Paper</a>/<a href="https://github.com/SuGuilin/IJCAI2025-CMFS">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>A²RNet</td>
            <td>A²RNet: Adversarial Attack Resilient Network for Robust Infrared and Visible Image Fusion</td>
            <td>AAAI ‘25</td>
            <td><a href="https://ojs.aaai.org/index.php/AAAI/article/view/32504">Paper</a>/<a href="https://github.com/lok-18/A2RNet">Code</a></td>
        </tr>
        <tr>
            <td>Perception</td>
            <td>SDSFusion</td>
            <td>SDSFusion: A Semantic-Aware Infrared and Visible Image Fusion Network for Degraded Scenes</td>
            <td>TIP ‘25</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/11014600">Paper</a>/<a href="https://github.com/Liling-yang/SDSFusion">Code</a></td>
        </tr>        
        <tr>
            <td>Perception</td>
            <td>S4Fusion</td>
            <td>S4Fusion: Saliency-Aware Selective State Space Model for Infrared and Visible Image Fusion</td>
            <td>TIP ‘25</td>
            <td><a href="https://ieeexplore.ieee.org/document/11062462">Paper</a>/<a href="https://github.com/zipper112/S4Fusion">Code</a></td>
        </tr>  
        <tr>
            <td>Perception</td>
            <td>FreeFusion</td>
            <td>FreeFusion: Infrared and Visible Image Fusion via Cross Reconstruction Learning</td>
            <td>TPAMI ‘25</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/11010882">Paper</a></td>
        </tr>  
        <tr>
            <td>Perception</td>
            <td>MulFS-CAP</td>
            <td>MulFS-CAP: Multimodal Fusion-Supervised Cross-Modality Alignment Perception for Unregistered Infrared-Visible Image Fusion</td>
            <td>TPAMI ‘25</td>
            <td><a href="https://ieeexplore.ieee.org/document/10856402">Paper</a>/<a href="https://github.com/YR0211/MulFS-CAP">Code</a></td>
        </tr>  
    </tbody>
</table>


# 最新研究进展(Latest Research Progress)
<table>
    <thead>
        <tr>
            <th>Methods<br>(方法)</th>
            <th>Title<br>(标题)</th>
            <th>Venue<br>(发表场所)</th>
            <th>Source<br>(资源)</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>SAGE</td>
            <td>Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond</td>
            <td>CVPR ‘25</td>
            <td><a href="https://arxiv.org/pdf/2503.01210">Paper</a>/<a href="https://github.com/RollingPlain/SAGE_IVIF">Code</a></td>
        </tr>
             <tr>
            <td>DCEvo</td>
            <td>DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion</td>
            <td>CVPR ‘25</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2025/html/Liu_DCEvo_Discriminative_Cross-Dimensional_Evolutionary_Learning_for_Infrared_and_Visible_Image_CVPR_2025_paper.html">Paper</a>/<a href="https://github.com/Beate-Suy-Zhang/DCEvo">Code</a></td>
        </tr>
        <tr>
            <td>TDFusion</td>
            <td>Task-driven Image Fusion with Learnable Fusion Loss</td>
            <td>CVPR ‘25</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2025/html/Bai_Task-driven_Image_Fusion_with_Learnable_Fusion_Loss_CVPR_2025_paper.html">Paper</a>/<a href="https://github.com/HaowenBai/TDFusion">Code</a></td>
        </tr>
        <tr>
            <td>GIFNet</td>
            <td>One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion</td>
            <td>CVPR '25</td>
            <td><a href="https://openaccess.thecvf.com/content/CVPR2025/html/Cheng_One_Model_for_ALL_Low-Level_Task_Interaction_Is_a_Key_CVPR_2025_paper.html">Paper</a>/<a href="https://github.com/AWCXV/GIFNet">Code</a></td>
        </tr>
        <tr>
            <td>RIS-Fuse</td>
            <td>Highlight What You Want: Weakly-Supervised Instance-Level Controllable Infrared-Visible Image Fusion</td>
            <td>ICCV '25</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2025/papers/Wang_Highlight_What_You_Want_Weakly-Supervised_Instance-Level_Controllable_Infrared-Visible_Image_Fusion_ICCV_2025_paper.pdf">Paper</a>/<a href="https://github.com/GMY628/RIS-Fuse">Code</a></td>
        </tr>
        <tr>
            <td>SCA</td>
            <td>The Source Image is the Best Attention for Infrared and Visible Image Fusion</td>
            <td>ICCV '25</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2025/papers/Wang_The_Source_Image_is_the_Best_Attention_for_Infrared_and_ICCV_2025_paper.pdf">Paper</a></td>
        </tr>
        <tr>
            <td>TITA</td>
            <td>Balancing Task-invariant Interaction and Task-specific Adaptation for Unified Image Fusion</td>
            <td>ICCV '25</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2025/papers/Hu_Balancing_Task-invariant_Interaction_and_Task-specific_Adaptation_for_Unified_Image_Fusion_ICCV_2025_paper.pdf">Paper</a>/<a href="https://github.com/huxingyuabc/TITA">Code</a></td>
        </tr>
        <tr>
            <td>DreamFuse</td>
            <td>DreamFuse: Adaptive Image Fusion with Diffusion Transformer</td>
            <td>ICCV '25</td>
            <td><a href="https://arxiv.org/pdf/2504.08291">Paper</a>/<a href="https://ll3rd.github.io/DreamFuse/">Code</a></td>
        </tr>
        <tr>
            <td>TRACE</td>
            <td>Toward a Training-Free Plug-and-Play Refinement Framework for Infrared and Visible Image Registration and Fusion</td>
            <td>ACM MM '25</td>
            <td><a href="https://dl.acm.org/doi/epdf/10.1145/3746027.3755087">Paper</a>/<a href="https://github.com/pubyLu/TRACE">Code</a></td>
        </tr>
        <tr>
            <td>HRFusion</td>
            <td>Prior-Constrained Relevant Feature driven Image Fusion with Hybrid Feature via Mode Decomposition</td>
            <td>ACM MM '25</td>
            <td><a href="https://dl.acm.org/doi/epdf/10.1145/3746027.3755343">Paper</a>/<a href="https://github.com/liuuuuu777/HRFusion">Code</a></td>
        </tr>
        <tr>
            <td>TG-ECNet</td>
            <td>Task-Gated Multi-Expert Collaboration Network for Degraded Multi-Modal Image Fusion</td>
            <td>ICML '25</td>
            <td><a href="https://openreview.net/pdf?id=OcFsPBXREI">Paper</a>/<a href="https://github.com/LeeX54946/TG-ECNet">Code</a></td>
        </tr>
        <tr>
            <td>Deno-IF</td>
            <td>Deno-IF: Unsupervised Noisy Visible and Infrared Image Fusion via Mimicking Textures from Clean Images</td>
            <td>NeurIPS '25</td>
            <td><a href="https://openreview.net/pdf?id=36cKp4tsHF">Paper</a>/<a href="https://github.com/hanna-xu/Deno-IF">Code</a></td>
        </tr>
        <tr>
            <td>HCLFuse</td>
            <td>Revisiting Generative Infrared and Visible Image Fusion Based on Human Cognitive Laws</td>
            <td>NeurIPS '25</td>
            <td><a href="https://openreview.net/pdf?id=wvcYIEaD5X">Paper</a>/<a href="https://openreview.net/pdf?id=wvcYIEaD5X">Code</a></td>
        </tr>
        <tr>
            <td>RFfusion</td>
            <td>Efficient Rectified Flow for Image Fusion</td>
            <td>NeurIPS '25</td>
            <td><a href="https://arxiv.org/pdf/2509.16549">Paper</a>/<a href="https://github.com/zirui0625/RFfusion">Code</a></td>
        </tr> 
       <tr>
            <td>ControlFusion</td>
            <td>ControlFusion: A Controllable Image Fusion Framework with Language-Vision Degradation Prompts</td>
            <td>NeurIPS '25</td>
            <td><a href="https://openreview.net/pdf?id=aLhA7AYLLR">Paper</a>/<a href="https://github.com/Linfeng-Tang/ControlFusion">Code</a></td>
        </tr>
        <tr>
            <td>PDFuse</td>
            <td>Projection-Manifold Regularized Latent Diffusion for Robust General Image Fusion</td>
            <td>NeurIPS '25</td>
            <td><a href="https://openreview.net/pdf?id=RqE5PlQsU5">Paper</a>/<a href="https://github.com/Leiii-Cao/PDFuse">Code</a></td>
        </tr>   
       <tr>
            <td>DAFusion</td>
            <td>Domain Adaptation Guided Infrared and Visible Image Fusion</td>
            <td>AAAI '26</td>
            <td><a href="https://doi.org/10.1609/aaai.v40i6.42435">Paper</a>/<a href="https://github.com/wellwhz/DAFusion">Code</a></td>
        </tr>
        <tr>
            <td>HMMF</td>
            <td>A Hybrid Space Model for Misaligned Multi-modality Image Fusion</td>
            <td>AAAI '26</td>
            <td><a href="https://openreview.net/attachment?id=Qwj8KZ45jL&name=pdf">Paper</a>/<a href="https://github.com/xiao-eee/HMMF">Code</a></td>
        </tr>   
       <tr>
            <td>CtrlFuse</td>
            <td>CtrlFuse: Mask-Prompt Guided Controllable Infrared and Visible Image Fusion</td>
            <td>AAAI '26</td>
            <td><a href="https://arxiv.org/pdf/2601.08619">Paper</a>/<a href="https://github.com/Sevryy/CtrlFuse">Code</a></td>
        </tr>
        <tr>
            <td>MdaIF</td>
            <td>MdaIF: Robust One-Stop Multi-Degradation-Aware Image Fusion with Language-Driven Semantics</td>
            <td>AAAI '26</td>
            <td><a href="https://arxiv.org/pdf/2511.12525">Paper</a>/<a href="https://github.com/doudou845133/MdaIF">Code</a></td>
        </tr>   
       <tr>
            <td>SMC</td>
            <td>Self-supervised Multiplex Consensus Mamba for General Image Fusion</td>
            <td>AAAI '26</td>
            <td><a href="https://arxiv.org/pdf/2512.20921">Paper</a></td>
        </tr>
        <tr>
            <td>OCCO</td>
            <td>OCCO: LVM-Guided Infrared and Visible Image Fusion Framework Based on Object-Aware and Contextual Contrastive Learning</td>
            <td>IJCV '25</td>
            <td><a href="https://link.springer.com/content/pdf/10.1007/s11263-025-02507-2.pdf">Paper</a>/<a href="https://github.com/bociic/OCCO">Code</a></td>
        </tr>   
       <tr>
            <td>MagicFuse</td>
            <td>One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion</td>
            <td>CVPR '26</td>
            <td><a href="https://arxiv.org/html/2602.01760v1">Paper</a></td>
        </tr>
        <tr>
            <td>UniFusion</td>
            <td>One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion</td>
            <td>CVPR '26</td>
            <td><a href="https://arxiv.org/pdf/2603.14214">Paper</a>/<a href="https://github.com/dusongcheng/UniFusion">Code</a></td>
        </tr> 
        <tr>
            <td>OmniFuse</td>
            <td>OmniFuse: Composite Degradation-Robust Image Fusion With Language-Driven Semantics</td>
            <td>TPAMI '25</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/10994384">Paper</a>/<a href="https://github.com/HaoZhang1018/OmniFuse">Code</a></td>
        </tr>   
       <tr>
            <td>DiTFuse</td>
            <td>Towards Unified Semantic and Controllable Image Fusion: A Diffusion Transformer Approach</td>
            <td>TPAMI '25</td>
            <td><a href="https://ieeexplore.ieee.org/document/11297852">Paper</a>/<a href="https://github.com/Henry-Lee-real/DiTFuse">Code</a></td>
        </tr>
        <tr>
            <td>Mask-DiFuser</td>
            <td>Mask-DiFuser: A Masked Diffusion Model for Unified Unsupervised Image Fusion</td>
            <td>TPAMI '25</td>
            <td><a href="https://ieeexplore.ieee.org/document/11162636">Paper</a>/<a href="https://github.com/Linfeng-Tang/Mask-DiFuser/blob/main/README.md">Code</a></td>
        </tr>   
       <tr>
            <td>FuseAgent</td>
            <td>FuseAgent: A VLM-Driven Agent for Unified In-the-Wild Image Fusion</td>
            <td>ICLR '26</td>
            <td><a href="https://openreview.net/pdf?id=FUiOZXchu5">Paper</a>/<a href="https://github.com/Tyunsen/fuseAgent_v2/blob/main/docs/zh-CN/design/vision_index_creation.md">Code</a></td>
        </tr>
        <tr>
            <td>URFusion</td>
            <td>URFusion: Unsupervised Unified Degradation-Robust Image Fusion Network</td>
            <td>TIP '25</td>
            <td><a href="https://ieeexplore.ieee.org/document/11164897">Paper</a>/<a href="https://github.com/hanna-xu/URFusion">Code</a></td>
        </tr>   
</table>

# 视频融合(Video Fusion)
<table>
    <thead>
        <tr>
            <th>Methods<br>(方法)</th>
            <th>Title<br>(标题)</th>
            <th>Venue<br>(发表场所)</th>
            <th>Source<br>(资源)</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>UniVF</td>
            <td>A Unified Solution to Video Fusion: From Multi-Frame Learning to Benchmarking</td>
            <td>NeurIPS '25</td>
            <td><a href="https://arxiv.org/abs/2505.19858">Paper</a>/<a href="https://github.com/Zhaozixiang1228/VF-Bench">Code</a></td>
        </tr>   
       <tr>
            <td>RCVS</td>
            <td>RCVS: A Unified Registration and Fusion Framework for Video Streams</td>
            <td>TMM '24</td>
            <td><a href="https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10636834">Paper</a></td>
        </tr>
        <tr>
            <td>VideoFusion</td>
            <td>VideoFusion: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion</td>
            <td>CVPR '26</td>
            <td><a href="https://arxiv.org/pdf/2503.23359">Paper</a>/<a href="https://github.com/Linfeng-Tang/VideoFusion">Code</a></td>
        </tr>   
       <tr>
            <td>CMVF</td>
            <td>CMVF: Cross-modal unregistered video fusion via spatio-temporal consistency</td>
            <td>InfFus '26</td>
            <td><a href="https://www.sciencedirect.com/science/article/pii/S1566253526000916">Paper</a>/<a href="https://github.com/jianfeng0369/CMVF">Code</a></td>
        </tr>
        <tr>
            <td>Seq-IF</td>
            <td>Seq-IF: Sequentially Consistent Infrared-Visible Video Fusion under Time-Varying Illumination for Perception Enhancement</td>
            <td>TCSVT '26</td>
            <td><a href="https://ieeexplore.ieee.org/abstract/document/11417980">Paper</a></td>
        </tr> 
        <tr>
            <td>TemCoCo</td>
            <td>TemCoCo: Temporally Consistent Multi-modal Video Fusion with Visual-Semantic Collaboration</td>
            <td>ICCV '25</td>
            <td><a href="https://openaccess.thecvf.com/content/ICCV2025/papers/Gong_TemCoCo_Temporally_Consistent_Multi-modal_Video_Fusion_with_Visual-Semantic_Collaboration_ICCV_2025_paper.pdf">Paper</a>/<a href="https://github.com/Meiqi-Gong/TemCoCo">Code</a></td>
        </tr>   
    </tbody>
</table>

#  评价指标(Evaluation Metric)
We integrated the code for calculating metrics and used GPU acceleration with PyTorch, significantly improving the speed of computing metrics across multiple methods and images.
You can find it at [Metric](https://github.com/RollingPlain/IVIF_ZOO/tree/main/Metric)

If you want to calculate metrics using our code, you can run:
```bash
# Please modify the data path in 'eval_torch.py'.
python eval_torch.py
 ```

#  资源库(Resource Library)
##  融合(Fusion)

Fusion images from multiple datasets in the IVIF domain are organized in the following form: each subfolder contains fusion images generated by different methods, facilitating research and comparison for users.
```
Fusion ROOT
├── IVIF
|   ├── FMB
|   |   ├── ... 
|   |   ├── CAF # All the file names are named after the methods
|   |   └── ...
|   ├── # The other files follow the same structure shown above.
|   ├── M3FD_300 # Mini version of M3FD dataset with 300 images
|   ├── RoadScene
|   ├── TNO
|   └── M3FD_4200.zip # Full version of the M3FD dataset with 4200 images
```
You can directly download from here.

Download：[Baidu Yun](https://pan.baidu.com/s/1S6l-CUqE2nRPXeX2P_VScg?pwd=wgtn)


##  分割(Segmentation)

Segmentation data is organized in the following form: it contains multiple directories to facilitate the management of segmentation-related data and results.

```
Segmentation ROOT
├── Segformer
|   ├── datasets
|   |   ├── ... 
|   |   ├── CAF # All the file names are named after the methods
|   |   |    └──VOC2007
|   |   |         ├── JPEGImages # Fusion result images in JPG format
|   |   |         └── SegmentationClass # Ground truth for segmentation
|   |   └── ... # The other files follow the same structure shown above.
|   ├── model_data 
|   |   ├── backbone # Backbone used for segmentation
|   |   └── model # Saved model files
|   |        ├── ...
|   |        ├── CAF.pth # All the model names are named after the methods
|   |        └── ... 
|   ├── results # Saved model files and training results
|   |   ├── iou # IoU results for segmentation validation
|   |        ├── ...
|   |        ├── CAF.txt # All the file names are named after the methods
|   |        └── ... 
|   |   └── predict #Visualization of segmentation
|   |        ├── ...
|   |        ├── CAF # All the file names are named after the methods
|   |        └── ... 
|   └── hyperparameters.md # Hyperparameter settings
```

You can directly download from here.

Download：[Baidu Yun](https://pan.baidu.com/s/1IZOZU17CA6-zeR8zb1LW3Q?pwd=5rcp)

##  检测(Detection)
Detection data is organized in the following form:
it contains multiple directories to facilitate the management of detection-related data and results.

```
Detection ROOT
├── M3FD
|   ├── Fused Results
|   |   ├── ... 
|   |   ├── CAF # All the file names are named after the methods
|   |   |   ├── Images # Fusion result images in PNG format
|   |   |   └── Labels # Ground truth for detection
|   |   └── ... # The other files follow the same structure shown above.
|   ├── model_data 
|   |   └── model # Saved model files
|   |        ├── ...
|   |        ├── CAF.pth # All the model names are named after the methods
|   |        └── ... 
|   ├── results # Saved model files and training results
|   |   └── predict #Visualization of detection
|   |        ├── ...
|   |        ├── CAF # All the file names are named after the methods
|   |        └── ... 
|   └── hyperparameters.md # Hyperparameter settings
```

You can directly download from here.

Download：[Baidu Yun](https://pan.baidu.com/s/1mC3wTM1DjbBz5mIaDYJLDQ?pwd=a36k)
##  计算效率(Computational Efficiency)
- **FLOPs and Params**:
    - We utilize the `profile` function from the `thop` package to compute the FLOPs (G) and Params (M) counts of the model.
```python
from thop import profile

# Create ir, vi input tensor
ir = torch.randn(1, 1, 1024, 768).to(device)
vi = torch.randn(1, 3, 1024, 768).to(device)
# Assume 'model' is your network model
flops, params = profile(model, inputs=(ir, vi))
 ```

- **Time**:
    - To measure the running time (ms) of the model, we test a random selection of 10 image sets from the M3FD dataset, each with a resolution of 1024×768, on an Nvidia GeForce RTX 4090, and report the average time excluding the initial image. To eliminate CPU influence, we use the official CUDA event functions to measure the running time on the GPU.

```python
import torch
  
# Create CUDA events
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
# Record the start time
start.record()
# Execute the model
# Assume 'model' is your network model
fus = model(ir, vi)   
# Record the end time
end.record()
```