Full Code of thuhcsi/SECap for AI

main 1743904eb9bc cached

18 files

360.5 KB

166.2k tokens

199 symbols

1 requests

Download .txt

Showing preview only (527K chars total). Download the full file or copy to clipboard to get everything.

Repository: thuhcsi/SECap
Branch: main
Commit: 1743904eb9bc
Files: 18
Total size: 360.5 KB

Directory structure:
gitextract_kdqa59bx/

├── .gitignore
├── CLUB_modules/
│   ├── __init__.py
│   ├── mi_estimators.py
│   └── mi_estimators_dist.py
├── dataloader/
│   └── dataloader.py
├── dataset/
│   ├── fid2captions.json
│   ├── text.txt
│   └── wav.scp
├── environment.yml
├── model2.py
├── module/
│   ├── Qformer.py
│   └── modeling_llama.py
├── readme.md
├── result/
│   └── result.txt
├── scripts/
│   ├── inference.py
│   ├── test.py
│   └── train.py
└── tool/
    └── get_sentence_simi.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
model.ckpt
weights/
ft_local/
__pycache__/
*.pyc
*.pyo
*.pyd

================================================
FILE: CLUB_modules/__init__.py
================================================


================================================
FILE: CLUB_modules/mi_estimators.py
================================================
"""
Adapted from https://github.com/Linear95/CLUB/blob/master/mi_estimators.py
"""

import numpy as np
import math

import torch 
import torch.nn as nn



class CLUBVec2Seq(nn.Module):
    """CLUB mutual-information upper bound between a sequence and a vector.

    A three-layer 1-D convolutional pre-net encodes the sequence, the result
    is averaged over time, and two MLP heads predict the mean and the
    (tanh-bounded) log-variance of a diagonal Gaussian q(vec | seq).
    """

    def __init__(
        self,
        seq_dim: int,
        vec_dim: int,
        hidden_size: int,
        is_sampled_version: bool = False,
    ):
        """
        Args:
            seq_dim: channel size of the sequence feature.
            vec_dim: size of the vector feature.
            hidden_size: width of the pre-net and of both heads.
            is_sampled_version: when True, ``forward`` draws one shuffled
                negative per sample instead of averaging over the batch.
        """
        super().__init__()
        self.is_sampled_version = is_sampled_version

        # kernel 5 with padding 2 keeps the time length unchanged
        conv_kwargs = dict(kernel_size=5, padding=2)
        self.seq_prenet = nn.Sequential(
            nn.Conv1d(seq_dim, hidden_size, **conv_kwargs),
            nn.ReLU(),
            nn.Conv1d(hidden_size, hidden_size, **conv_kwargs),
            nn.ReLU(),
            nn.Conv1d(hidden_size, hidden_size, **conv_kwargs),
        )
        # head predicting the mean of q(vec | seq)
        self.p_mu = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, vec_dim),
        )
        # head predicting the log-variance; Tanh bounds it to (-1, 1)
        self.p_logvar = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, vec_dim),
            nn.Tanh(),
        )

    def temporal_avg_pool(self, x, mask=None):
        """Average ``x`` over the time axis, ignoring masked frames.

        Args:
            x (tensor): shape [B, T, D].
            mask (bool tensor): shape [B, T], True on padded frames.

        Returns:
            Tensor of shape [B, D].
        """
        if mask is None:
            return torch.mean(x, dim=1)
        # number of non-padded frames per item, [B, 1]
        valid_frames = (~mask).sum(dim=1).unsqueeze(1)
        summed = x.masked_fill(mask.unsqueeze(-1), 0).sum(dim=1)
        return torch.div(summed, valid_frames)

    def get_mu_logvar(self, seq, mask):
        """Return the predicted mean and log-variance, each [B, vec_dim]."""
        # Conv1d expects channels first, so swap to [B, D, T] and back
        encoded = self.seq_prenet(seq.transpose(1, 2)).transpose(1, 2)
        pooled = self.temporal_avg_pool(encoded, mask)
        return self.p_mu(pooled), self.p_logvar(pooled)

    def loglikeli(self, seq, vec, mask=None):
        """Un-normalized Gaussian log-likelihood of ``vec`` given ``seq``.

        Args:
            seq (tensor): sequence feature, shape [B, T, D].
            vec (tensor): vector feature, shape [B, D].
            mask (tensor): True on padded frames, shape [B, T].

        Returns:
            Scalar tensor: per-sample sum over dimensions, averaged over batch.
        """
        mu, logvar = self.get_mu_logvar(seq, mask)
        per_dim = -(mu - vec) ** 2 / logvar.exp() - logvar
        return per_dim.sum(dim=1).mean(dim=0)

    def forward(self, seq, vec, mask=None):
        """Estimate the CLUB upper bound of the mutual information.

        Args:
            seq (tensor): sequence feature, shape [B, T, D].
            vec (tensor): vector feature, shape [B, D].
            mask (tensor): True on padded frames, shape [B, T].

        Returns:
            Scalar tensor with the estimate.
        """
        mu, logvar = self.get_mu_logvar(seq, mask)
        var = logvar.exp()

        if self.is_sampled_version:
            # one random permutation of the batch provides the negatives
            shuffled = torch.randperm(seq.shape[0]).long()
            pos = -(mu - vec) ** 2 / var
            neg = -(mu - vec[shuffled]) ** 2 / var
            gap = (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()
            return gap / 2.

        # matched pairs, [B, D]
        pos = -(mu - vec) ** 2 / 2. / var
        # squared error of every prediction against every vector, [B, B, D]
        pairwise_sq = (vec.unsqueeze(0) - mu.unsqueeze(1)) ** 2
        # averaged over the candidate vectors, [B, D]
        neg = -pairwise_sq.mean(dim=1) / 2. / var
        return (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()

    def learning_loss(self, seq, vec, mask=None):
        """Negative log-likelihood used to train the variational network."""
        return -self.loglikeli(seq, vec, mask)


class CLUBForCategorical(nn.Module):
    """CLUB upper bound of I(X, Y) for continuous X and categorical Y.

    A classifier q(y | x) is fitted with ``learning_loss``; ``forward``
    contrasts its log-probability on matched (x_i, y_i) pairs against the
    average over every (x_i, y_j) combination in the batch.
    """

    def __init__(self, input_dim, label_num, hidden_size=None):
        """
        Args:
            input_dim: dimension of the input embeddings.
            label_num: number of categorical labels.
            hidden_size: width of an optional hidden layer; when None the
                classifier is a single linear layer.
        """
        super().__init__()

        if hidden_size is not None:
            self.variational_net = nn.Sequential(
                nn.Linear(input_dim, hidden_size),
                nn.ReLU(),
                nn.Linear(hidden_size, label_num),
            )
        else:
            self.variational_net = nn.Linear(input_dim, label_num)

    def forward(self, inputs, labels):
        """Return the scalar MI upper-bound estimate.

        Args:
            inputs: embeddings, shape [batch_size, input_dim].
            labels: label indices, shape [batch_size].
        """
        logits = self.variational_net(inputs)  # [n, label_num]
        n, label_num = logits.shape

        # Row i * n + j pairs the logits of sample i with the label of sample j.
        paired_logits = logits.unsqueeze(1).repeat(1, n, 1).reshape(-1, label_num)
        paired_labels = labels.unsqueeze(0).repeat(n, 1).reshape(-1)

        pair_ce = nn.functional.cross_entropy(
            paired_logits, paired_labels, reduction='none'
        )
        # log_mat[i, j] = log q(y_j | x_i)
        log_mat = (-pair_ce).reshape(n, n)

        matched = torch.diag(log_mat).mean()
        all_pairs = log_mat.mean()
        return matched - all_pairs

    def loglikeli(self, inputs, labels):
        """Mean log-probability the classifier gives to the true labels."""
        return -nn.functional.cross_entropy(self.variational_net(inputs), labels)

    def learning_loss(self, inputs, labels):
        """Cross-entropy objective used to fit the classifier."""
        return -self.loglikeli(inputs, labels)
    

class CLUB(nn.Module):
    """CLUB: Contrastive Log-ratio Upper Bound estimate of I(X, Y).

    Two MLPs parameterise a diagonal Gaussian q(Y | X): ``p_mu`` gives the
    mean and ``p_logvar`` the tanh-bounded log-variance.

    Methods:
        forward():       MI upper-bound estimate for a batch of samples.
        loglikeli():     un-normalized log-likelihood of q(Y | X).
        learning_loss(): negative ``loglikeli``, used to fit q.

    Arguments:
        x_dim, y_dim:       dimensions of samples from X and Y.
        hidden_size:        both MLPs use ``hidden_size // 2`` hidden units.
        is_sampled_version: use one shuffled negative per sample instead of
                            the full batch average.
    """

    def __init__(self, x_dim, y_dim, hidden_size, is_sampled_version=False):
        super().__init__()
        self.is_sampled_version = is_sampled_version
        half = hidden_size // 2
        # mean of q(Y|X)
        self.p_mu = nn.Sequential(
            nn.Linear(x_dim, half),
            nn.ReLU(),
            nn.Linear(half, y_dim),
        )
        # log of variance of q(Y|X)
        self.p_logvar = nn.Sequential(
            nn.Linear(x_dim, half),
            nn.ReLU(),
            nn.Linear(half, y_dim),
            nn.Tanh(),
        )

    def get_mu_logvar(self, x_samples):
        """Return ``(mu, logvar)`` predicted from ``x_samples``."""
        return self.p_mu(x_samples), self.p_logvar(x_samples)

    def forward(self, x_samples, y_samples):
        """Return the scalar MI upper-bound estimate for the batch."""
        mu, logvar = self.get_mu_logvar(x_samples)
        var = logvar.exp()

        if self.is_sampled_version:
            # negatives come from one random permutation of the batch
            shuffled = torch.randperm(x_samples.shape[0]).long()
            pos = -(mu - y_samples) ** 2 / var
            neg = -(mu - y_samples[shuffled]) ** 2 / var
            gap = (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()
            return gap / 2.

        # matched pairs
        pos = -(mu - y_samples) ** 2 / 2. / var
        # every prediction [n, 1, dim] against every sample [1, n, dim]
        pairwise_sq = (y_samples.unsqueeze(0) - mu.unsqueeze(1)) ** 2
        neg = -pairwise_sq.mean(dim=1) / 2. / var
        return (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()

    def loglikeli(self, x_samples, y_samples):
        """Un-normalized log-likelihood of ``y_samples`` under q(Y | X)."""
        mu, logvar = self.get_mu_logvar(x_samples)
        per_dim = -(mu - y_samples) ** 2 / logvar.exp() - logvar
        return per_dim.sum(dim=1).mean(dim=0)

    def learning_loss(self, x_samples, y_samples):
        """Negative log-likelihood used to train q(Y | X)."""
        return -self.loglikeli(x_samples, y_samples)
 

class MINE(nn.Module):
    """MINE lower-bound estimator of I(X, Y) with an MLP critic ``T_func``.

    Arguments:
        x_dim, y_dim: dimensions of samples from X and Y.
        hidden_size:  width of the critic's hidden layer.
    """

    def __init__(self, x_dim, y_dim, hidden_size):
        super(MINE, self).__init__()
        self.T_func = nn.Sequential(nn.Linear(x_dim + y_dim, hidden_size),
                                    nn.ReLU(),
                                    nn.Linear(hidden_size, 1))

    def forward(self, x_samples, y_samples):  # samples have shape [sample_size, dim]
        """Return the scalar lower-bound estimate for the batch."""
        # Negatives: y drawn with replacement from the batch and paired with x.
        sample_size = y_samples.shape[0]
        random_index = torch.randint(sample_size, (sample_size,)).long()

        y_shuffle = y_samples[random_index]

        T0 = self.T_func(torch.cat([x_samples, y_samples], dim=-1))
        T1 = self.T_func(torch.cat([x_samples, y_shuffle], dim=-1))

        # log(mean(exp(T1))) evaluated as logsumexp(T1) - log(count): the
        # naive form overflows to inf as soon as the critic outputs grow large.
        log_mean_exp = torch.logsumexp(T1.reshape(-1), dim=0) - math.log(T1.numel())
        lower_bound = T0.mean() - log_mean_exp

        return lower_bound

    def learning_loss(self, x_samples, y_samples):
        """Loss to minimise: the negated lower bound."""
        return -self.forward(x_samples, y_samples)


class NWJ(nn.Module):
    """NWJ lower-bound estimator of I(X, Y) with an MLP critic ``F_func``.

    Arguments:
        x_dim, y_dim: dimensions of samples from X and Y.
        hidden_size:  width of the critic's hidden layer.
    """

    def __init__(self, x_dim, y_dim, hidden_size):
        super().__init__()
        self.F_func = nn.Sequential(
            nn.Linear(x_dim + y_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
        )

    def forward(self, x_samples, y_samples):
        """Return the scalar lower-bound estimate for the batch."""
        n = y_samples.shape[0]

        # All pairs: entry [i, j] concatenates x_j with y_i.
        x_grid = x_samples.unsqueeze(0).repeat((n, 1, 1))
        y_grid = y_samples.unsqueeze(1).repeat((1, n, 1))
        grid = torch.cat([x_grid, y_grid], dim=-1)

        joint_scores = self.F_func(torch.cat([x_samples, y_samples], dim=-1))
        # [n, n, 1]; the -1 shift is applied to the all-pairs scores only
        pair_scores = self.F_func(grid) - 1.

        # mean over dim 1 of exp(pair_scores), evaluated through logsumexp
        mean_exp = (pair_scores.logsumexp(dim=1) - np.log(n)).exp()
        return joint_scores.mean() - mean_exp.mean()

    def learning_loss(self, x_samples, y_samples):
        """Loss to minimise: the negated lower bound."""
        return -self.forward(x_samples, y_samples)

    
class InfoNCE(nn.Module):
    """InfoNCE lower-bound estimator of I(X, Y).

    The critic ``F_func`` is an MLP ending in Softplus, so its scores are
    positive.

    Arguments:
        x_dim, y_dim: dimensions of samples from X and Y.
        hidden_size:  width of the critic's hidden layer.
    """

    def __init__(self, x_dim, y_dim, hidden_size):
        super().__init__()
        self.F_func = nn.Sequential(
            nn.Linear(x_dim + y_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Softplus(),
        )

    def forward(self, x_samples, y_samples):  # samples have shape [sample_size, dim]
        """Return the scalar lower-bound estimate for the batch."""
        n = y_samples.shape[0]

        # All pairs: entry [i, j] concatenates x_j with y_i.
        x_grid = x_samples.unsqueeze(0).repeat((n, 1, 1))
        y_grid = y_samples.unsqueeze(1).repeat((1, n, 1))

        joint_scores = self.F_func(torch.cat([x_samples, y_samples], dim=-1))
        pair_scores = self.F_func(torch.cat([x_grid, y_grid], dim=-1))  # [n, n, 1]

        # log of the mean exponentiated score over dim 1, averaged over the rest
        log_mean_exp = pair_scores.logsumexp(dim=1).mean() - np.log(n)
        return joint_scores.mean() - log_mean_exp

    def learning_loss(self, x_samples, y_samples):
        """Loss to minimise: the negated lower bound."""
        return -self.forward(x_samples, y_samples)


def log_sum_exp(value, dim=None, keepdim=False):
    """Numerically stable implementation of the operation
    value.exp().sum(dim, keepdim).log()

    Args:
        value (tensor): input values.
        dim (int, optional): dimension to reduce; when None every element
            is reduced to a single scalar.
        keepdim (bool): keep the reduced dimension with size 1 (only used
            when ``dim`` is given).

    Returns:
        Tensor holding log(sum(exp(value))) over the requested dimension.
    """
    # ``Number`` was referenced without ever being imported, so the
    # ``dim=None`` path raised NameError; import it where it is used.
    from numbers import Number

    if dim is not None:
        # subtract the max before exponentiating so exp() cannot overflow
        m, _ = torch.max(value, dim=dim, keepdim=True)
        value0 = value - m
        if not keepdim:
            m = m.squeeze(dim)
        return m + torch.log(torch.sum(torch.exp(value0),
                                       dim=dim, keepdim=keepdim))

    m = torch.max(value)
    sum_exp = torch.sum(torch.exp(value - m))
    if isinstance(sum_exp, Number):
        # kept for backends where the reduction yields a plain Python number
        return m + math.log(sum_exp)
    return m + torch.log(sum_exp)


class L1OutUB(nn.Module):  # naive upper bound
    """Leave-one-out (L1Out) upper-bound estimator of I(X, Y).

    Two MLPs parameterise a diagonal Gaussian q(Y | X): ``p_mu`` gives the
    mean and ``p_logvar`` the tanh-bounded log-variance.

    Arguments:
        x_dim, y_dim: dimensions of samples from X and Y.
        hidden_size:  both MLPs use ``hidden_size // 2`` hidden units.
    """

    def __init__(self, x_dim, y_dim, hidden_size):
        super(L1OutUB, self).__init__()
        self.p_mu = nn.Sequential(nn.Linear(x_dim, hidden_size//2),
                                       nn.ReLU(),
                                       nn.Linear(hidden_size//2, y_dim))

        self.p_logvar = nn.Sequential(nn.Linear(x_dim, hidden_size//2),
                                       nn.ReLU(),
                                       nn.Linear(hidden_size//2, y_dim),
                                       nn.Tanh())

    def get_mu_logvar(self, x_samples):
        """Return ``(mu, logvar)`` predicted from ``x_samples``."""
        mu = self.p_mu(x_samples)
        logvar = self.p_logvar(x_samples)
        return mu, logvar

    def forward(self, x_samples, y_samples):
        """Return the scalar L1Out estimate for the batch.

        For each y_j the matched log-density log q(y_j | x_j) is compared
        with the log of the average density q(y_j | x_i) over the other
        samples i != j.
        """
        batch_size = y_samples.shape[0]
        mu, logvar = self.get_mu_logvar(x_samples)

        positive = (- (mu - y_samples)**2 /2./logvar.exp() - logvar/2.).sum(dim = -1) #[nsample]

        mu_1 = mu.unsqueeze(1)          # [nsample,1,dim]
        logvar_1 = logvar.unsqueeze(1)
        y_samples_1 = y_samples.unsqueeze(0)            # [1,nsample,dim]
        # all_probs[i, j] = log q(y_j | x_i)
        all_probs =  (- (y_samples_1 - mu_1)**2/2./logvar_1.exp()- logvar_1/2.).sum(dim = -1)  #[nsample, nsample]

        # Soft mask (-20 in log space) that suppresses the matched pairs on
        # the diagonal. It is built on the inputs' own device instead of a
        # hard-coded .cuda(), and kept 2-D: the previous trailing axis
        # broadcast the sum to [nsample, nsample, nsample], so the result was
        # no longer a per-sample leave-one-out term.
        diag_mask = torch.eye(
            batch_size, device=all_probs.device, dtype=all_probs.dtype
        ) * (-20.)
        negative = torch.logsumexp(all_probs + diag_mask, dim=0) - np.log(batch_size-1.) #[nsample]

        return (positive - negative).mean()

    def loglikeli(self, x_samples, y_samples):
        """Un-normalized log-likelihood of ``y_samples`` under q(Y | X)."""
        mu, logvar = self.get_mu_logvar(x_samples)
        return (-(mu - y_samples)**2 /logvar.exp()-logvar).sum(dim=1).mean(dim=0)

    def learning_loss(self, x_samples, y_samples):
        """Negative log-likelihood used to train q(Y | X)."""
        return - self.loglikeli(x_samples, y_samples)

    
class VarUB(nn.Module):
    """Variational upper-bound estimator of I(X, Y).

    Two MLPs parameterise a diagonal Gaussian q(Y | X): ``p_mu`` gives the
    mean and ``p_logvar`` the tanh-bounded log-variance.

    Arguments:
        x_dim, y_dim: dimensions of samples from X and Y.
        hidden_size:  both MLPs use ``hidden_size // 2`` hidden units.
    """

    def __init__(self, x_dim, y_dim, hidden_size):
        super().__init__()
        half = hidden_size // 2
        self.p_mu = nn.Sequential(
            nn.Linear(x_dim, half),
            nn.ReLU(),
            nn.Linear(half, y_dim),
        )
        self.p_logvar = nn.Sequential(
            nn.Linear(x_dim, half),
            nn.ReLU(),
            nn.Linear(half, y_dim),
            nn.Tanh(),
        )

    def get_mu_logvar(self, x_samples):
        """Return ``(mu, logvar)`` predicted from ``x_samples``."""
        return self.p_mu(x_samples), self.p_logvar(x_samples)

    def forward(self, x_samples, y_samples):
        """Return the scalar estimate; ``y_samples`` is not used here.

        The value is the element-wise mean of
        0.5 * (mu^2 + exp(logvar) - 1 - logvar).
        """
        mu, logvar = self.get_mu_logvar(x_samples)
        kl_terms = mu ** 2 + logvar.exp() - 1. - logvar
        return 1. / 2. * kl_terms.mean()

    def loglikeli(self, x_samples, y_samples):
        """Un-normalized log-likelihood of ``y_samples`` under q(Y | X)."""
        mu, logvar = self.get_mu_logvar(x_samples)
        per_dim = -(mu - y_samples) ** 2 / logvar.exp() - logvar
        return per_dim.sum(dim=1).mean(dim=0)

    def learning_loss(self, x_samples, y_samples):
        """Negative log-likelihood used to train q(Y | X)."""
        return -self.loglikeli(x_samples, y_samples)


================================================
FILE: CLUB_modules/mi_estimators_dist.py
================================================
"""
Adapted from https://github.com/Linear95/CLUB/blob/master/mi_estimators.py
"""

import numpy as np
import math

import torch 
import torch.nn as nn

from src.utils.ddp_utils import SyncFunction


class CLUBVec2Seq(nn.Module):
    """CLUB estimator for vector-to-sequence pairs, distributed-aware.

    The sequence goes through a 1-D convolutional pre-net and a temporal
    average; two MLP heads then predict the mean and tanh-bounded
    log-variance of a diagonal Gaussian q(vec | seq). When a
    torch.distributed process group is initialised, the non-sampled
    estimate takes its negative vectors from ``SyncFunction.apply(vec)``.
    """

    def __init__(
        self,
        seq_dim: int,
        vec_dim: int,
        hidden_size: int,
        is_sampled_version: bool = False,
    ):
        """
        Args:
            seq_dim: channel size of the sequence feature.
            vec_dim: size of the vector feature.
            hidden_size: width of the pre-net and of both heads.
            is_sampled_version: when True, ``forward`` draws one shuffled
                negative per sample from the local batch.
        """
        super().__init__()
        self.is_sampled_version = is_sampled_version

        # kernel 5 with padding 2 keeps the time length unchanged
        conv_kwargs = dict(kernel_size=5, padding=2)
        self.seq_prenet = nn.Sequential(
            nn.Conv1d(seq_dim, hidden_size, **conv_kwargs),
            nn.ReLU(),
            nn.Conv1d(hidden_size, hidden_size, **conv_kwargs),
            nn.ReLU(),
            nn.Conv1d(hidden_size, hidden_size, **conv_kwargs),
        )
        # head predicting the mean
        self.p_mu = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, vec_dim),
        )
        # head predicting the log-variance
        self.p_logvar = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, vec_dim),
            nn.Tanh(),
        )

    def temporal_avg_pool(self, x, mask=None):
        """Average ``x`` over time, skipping masked frames.

        Args:
            x (tensor): shape [B, T, D].
            mask (bool tensor): shape [B, T], True on padded frames.

        Returns:
            Tensor of shape [B, D].
        """
        if mask is None:
            return torch.mean(x, dim=1)
        valid_frames = (~mask).sum(dim=1).unsqueeze(1)
        summed = x.masked_fill(mask.unsqueeze(-1), 0).sum(dim=1)
        return torch.div(summed, valid_frames)

    def get_mu_logvar(self, seq, mask):
        """Return the predicted mean and log-variance, each [B, vec_dim]."""
        # Conv1d expects channels first, so swap to [B, D, T] and back
        encoded = self.seq_prenet(seq.transpose(1, 2)).transpose(1, 2)
        pooled = self.temporal_avg_pool(encoded, mask)
        return self.p_mu(pooled), self.p_logvar(pooled)

    def loglikeli(self, seq, vec, mask=None):
        """Un-normalized Gaussian log-likelihood of ``vec`` given ``seq``.

        Args:
            seq (tensor): sequence feature, shape [B, T, D].
            vec (tensor): vector feature, shape [B, D].
            mask (tensor): True on padded frames, shape [B, T].
        """
        mu, logvar = self.get_mu_logvar(seq, mask)
        per_dim = -(mu - vec) ** 2 / logvar.exp() - logvar
        return per_dim.sum(dim=1).mean(dim=0)

    def forward(self, seq, vec, mask=None):
        """Estimate the CLUB upper bound of the mutual information.

        Args:
            seq (tensor): sequence feature, shape [B, T, D].
            vec (tensor): vector feature, shape [B, D].
            mask (tensor): True on padded frames, shape [B, T].

        Returns:
            Scalar tensor with the estimate.
        """
        mu, logvar = self.get_mu_logvar(seq, mask)
        var = logvar.exp()

        if self.is_sampled_version:
            shuffled = torch.randperm(seq.shape[0]).long()
            pos = -(mu - vec) ** 2 / var
            neg = -(mu - vec[shuffled]) ** 2 / var
            gap = (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()
            return gap / 2.

        # matched pairs, [B, D]
        pos = -(mu - vec) ** 2 / 2. / var

        dist = torch.distributed
        if dist.is_available() and dist.is_initialized():
            # presumably the vectors gathered from every rank,
            # [B * world_size, D] -- confirm against SyncFunction
            candidates = SyncFunction.apply(vec)
        else:
            candidates = vec

        # every local prediction [B, 1, D] against every candidate [1, N, D]
        pairwise_sq = (candidates.unsqueeze(0) - mu.unsqueeze(1)) ** 2
        neg = -pairwise_sq.mean(dim=1) / 2. / var
        return (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()

    def learning_loss(self, seq, vec, mask=None):
        """Negative log-likelihood used to train the variational network."""
        return -self.loglikeli(seq, vec, mask)


class CLUBForCategorical(nn.Module):
    """CLUB upper bound of I(X, Y) for continuous X and categorical Y,
    distributed-aware.

    A classifier q(y | x) is fitted with ``learning_loss``. ``forward``
    contrasts its log-probability on matched pairs against the average over
    all logits/label combinations; with an initialised torch.distributed
    process group the logits come from ``SyncFunction.apply``.
    """

    def __init__(self, input_dim, label_num, hidden_size=None):
        """
        Args:
            input_dim: dimension of the input embeddings.
            label_num: number of categorical labels.
            hidden_size: width of an optional hidden layer; when None the
                classifier is a single linear layer.
        """
        super().__init__()

        if hidden_size is not None:
            self.variational_net = nn.Sequential(
                nn.Linear(input_dim, hidden_size),
                nn.ReLU(),
                nn.Linear(hidden_size, label_num),
            )
        else:
            self.variational_net = nn.Linear(input_dim, label_num)

    def forward(self, inputs, labels):
        """Return the scalar MI upper-bound estimate.

        Args:
            inputs: embeddings, shape [batch_size, input_dim].
            labels: label indices, shape [batch_size].
        """
        logits = self.variational_net(inputs)  # [B, label_num]
        n, label_num = logits.shape

        # Row i * n + j pairs the logits of sample i with the label of sample j.
        local_logits = logits.unsqueeze(1).repeat(1, n, 1).reshape(-1, label_num)
        local_labels = labels.unsqueeze(0).repeat(n, 1).reshape(-1)
        local_ce = nn.functional.cross_entropy(
            local_logits, local_labels, reduction='none'
        )
        # log_mat[i, j] = log q(y_j | x_i) within the local batch
        log_mat = (-local_ce).reshape(n, n)
        positive = torch.diag(log_mat).mean()

        dist = torch.distributed
        if not (dist.is_available() and dist.is_initialized()):
            return positive - log_mat.mean()

        # presumably the logits gathered from every rank,
        # [B * world_size, label_num] -- confirm against SyncFunction
        gathered_logits = SyncFunction.apply(logits)
        world_size = dist.get_world_size()
        # [B * world_size, B, label_num]
        gathered_exp = gathered_logits.unsqueeze(1).repeat(1, n, 1)
        # the local labels tiled world_size times, [B * world_size]
        tiled_labels = labels.repeat(world_size)
        # [B, B * world_size]
        tiled_exp = tiled_labels.unsqueeze(0).repeat(n, 1)
        gathered_ce = nn.functional.cross_entropy(
            gathered_exp.reshape(-1, label_num),
            tiled_exp.reshape(-1),
            reduction='none',
        )
        negative = (-gathered_ce).reshape(n, -1).mean()
        return positive - negative

    def loglikeli(self, inputs, labels):
        """Mean log-probability the classifier gives to the true labels."""
        return -nn.functional.cross_entropy(self.variational_net(inputs), labels)

    def learning_loss(self, inputs, labels):
        """Cross-entropy objective used to fit the classifier."""
        return -self.loglikeli(inputs, labels)
    

class CLUB(nn.Module):
    """CLUB: Contrastive Log-ratio Upper Bound estimate of I(X, Y),
    distributed-aware.

    Two MLPs parameterise a diagonal Gaussian q(Y | X): ``p_mu`` gives the
    mean and ``p_logvar`` the tanh-bounded log-variance. With an initialised
    torch.distributed process group, the non-sampled estimate takes its
    negative samples from ``SyncFunction.apply(y_samples)``.

    Methods:
        forward():       MI upper-bound estimate for a batch of samples.
        loglikeli():     un-normalized log-likelihood of q(Y | X).
        learning_loss(): negative ``loglikeli``, used to fit q.

    Arguments:
        x_dim, y_dim:       dimensions of samples from X and Y.
        hidden_size:        both MLPs use ``hidden_size // 2`` hidden units.
        is_sampled_version: use one shuffled negative per sample instead of
                            the batch average.
    """

    def __init__(self, x_dim, y_dim, hidden_size, is_sampled_version=False):
        super().__init__()
        self.is_sampled_version = is_sampled_version
        half = hidden_size // 2
        # mean of q(Y|X)
        self.p_mu = nn.Sequential(
            nn.Linear(x_dim, half),
            nn.ReLU(),
            nn.Linear(half, y_dim),
        )
        # log of variance of q(Y|X)
        self.p_logvar = nn.Sequential(
            nn.Linear(x_dim, half),
            nn.ReLU(),
            nn.Linear(half, y_dim),
            nn.Tanh(),
        )

    def get_mu_logvar(self, x_samples):
        """Return ``(mu, logvar)`` predicted from ``x_samples``."""
        return self.p_mu(x_samples), self.p_logvar(x_samples)

    def forward(self, x_samples, y_samples):
        """Return the scalar MI upper-bound estimate for the batch."""
        mu, logvar = self.get_mu_logvar(x_samples)
        var = logvar.exp()

        if self.is_sampled_version:
            shuffled = torch.randperm(x_samples.shape[0]).long()
            pos = -(mu - y_samples) ** 2 / var
            neg = -(mu - y_samples[shuffled]) ** 2 / var
            gap = (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()
            return gap / 2.

        # matched pairs
        pos = -(mu - y_samples) ** 2 / 2. / var

        dist = torch.distributed
        if dist.is_available() and dist.is_initialized():
            # presumably the samples gathered from every rank,
            # [B * world_size, D] -- confirm against SyncFunction
            candidates = SyncFunction.apply(y_samples)
        else:
            candidates = y_samples

        # every local prediction [B, 1, D] against every candidate [1, N, D]
        pairwise_sq = (candidates.unsqueeze(0) - mu.unsqueeze(1)) ** 2
        neg = -pairwise_sq.mean(dim=1) / 2. / var
        return (pos.sum(dim=-1) - neg.sum(dim=-1)).mean()

    def loglikeli(self, x_samples, y_samples):
        """Un-normalized log-likelihood of ``y_samples`` under q(Y | X)."""
        mu, logvar = self.get_mu_logvar(x_samples)
        per_dim = -(mu - y_samples) ** 2 / logvar.exp() - logvar
        return per_dim.sum(dim=1).mean(dim=0)

    def learning_loss(self, x_samples, y_samples):
        """Negative log-likelihood used to train q(Y | X)."""
        return -self.loglikeli(x_samples, y_samples)


================================================
FILE: dataloader/dataloader.py
================================================
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader
import json
import random
import os
class AudioMotionDataset(Dataset):
    """Dataset yielding ``(wav_path, transcription, description)`` per utterance.

    Args:
        text_file: text file with one ``<utt_id> <transcription>`` per line.
        wav_scp_file: text file with one ``<utt_id> <wav path>`` per line;
            paths are joined onto the parent of this file's directory.
        description_file: JSON file mapping utterance ids to descriptions.

    Only utterances that appear in the description file are kept.
    """

    def __init__(self, text_file, wav_scp_file,description_file):
        # The files hold non-ASCII text, so read them as UTF-8 explicitly
        # rather than relying on the platform's default encoding.
        self.transcriptions = {}
        with open(text_file, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split(maxsplit=1)
                if not parts:
                    # blank line
                    continue
                utt_id = parts[0]
                # an id with nothing after it gets an empty transcription
                transcription = parts[1] if len(parts) > 1 else ''
                self.transcriptions[utt_id] = transcription
        with open(description_file, 'r', encoding='utf-8') as f:
            self.description = json.load(f)

        self.wav_paths = []
        # parent of the directory that contains this file
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        with open(wav_scp_file, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 2:
                    # blank line or an id without a path
                    continue
                utt_id = parts[0]
                wav_path = os.path.join(base_dir, parts[1])
                if utt_id in self.description:
                    self.wav_paths.append((utt_id, wav_path))

    def __getitem__(self, index):
        """Return ``(wav_path, transcription, description)`` for ``index``.

        Raises:
            KeyError: if the utterance has no entry in the text file.
        """
        utt_id, wav_path = self.wav_paths[index]
        describ = self.description[utt_id]
        transcription = self.transcriptions[utt_id]
        return wav_path, transcription,describ

    def __len__(self):
        """Number of utterances that have both a wav path and a description."""
        return len(self.wav_paths)
import soundfile as sf


def collate_fn(batch):
    """Load the audio of every item in ``batch`` and regroup the fields.

    Args:
        batch: sequence of ``(wav_path, transcription, description)`` tuples.

    Returns:
        ``(waveforms, trans, describs, paths)``: four lists of equal length
        holding the loaded waveforms (resampled to 16000 Hz when the file
        uses another rate), the transcriptions, the descriptions and the
        last ``/``-separated component of each wav path.
    """
    target_rate = 16000
    wav_paths, transcriptions, describ = zip(*batch)

    waveforms, trans, describs, paths = [], [], [], []
    for wav_file, text, caption in zip(wav_paths, transcriptions, describ):
        paths.append(wav_file.split('/')[-1])

        audio, rate = sf.read(wav_file)
        if rate != target_rate:
            # resample through torch, then go back to a numpy array
            resampler = torchaudio.transforms.Resample(rate, target_rate)
            as_tensor = torch.tensor(audio).unsqueeze(0).to(torch.float32)
            audio = resampler(as_tensor).squeeze(0).numpy()

        waveforms.append(audio)
        trans.append(text)
        describs.append(caption)
    return waveforms, trans, describs, paths
import time
if __name__ == '__main__':
    # Smoke test: iterate once over the bundled sample dataset, print each
    # batch's transcriptions and descriptions, then the elapsed wall time.
    batch_size = 32
    time1 = time.time()
    # Resolve the dataset directory from this file's location (the same root
    # AudioMotionDataset uses for wav paths) so the script runs from any cwd.
    dataset_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "dataset"
    )
    AM_Dataset = AudioMotionDataset(
        os.path.join(dataset_dir, "text.txt"),
        os.path.join(dataset_dir, "wav.scp"),
        os.path.join(dataset_dir, "fid2captions.json"),
    )
    AM_Dataloader = DataLoader(AM_Dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    for batch_idx, (waveforms, trans, describs, _) in enumerate(AM_Dataloader):
        print(batch_idx, trans, describs)
    print(time.time() - time1)
        
    
    


================================================
FILE: dataset/fid2captions.json
================================================
{
    "tx_emotion_00201000015": "悲伤逆流成河",
    "tx_emotion_00201000107": "伤心难过，声音颤抖，情绪激动失望",
    "tx_emotion_00201000148": "伤心难过，又无能为力",
    "tx_emotion_00201000209": "内心对他人感到抱歉，责怪自己",
    "tx_emotion_00201000220": "悲痛到无法自拔",
    "tx_emotion_00201000274": "伤心不已，悲伤凄凉",
    "tx_emotion_00201000323": "心里无比疼痛，伤心",
    "tx_emotion_00201000340": "心里委屈，烦恼，窝火",
    "tx_emotion_00201000467": "透漏出的是无法掩饰的悲伤",
    "tx_emotion_00202000047": "心情平静，不心急，不急躁。",
    "tx_emotion_00202000118": "发泄积在心中的委屈和气愤",
    "tx_emotion_00202000136": "说话很没有耐心，很浮躁的内心",
    "tx_emotion_00202000301": "怜惜又愤恨，泣不成声，难以控制情绪。",
    "tx_emotion_00202000395": "教育别人，充满坚定的语气，还带有愤怒",
    "tx_emotion_00202000425": "嫉妒，不满，生出怨恨情绪。",
    "tx_emotion_00202000429": "声音高亢、清越，情绪激昂、强烈",
    "tx_emotion_00202000436": "急切，激动，表示立场分明。",
    "tx_emotion_00203000019": "喜悦的心情，喜悦的言辞",
    "tx_emotion_00203000031": "因高兴或受某种刺激而精神激奋",
    "tx_emotion_00203000034": "心情快乐舒畅",
    "tx_emotion_00203000050": "某个节日的到来，倍感开心和欢喜",
    "tx_emotion_00203000065": "语调间掩饰不住的开心快乐的喜悦",
    "tx_emotion_00203000066": "言语间的喜悦和开心很明显",
    "tx_emotion_00203000069": "对什么感到很喜欢，心里很是欢喜",
    "tx_emotion_00203000072": "语气充斥着开心",
    "tx_emotion_00203000086": "心情喜悦无比，兴高采烈",
    "tx_emotion_00203000095": "想起过去的温馨场面，心里很是温暖，开心",
    "tx_emotion_00203000111": "兴致高，情绪热烈，兴致勃勃",
    "tx_emotion_00203000128": "言语中尽是喜悦欢喜之意",
    "tx_emotion_00203000273": "感到幸福和满意",
    "tx_emotion_00203000352": "感到幸福和满意",
    "tx_emotion_00203000357": "因高兴或受某种刺激而精神激奋",
    "tx_emotion_00203000364": "因高兴或受某种刺激而精神激奋",
    "tx_emotion_00203000372": "心里高兴，言语间表现出欢喜",
    "tx_emotion_00203000379": "因得到某个东西，非常开心激动",
    "tx_emotion_00203000399": "心情快乐舒畅",
    "tx_emotion_00203000412": "心花怒放、欢天喜地、欣喜若狂的样子",
    "tx_emotion_00203000425": "无法言喻的开心快乐",
    "tx_emotion_00203000448": "心里开心愉悦，还有一丝甜蜜",
    "tx_emotion_00203000470": "言语间的喜悦和开心很明显",
    "tx_emotion_00203000485": "心情十分愉快，畅快",
    "tx_emotion_00203000486": "因某事的如意而高兴无比",
    "tx_emotion_00204000391": "心里生气，不爽",
    "tx_emotion_00204000426": "心中醋意正浓，觉得有被抛弃之感",
    "tx_emotion_00204000458": "嫉妒，不满，生出怨恨情绪",
    "tx_emotion_00204000494": "心生不满，厌恶。",
    "tx_emotion_00205000062": "怒火重重，满是气愤",
    "tx_emotion_00205000069": "心里难受，不舒服",
    "tx_emotion_00205000105": "心里苦楚，内心悲伤，悲痛，委屈",
    "tx_emotion_00205000110": "心里苦楚，内心悲伤，悲痛，委屈",
    "tx_emotion_00205000119": "心里苦楚，内心悲伤，悲痛，委屈",
    "tx_emotion_00205000127": "情绪激动警惕为保护她而着急",
    "tx_emotion_00205000198": "怜惜又愤恨，泣不成声，难以控制情绪",
    "tx_emotion_00205000265": "傲气十足，对自己有信心",
    "tx_emotion_00206000061": "好奇心所致，保持疑惑，想要知道答案。",
    "tx_emotion_00206000149": "不解的问",
    "tx_emotion_00206000167": "意想不到，感到惊讶奇怪。",
    "tx_emotion_00206000186": "感到疑惑，不知该怎么办",
    "tx_emotion_00206000235": "充满疑问的语气，和不解的语调",
    "tx_emotion_00206000240": "好奇心所致，保持疑惑，想要知道答案。",
    "tx_emotion_00206000315": "百思不得其解",
    "tx_emotion_00206000337": "好奇心所致，保持疑惑，想要知道答案。",
    "tx_emotion_00206000380": "为他人担心害怕",
    "tx_emotion_00206000458": "内心愉悦，开心",
    "tx_emotion_00206000480": "充满疑问的语气，和不解的语调",
    "tx_emotion_00207000018": "抱着希望，结果却是深深的失望",
    "tx_emotion_00207000080": "悲哀痛苦，悲伤难过",
    "tx_emotion_00207000113": "悲哀痛苦，悲伤难过",
    "tx_emotion_00207000162": "内心内疚，泣不成声，情绪无法控制",
    "tx_emotion_00207000211": "抱着希望，结果却是深深的失望",
    "tx_emotion_00207000252": "心境十分悲伤",
    "tx_emotion_00208000087": "心里苦楚，内心悲伤，悲痛，委屈",
    "tx_emotion_00208000257": "愤愤的感慨",
    "tx_emotion_00208000303": "气愤像野火一样在心里烧",
    "tx_emotion_00208000327": "感到不满，很愤怒，打抱不平的样子",
    "tx_emotion_00208000390": "心里生气，恼怒且气愤",
    "tx_emotion_00209000005": "充满疑问，很好奇",
    "tx_emotion_00209000026": "心情愉悦，开心极了",
    "tx_emotion_00209000060": "情绪激动警惕为保护她而着急",
    "tx_emotion_00209000215": "悲哀痛苦，悲伤难过",
    "tx_emotion_00209000269": "非常愉快、高兴",
    "tx_emotion_00209000346": "目瞪口呆、桥舌不下、惊讶的样子",
    "tx_emotion_00210000049": "声音断断续续，口齿不清，难过得哭泣，充满怜悯之心",
    "tx_emotion_00210000124": "内心内疚，泣不成声，情绪无法控制",
    "tx_emotion_00210000249": "声音断断续续，口齿不清，难过得哭泣，充满怜悯之心",
    "tx_emotion_00210000272": "奔溃大哭，无法控制自己的情绪",
    "tx_emotion_00210000419": "情绪激动警惕为保护她而着急",
    "tx_emotion_00210000453": "不满，生出怨恨情绪",
    "tx_emotion_00210000476": "声音颤抖，充满悲伤，凄凉",
    "tx_emotion_00301000019": "不堪忍受，十分难过，情绪激动难以控制，声音忽上忽下",
    "tx_emotion_00301000036": "悲哀痛苦，悲伤难过。",
    "tx_emotion_00301000052": "悲哀痛苦，悲伤难过。",
    "tx_emotion_00301000185": "对往事的追忆中带着一丝甜蜜，还有伤感",
    "tx_emotion_00301000203": "极端的伤心，心里悲痛，心如刀割",
    "tx_emotion_00301000457": "伤心难过，声音颤抖，情绪激动失望",
    "tx_emotion_00301000461": "因不如意的事心里悲痛，伤心难过。",
    "tx_emotion_00301000494": "伤心难过，又无能为力",
    "tx_emotion_00302000016": "心生不满，厌恶。",
    "tx_emotion_00302000027": "心生不满，厌恶。",
    "tx_emotion_00302000054": "因不如意的事心里悲痛，伤心难过",
    "tx_emotion_00302000090": "情绪激动警惕为保护她而着急。",
    "tx_emotion_00302000107": "充满正气，情绪激动",
    "tx_emotion_00302000216": "心情平静，不急躁。",
    "tx_emotion_00302000224": "情绪失控，随时会爆发",
    "tx_emotion_00302000304": "急切，激动，表示立场分明",
    "tx_emotion_00302000356": "不堪忍受，十分难过，情绪激动难以控制，声音忽上忽下",
    "tx_emotion_00302000470": "事事埋怨的感觉",
    "tx_emotion_00303000023": "心情快乐舒畅",
    "tx_emotion_00303000046": "心情快乐舒畅",
    "tx_emotion_00303000052": "非常愉快、高兴",
    "tx_emotion_00303000056": "心情快乐舒畅",
    "tx_emotion_00303000080": "说话从容不迫",
    "tx_emotion_00303000082": "感到幸福和满意",
    "tx_emotion_00303000117": "愉快高兴，心情舒畅",
    "tx_emotion_00303000119": "形容内心十分开心，高兴",
    "tx_emotion_00303000127": "喜事临门，心情美好而快乐。",
    "tx_emotion_00303000129": "心情快乐舒畅",
    "tx_emotion_00303000130": "心情十分高兴，乐在其中。",
    "tx_emotion_00303000148": "听到开心的事，心情舒畅",
    "tx_emotion_00303000158": "心情快乐舒畅",
    "tx_emotion_00303000167": "听到开心的事，愉快高兴",
    "tx_emotion_00303000173": "愉快高兴，心情舒畅",
    "tx_emotion_00303000182": "丝毫不加节制地表露欢乐、热情和惊奇",
    "tx_emotion_00303000198": "心情快乐舒畅",
    "tx_emotion_00303000211": "心情快乐舒畅",
    "tx_emotion_00303000236": "心情快乐舒畅",
    "tx_emotion_00303000260": "心情快乐舒畅",
    "tx_emotion_00303000298": "听到开心的事，心情舒畅",
    "tx_emotion_00303000310": "出乎意料的高兴",
    "tx_emotion_00303000354": "心情快乐舒畅",
    "tx_emotion_00303000360": "愉快高兴，心情舒畅",
    "tx_emotion_00303000363": "心情快乐舒畅",
    "tx_emotion_00303000389": "心情快乐舒畅",
    "tx_emotion_00303000396": "感到满足时的高兴心情",
    "tx_emotion_00303000421": "听到开心的事，心情舒畅",
    "tx_emotion_00303000433": "形容内心十分开心，高兴",
    "tx_emotion_00303000457": "很兴奋心情很激动",
    "tx_emotion_00303000462": "愉快高兴，心情舒畅",
    "tx_emotion_00303000465": "很开心很愉悦",
    "tx_emotion_00303000474": "非常愉快、高兴",
    "tx_emotion_00304000049": "语气激动，语调愤怒，声音洪亮，怒不可遏",
    "tx_emotion_00304000079": "心里激愤",
    "tx_emotion_00304000169": "对事情的气愤",
    "tx_emotion_00304000235": "语气激动，语调忿忿不平",
    "tx_emotion_00304000459": "感觉怒火中烧，无法压抑",
    "tx_emotion_00305000005": "急切，激动，表示立场分明。",
    "tx_emotion_00305000009": "怒气很大，控制不住了",
    "tx_emotion_00305000014": "内心很愤怒，快要爆发出来了",
    "tx_emotion_00305000119": "悲伤难过",
    "tx_emotion_00305000164": "激动而愤怒，发怒，恼怒",
    "tx_emotion_00305000183": "心生不满，厌恶。",
    "tx_emotion_00305000280": "平缓的情绪，不骄不躁",
    "tx_emotion_00305000283": "对往事的追忆中带有心酸，还有一丝伤感。",
    "tx_emotion_00305000308": "心生不满，厌恶。",
    "tx_emotion_00305000383": "对人真诚的劝告或忠告，深刻有力。",
    "tx_emotion_00306000098": "心里疑惑不解，对此感到好奇",
    "tx_emotion_00306000101": "因对某事存在疑问而感到十分好奇",
    "tx_emotion_00306000202": "心中充满好奇",
    "tx_emotion_00306000261": "对新鲜事物有着强烈的好奇心",
    "tx_emotion_00306000405": "觉得自己与众不同，自以为是",
    "tx_emotion_00306000460": "心中疑惑不解，充满好奇",
    "tx_emotion_00309000124": "心里开心激动",
    "tx_emotion_00309000154": "平静叙事，无情绪",
    "tx_emotion_00309000160": "感到又惊又喜，不可思议",
    "tx_emotion_00309000167": "内心着急于想知道某事",
    "tx_emotion_00309000172": "没想到的事情",
    "tx_emotion_00309000183": "内心恍然大悟，突然明白",
    "tx_emotion_00309000269": "突如其来的事情",
    "tx_emotion_00309000324": "喜出望外，感受到了希望",
    "tx_emotion_00309000349": "语气惊喜，略带兴奋",
    "tx_emotion_00309000452": "恍然大悟，原来是他",
    "tx_emotion_00400000011": "平静的阐述，无情绪",
    "tx_emotion_00400000026": "深沉，不外露，耐人寻味",
    "tx_emotion_00400000160": "语气平和，语调平静",
    "tx_emotion_00400000196": "形容端正老实，合乎常理",
    "tx_emotion_00400000259": "语气平和，语调平静",
    "tx_emotion_00400000378": "深沉，不外露，耐人寻味",
    "tx_emotion_00401000087": "对往事的回忆，不禁发出感叹",
    "tx_emotion_00401000174": "让人听着很心酸，可悲的样子",
    "tx_emotion_00401000213": "伤心难过，很是烦闷",
    "tx_emotion_00401000239": "悲伤到无力，悲痛到窒息",
    "tx_emotion_00401000242": "让人觉得很凄凉，可悲的样子",
    "tx_emotion_00401000281": "悲痛到无法自拔",
    "tx_emotion_00401000297": "心里苦闷，忧伤不已",
    "tx_emotion_00401000327": "很低沉的语气，让人感到忧伤和失落",
    "tx_emotion_00401000339": "悲伤逆流成河",
    "tx_emotion_00401000440": "思绪万千，心中忧愁满满",
    "tx_emotion_00402000196": "从语气能感觉到很尊重很敬重",
    "tx_emotion_00402000283": "语气着急，气急败坏",
    "tx_emotion_00402000299": "急切，激动，大声表达自己的不满。",
    "tx_emotion_00402000480": "愤怒得咬牙切齿，恶狠狠的样子",
    "tx_emotion_00403000052": "心情快乐舒畅",
    "tx_emotion_00403000060": "心里甜的跟蜜一样",
    "tx_emotion_00403000077": "高兴的压抑不住心跳",
    "tx_emotion_00403000099": "感到幸福或满意",
    "tx_emotion_00403000131": "形容内心十分开心，高兴",
    "tx_emotion_00403000143": "无比欢乐和幸福",
    "tx_emotion_00403000145": "越想心里越快乐，越想心里越美",
    "tx_emotion_00403000147": "内心十分疑惑，不明白",
    "tx_emotion_00403000157": "发自内心的喜悦",
    "tx_emotion_00403000163": "心里高兴极了",
    "tx_emotion_00403000173": "感到满足时的高兴心情",
    "tx_emotion_00403000175": "内心感到心满意足",
    "tx_emotion_00403000197": "心里兴奋不已，感觉值得",
    "tx_emotion_00403000252": "感到满足时的高兴心情",
    "tx_emotion_00403000255": "内心抑制不住的喜悦",
    "tx_emotion_00403000359": "心里很是欢愉，开心",
    "tx_emotion_00403000393": "形容内心十分开心，高兴",
    "tx_emotion_00403000395": "情绪激动欢乐，很是开心",
    "tx_emotion_00403000432": "形容内心十分开心，高兴",
    "tx_emotion_00403000439": "非常愉快、高兴",
    "tx_emotion_00403000451": "心里甚是开心，喜悦",
    "tx_emotion_00403000482": "内心感到兴奋不已，十分刺激",
    "tx_emotion_00403000494": "感到心满意足，十分快乐",
    "tx_emotion_00405000239": "心里生气，恼怒且气愤",
    "tx_emotion_00405000329": "感觉自己怒火中烧",
    "tx_emotion_00409000007": "言语里满是惊讶之意",
    "tx_emotion_00409000081": "好奇心所致，保持疑惑，想要知道答案。",
    "tx_emotion_00409000119": "语气虽然有些怀疑，但是更多的是尊重",
    "tx_emotion_00409000228": "意外的感觉，感到震撼",
    "tx_emotion_00409000285": "很轻松的气氛，开着玩笑",
    "tx_emotion_00409000323": "带有佩服的言语",
    "tx_emotion_00409000325": "心情快乐舒畅",
    "tx_emotion_00409000441": "心里感到很是惊讶和惊喜",
    "tx_emotion_00409000462": "心里感到很是开心又很惊讶",
    "tx_emotion_00409000470": "气愤不已，情绪很是激愤",
    "tx_emulate_00_060_0001_000030": "心情畅快，语气坚定，有欢喜真挚的感觉",
    "tx_emulate_00_102_0004_000088": "声音高昂，情绪十分激动，内心十分不愿意相信",
    "tx_emulate_00_103_0001_000017": "心情十分兴奋，语速很快，心里十分期待",
    "tx_emulate_00_104_0001_000113": "声调上扬，声音尖锐洪亮，字正腔圆，情绪有些激动，质疑责骂的语气表达了心中的不满与质疑",
    "tx_emulate_00_108_0001_000008": "语调不高，疑问的口吻满是不解，带有一些迟疑，心中不理解",
    "tx_emulate_00_108_0001_000079": "声音微弱，语气中带有一丝慌张和心虚",
    "tx_emulate_00_108_0002_000070": "上来语调就上扬，愉悦的感情溢于言表，后面是认真的叮嘱，表达自己对当前的心满意足",
    "tx_emulate_00_108_0004_000011": "声音高拔，语调高，语气中带着疑惑，十分意外的感觉",
    "tx_emulate_00_108_0004_000039": "语气轻快，言语里透出内心的随意与无所谓的得意洋洋的心情。",
    "tx_emulate_00_109_0004_000042": "心情有点着急，语气表达出自己的关心，态度很诚恳，温和。",
    "tx_emulate_00_109_0004_000071": "语速极快，情绪中非常不爽，声音很尖锐，后面又流露放缓了态度",
    "tx_emulate_00_109_0005_000085": "语速较快，情绪波动起伏，语调高亢起伏，在指责不满之余还有质疑的意味",
    "tx_emulate_00_109_0005_000091": "语速快，语气急促，言语里透出内心的恼怒与不耐烦的情绪。",
    "tx_emulate_00_110_0002_000028": "声音很喜气洋洋的，欢快轻松的语调展现内心的欢愉",
    "tx_emulate_00_110_0002_000038": "语速很快，声音很高，情绪中充满释然。",
    "tx_emulate_00_110_0004_000059": "又惊又喜，语气中透着轻松快乐",
    "tx_emulate_00_110_0004_000099": "声音断断续续，语气中带着窘迫",
    "tx_emulate_00_111_0001_000023": "语气中充满了惊讶，心情分外的愉悦，强压制住心中的喜悦。",
    "tx_emulate_00_111_0001_000042": "声色十分的温柔，难以抑制躁动的心，开心高兴到了极点。",
    "tx_emulate_00_112_0001_000066": "心里的快乐全都表现在脸上，声调上扬",
    "tx_emulate_00_112_0002_000016": "语速快，声音中充满着急和担心",
    "tx_emulate_00_112_0002_000070": "声调上扬，语气轻松愉快，情绪很高",
    "tx_emulate_00_112_0005_000089": "语气中带有荣幸，开心极了",
    "tx_emulate_00_112_0006_000020": "祈求的口吻中带有些委屈",
    "tx_emulate_00_113_0002_000003": "语气十分做作，做作中带着些不满",
    "tx_emulate_00_114_0001_000001": "语速不疾不徐，语气十分笃定，很有说服力",
    "tx_emulate_00_114_0002_000030": "是揶揄的口吻，语气十分轻快",
    "tx_emulate_00_114_0003_000026": "语调宛转起伏，语速稍快，语气十分肯定",
    "tx_emulate_00_115_0002_000007": "精神抖擞，语气中充满得意和肯定",
    "tx_emulate_00_115_0005_000035": "语速较快，声音较高，情绪中带着一些焦急和抱怨。",
    "tx_emulate_00_115_0009_000024": "言语中透露出轻蔑，满眼的不屑而又格外的气愤，很是气不过。",
    "tx_emulate_00_115_0010_000012": "前半句语气坚决，带有不可置否之意，后半句则可见其淡然处之之姿。",
    "tx_emulate_00_117_0002_000013": "声音微扬，语调低缓，话语中带着无所谓，一点也不重视的感觉",
    "tx_emulate_00_117_0005_000059": "声音高扬，语气中带着焦急，非常无助，一筹莫展的感觉",
    "tx_emulate_00_118_0002_000020": "声音欢快活泼，情绪很饱满，带着欢乐的情感。",
    "tx_emulate_00_118_0002_000022": "情绪激动，声调下降，声音尖，内心气愤地无法抑制",
    "tx_emulate_00_118_0005_000032": "语气激动，声音中透露出不理解。",
    "tx_emulate_00_119_0001_000001": "语速比较快，声音中透露着一种轻快的感觉，很兴奋",
    "tx_emulate_00_119_0001_000034": "声音高挑，语速微快，话语里带着责备，心里非常着急，很是凌乱，没有办法的感觉",
    "tx_emulate_00_119_0001_000049": "情绪很高涨，语气十分轻松，让人听了觉得想笑",
    "tx_emulate_00_120_0002_000071": "语调急促且高扬，语气紧张，流露害怕恐惧之情。",
    "tx_emulate_00_121_0002_000009": "声音带着欢快，心里比较开心，满目含笑。",
    "tx_emulate_00_122_0001_000025": "情绪激扬，心里带着喜悦之情，由内而发的开心。",
    "tx_emulate_00_123_0001_000048": "语气很轻松，表现的镇定自若，很坦然的样子。",
    "tx_emulate_00_123_0002_000003": "语速较缓，声音轻柔，突出内心的欢喜自豪",
    "tx_emulate_00_123_0009_000012": "语气柔和，态度温和，心情放松很愉快。",
    "tx_emulate_00_125_0001_000067": "声音逐渐高扬，语调渐高，话语中带着惊恐和害怕，内心非常的紧张的情绪",
    "tx_emulate_00_125_0002_000049": "带开玩笑的心态说话，情绪有不屑，声音坚定自信",
    "tx_emulate_00_125_0002_000058": "语调十分欢快，整个人神采飞扬的，声音中满是得意，很是满意",
    "tx_emulate_00_125_0008_000017": "声音不急不快，内心带着感叹和抒发的心情。",
    "tx_emulate_00_126_0001_000039": "语调很低，但是语速稍快，有些畏缩的语气透露出内心的恐惧和担心",
    "tx_emulate_00_127_0001_000084": "话语中带着一种收到意外之喜后的喜悦",
    "tx_emulate_00_128_0006_000033": "言语间充满嫉妒和羡慕的语气 艳羡不已",
    "tx_emulate_00_129_0004_000056": "先是语调很急切的表示自己的认真，后面轻快的声音显示了满心的愉悦",
    "tx_emulate_00_129_0004_000057": "声音轻盈干练，语气中带着骄傲，豪放的情感。",
    "tx_emulate_00_129_0005_000029": "心情急切，着急迫切，语速特别快，焦急烦躁，心神不定",
    "tx_emulate_00_130_0001_000057": "语气不确定，有疑问和困惑，摇摆不定",
    "tx_emulate_00_130_0003_000017": "语气中透露欢乐，带有些些调侃",
    "tx_emulate_00_131_0005_000061": "语气很重，语速很快，情绪十分不耐烦。",
    "tx_emulate_00_132_0004_000026": "语气中充满笃定和喜悦，心里十分开心、有把握",
    "tx_emulate_00_132_0005_000016": "语气中带有急切、惶恐、不知所措",
    "tx_emulate_00_133_0005_000062": "语气很温柔，声音很细，透露着一种讽刺不满的感觉",
    "tx_emulate_00_135_0002_000003": "声音清脆，带着笑声，心里的快乐展现在声音中",
    "tx_emulate_00_135_0004_000015": "语速快，声音轻快明亮，包含激动和急切、担忧",
    "tx_emulate_00_137_0002_000079": "情绪中充满了无比的自豪和欢乐，语气欢快，声音十分轻快",
    "tx_emulate_00_137_0003_000022": "语速缓慢，音调较高，语气轻松愉快，内心很开心，声音有些调皮",
    "tx_emulate_00_137_0003_000082": "说话时停顿许久，语速很慢，语调微微上扬，心里感到十分疑惑",
    "tx_emulate_00_137_0005_000092": "先是感叹一声，后面语调轻快地重复言辞，感到十分高兴",
    "tx_emulate_00_144_0005_000014": "语气十分不爽，略有些愤懑",
    "tx_emulate_00_144_0007_000056": "语气很轻快，心情很舒畅，带着笑意。",
    "tx_emulate_00_150_0005_000005": "语气轻快十分满足，开心极了",
    "tx_emulate_00_150_0005_000006": "心情愉悦，语气很放松，欢快的情绪。",
    "tx_emulate_00_151_0001_000018": "语气十分感激，还略有些开心",
    "tx_emulate_00_151_0007_000028": "语气较为轻快，心情舒畅，愉悦的情绪。",
    "tx_emulate_00_153_0001_000033": "心里止不住地难过，语气中带有淡淡的失落",
    "tx_emulate_00_157_0001_000031": "声音非常高昂激动，显示内心骄傲自得和心满意足",
    "tx_emulate_00_158_0002_000007": "语气中充满气愤、不满，表现内心的愤怒",
    "tx_emulate_00_158_0002_000049": "声音强烈，带有浓烈的哀怨情绪",
    "tx_emulate_00_158_0003_000068": "声音高昂，语速较快，情绪中充满愤怒和不满。",
    "tx_emulate_00_159_0001_000006": "非常担心，语气中满是担忧、关怀",
    "tx_emulate_00_162_0002_000075": "说话先是带有结巴，后来非常的急躁恼火致使自己的说话语速加快",
    "tx_emulate_00_162_0005_000004": "语速较快，声音较高，情绪中带着一些焦急和质问。",
    "tx_emulate_00_171_0006_000036": "一开始说话语调不高，声音很低，后来语速却较快显示出心有不甘，但是又毫无办法",
    "tx_emulate_00_179_0005_000021": "声音高挑，语速轻快，语气中带着心满意足的高兴。",
    "tx_emulate_00_185_0002_000007": "声音轻快，高兴而喜悦，内心各种的顺畅与如意。",
    "tx_emulate_00_186_0001_000023": "声调较高，语速较快，语气中带着愉悦，表明说话者欣喜愉悦的心情",
    "tx_emulate_01_001_0018_000065": "声音很坚定，着急更甚，心绪火烧火燎的。",
    "tx_emulate_01_001_0019_000077": "语气充满了惊讶，好像难以想象居然会这样。",
    "tx_emulate_01_002_0005_000008": "言辞中透露出浓浓的悲辛和顾忌之意，亦可闻其豪情和激昂之音。",
    "tx_emulate_01_002_0007_000012": "声音用力，语气上扬，非常生气的情感。",
    "tx_emulate_01_002_0017_000057": "在感叹中，内心的赞赏喜爱之意无法掩盖，亦带有丝丝惋惜和怜爱之感。",
    "tx_emulate_01_003_0003_000018": "非常高兴，话语中透露着喜欢和愉悦",
    "tx_emulate_01_004_0006_000023": "言语中充满了喜悦，情绪十分激动，心情很是舒畅。",
    "tx_emulate_01_005_0004_000013": "语气中透露着激动、欣赏和高兴，声音高扬",
    "tx_emulate_01_005_0005_000022": "语气不坚定，声音重心在前，情绪略有起伏，",
    "tx_emulate_01_006_0020_000094": "心中非常高兴。语速较快的说出来。",
    "tx_emulate_01_007_0006_000025": "声音拔高，语速较快，语气中带着讨厌，非常不喜的情绪",
    "tx_emulate_01_007_0008_000034": "声音高昂，语气渐重，话语中带着中肯，说话非常有分量的情绪",
    "tx_emulate_01_007_0021_000120": "十分认真的语气，特别的坦然，语重心长的。",
    "tx_emulate_01_007_0024_000091": "声音高亢尖利，语调拔高，语气里带着凶狠和恼怒的情绪",
    "tx_emulate_01_007_0025_000099": "声音高挑，语速微快，话语里带着坚决和果敢的情绪",
    "tx_emulate_01_008_0011_000050": "声音高扬，话语中透着哀伤和心疼的情绪",
    "tx_emulate_01_008_0017_000077": "声音坚定，语气中透露出执着的情感。",
    "tx_emulate_01_009_0003_000026": "声音低婉，语速微扬，话语里带着羞怯，十分可爱的感觉",
    "tx_emulate_01_010_0012_000085": "声音严厉，态度严肃认真，表现不可侵犯的气势",
    "tx_emulate_01_011_0005_000017": "声音较快，声音较高，内心感到焦急和慌张的情绪。",
    "tx_emulate_01_011_0009_000047": "前重后轻的语调，有着不屑的语气",
    "tx_emulate_01_011_0010_000050": "声音平缓，语速适中，情绪中带着幸灾乐祸感觉。",
    "tx_emulate_01_011_0011_000059": "理直气壮的语气带着些微着急",
    "tx_emulate_01_011_0013_000073": "升起的语调表明了纳闷的心情",
    "tx_emulate_01_012_0004_000021": "说话的语速非常的烦闷声音中带着一些气愤与不满。",
    "tx_emulate_01_012_0023_000084": "语气低沉，情绪低落，内心感到灰心和失望。",
    "tx_emulate_01_012_0025_000092": "声音甜美温柔，内心带有喜笑颜开的兴奋情绪。",
    "tx_emulate_01_013_0002_000004": "声音微扬，话语中带着关切，十分急切的感觉",
    "tx_emulate_01_013_0007_000025": "语速较快，内心不耐烦，带有抱怨不满的情绪。",
    "tx_emulate_01_013_0007_000032": "声音微扬，语调渐高，话语里带着安抚，心底十分善良的",
    "tx_emulate_01_014_0004_000016": "声音高拔尖锐，语调拔高，话语中带着愤怒和心疼的感觉",
    "tx_emulate_01_014_0005_000035": "语速快，声音激烈，表现心里的担忧、急切",
    "tx_emulate_01_014_0011_000061": "语气轻快，内心放松，轻松，得意洋洋的高兴情感。",
    "tx_emulate_01_014_0016_000086": "语速很慢，声音低沉，内心充满感激和感动的情绪。",
    "tx_emulate_01_016_0003_000006": "声音高，语速快，话语中带着种种的质问，表示迷惑不解。",
    "tx_emulate_01_016_0007_000035": "语气中带着关心安慰的情感，语速正常，声音微微低沉。",
    "tx_emulate_01_016_0008_000037": "语气先是急切的，然后转变为迟疑和结结巴巴。",
    "tx_emulate_01_016_0016_000067": "声音微微的脆弱，话语中带着坚定与自信的情感。",
    "tx_emulate_01_016_0023_000150": "语气有些慌乱，声音刻意压低，态度严肃",
    "tx_emulate_01_018_0003_000014": "语调较高，情绪很欢快，语气充满了开心。",
    "tx_emulate_01_020_0001_000004": "赞美的语气，充满着满意的心情，夹杂着一点感叹",
    "tx_emulate_01_020_0007_000012": "句子中带有一种激动的心情，还有一点愉快的语气，还有一点喜悦",
    "tx_emulate_01_020_0017_000067": "情绪非常激动，说话声带着哭腔，久久不能平静",
    "tx_emulate_01_021_0003_000015": "语气低沉且柔和，心里满是疑惑，一头雾水而纳闷不解。",
    "tx_emulate_01_022_0009_000026": "声色极其的凄楚，声音沉闷，心里很不满且气愤。",
    "tx_emulate_01_022_0022_000108": "语气较重，声音高昂，情绪非常生气。",
    "tx_emulate_01_022_0022_000111": "语速很快，特别着急的感觉，还有些烦躁，大发雷霆，恨铁不成钢的语气。",
    "tx_emulate_01_023_0006_000042": "语气很重，语速缓慢，情绪中带着不悦和不高兴。",
    "tx_emulate_01_023_0010_000063": "语速很快语气很重，对别人感到不耐烦。",
    "tx_emulate_01_023_0010_000064": "声音较低，语速较快，情绪中充满担忧。",
    "tx_emulate_01_023_0014_000081": "情绪很烦乱的样子，语速较缓慢。",
    "tx_emulate_01_024_0009_000083": "语气高昂，情绪中带着抱怨与责怪。",
    "tx_emulate_01_024_0010_000114": "语速较快，内心十分的激动和坚定的激昂的情感。",
    "tx_emulate_01_024_0012_000117": "语速过快，表达出内心心急如焚的情绪。",
    "tx_emulate_01_024_0013_000139": "声音高昂，语气很重，情绪中带着愤恨和气怒。",
    "tx_emulate_01_026_0005_000031": "语速较快，内心感到不满意，语气里充满嫌弃和不高兴。",
    "tx_emulate_01_028_0008_000035": "语速较慢，语调较高，语气较重，凸显了说话者情绪的极度不悦，内心不理解",
    "tx_emulate_01_028_0009_000046": "语速较慢，语调较低，语气中带着慨叹，凸显了说话者情绪的惆怅",
    "tx_emulate_01_028_0009_000054": "声音隐约带着哭腔，语气中充满了委屈，声音娇媚",
    "tx_emulate_01_029_0013_000037": "语速较快，语调较高，语气中充满怒气，凸显了说话者内心的愤怒，情绪激动",
    "tx_emulate_01_030_0014_000035": "声音微扬，语中含笑，因为舒服而心情都非常的开怀的感觉",
    "tx_emulate_01_031_0009_000032": "语速较快，语调较高，语气十分自豪，表明说话者内心志得意满",
    "tx_emulate_01_031_0011_000045": "语速较慢，语调较高，语气中充满亢奋，凸显了说话者内心的激动，情绪高涨",
    "tx_emulate_01_031_0015_000061": "声音沉稳，语速微缓，话语中带着谦卑，说话做事极度小心的感觉",
    "tx_emulate_01_031_0020_000073": "语速较快，语调较高，语气充满了呵斥，说明说话者情绪的气愤，内心愤怒",
    "tx_emulate_01_032_0011_000017": "语速较快，语调较高，语气带着开心的感觉，表明说话者情绪十分高涨，兴致勃勃",
    "tx_emulate_01_032_0011_000020": "语速较快，语调较高，语气中充满了鼓舞，表明说话者内心十分兴奋，情绪高涨",
    "tx_emulate_01_032_0018_000042": "声音高扬，语调顿挫，语气里带着撒娇，十分生气的，啥话都听不进去的感觉",
    "tx_emulate_01_033_0004_000039": "语速稍慢，情绪有一丝尴尬，语气有些遮遮掩掩的感觉",
    "tx_emulate_01_033_0006_000075": "每个字都掷地有声，表达不容置喙之情。",
    "tx_emulate_01_034_0003_000030": "语调持续高涨，语气轻松，流露出兴高采烈的心情。",
    "tx_emulate_01_034_0005_000040": "语调冷淡急促，内心充满不悦与好奇，带有些许的醋意",
    "tx_emulate_01_190_0001_000041": "语调欢快上扬，内心高兴极了。",
    "tx_emulate_01_190_0001_000042": "语调欢快得意，心情舒畅，精神愉快。",
    "tx_emulate_01_191_0005_000043": "语调由低变激昂，先是很无奈，后来发出了急躁的抱怨，心里非常不满",
    "tx_emulate_01_192_0003_000046": "声音尖锐，语调高昂，非常强烈地表达了心中的不爽",
    "tx_emulate_01_196_0001_000024": "语气非常的虚弱，像是刚刚哭过，很伤心难过",
    "tx_emulate_01_196_0003_000051": "语气中充满了无奈，感觉实在是不知道怎么办",
    "tx_emulate_01_196_0004_000027": "语气中充满了震惊，表示什么不愿意相信的感觉。",
    "tx_emulate_01_199_0001_000022": "情绪有些激动，说话的语气也很不好，语速很快。",
    "tx_emulate_01_200_0002_000001": "语调冷淡急促，内心充满不满与懊恼，带有些许的嘲讽",
    "tx_emulate_01_202_0001_000005": "语调轻柔急促，内心充满好奇与焦躁",
    "tx_emulate_01_202_0001_000036": "语调高亢急促，铿锵有力，内心充满愤怒和嘲讽，带有些许的惊讶",
    "tx_emulate_01_203_0001_000002": "语气不爽，言语里满是不快，非常的生气和恼怒",
    "tx_emulate_01_203_0008_000028": "语调低沉急促，抑扬顿挫，内心充满鄙视和看不惯，带有嘲讽和傲慢之意",
    "tx_emulate_01_205_0002_000018": "语调低沉有力，语气冷淡，内心充满烦躁和不悦",
    "tx_emulate_01_206_0004_000007": "语调高亢急促，铿锵有力，内心充满焦躁与愤怒，带有些许的委屈和斥责",
    "tx_emulate_01_206_0005_000031": "语调冷淡舒缓，情绪颓靡，内心感到十分不在意和不关心",
    "tx_emulate_01_209_0001_000021": "语气中带着不敢相信，但是又保持着怀疑的态度",
    "tx_emulate_01_210_0009_000010": "语速快，语气强硬，铿锵有力的语气透出内心的镇定与平静的情绪。",
    "tx_emulate_01_216_0004_000039": "语调高扬，声音铿锵有力，表现出心中的兴奋",
    "tx_emulate_01_219_0002_000027": "语气较重，情绪中带着一些愤怒和抱怨",
    "tx_emulate_02_001_0015_000048": "语气激昂，表达内心不爽，不快的感受。",
    "tx_emulate_02_002_0016_000058": "语调上扬，语速加快，言语狂妄，居功自傲",
    "tx_emulate_02_002_0018_000067": "语速较快，语调较高，语气中带着质疑，凸显了说话者内心的不满和不悦",
    "tx_emulate_02_003_0023_000078": "语速较缓，语气加强，表现出心中的埋怨和责怪之情",
    "tx_emulate_02_004_0004_000018": "声音有力，语调高扬，言语充满了担当和气势",
    "tx_emulate_02_005_0005_000018": "语调上扬，情绪激愤，流露出心中的气愤",
    "tx_emulate_02_006_0002_000017": "语速较快，言语夸耀感叹，表现出心中敬畏",
    "tx_emulate_02_006_0009_000053": "声音加高，语气愤愤，言语间充满了振奋",
    "tx_emulate_02_006_0009_000056": "语气嚣张，语调上扬，情绪暴躁，表现出怪罪情感",
    "tx_emulate_02_006_0019_000078": "情绪低落，语气不悦，言语间流露出心中的责怪之情",
    "tx_emulate_02_006_0019_000090": "情绪低落，语气哀伤，言语间充满了渴求和希望",
    "tx_emulate_02_006_0020_000097": "语气哀伤，情绪低迷，表现出心中的痛苦",
    "tx_emulate_02_007_0006_000025": "语气不悦，心中充满不满和责怪",
    "tx_emulate_02_007_0010_000050": "语气沉闷，情绪非常大，带有埋怨和责怪",
    "tx_emulate_02_007_0024_000112": "语气加强，声音充满了不耐烦，表现出了怨恨",
    "tx_emulate_02_008_0011_000058": "心里面满是自责，言语中充满着不愉快，非常的伤心",
    "tx_emulate_02_008_0020_000108": "声音淡淡，语速较缓，言语间充满了诱惑",
    "tx_emulate_02_039_0002_000034": "有些着急的感觉，情绪十分严肃认真。",
    "tx_emulate_02_039_0003_000072": "声音高扬，语调渐重，话语中带着怒气，心情极度的不满的感觉",
    "tx_emulate_02_041_0002_000076": "声音高亢，语速很快，声音激昂，情绪十分的激动",
    "tx_emulate_02_042_0001_000128": "声音微扬，语速缓慢，情绪中带着一些担心，十分不安的情绪",
    "tx_emulate_02_042_0002_000072": "声音低婉，语调微缓，因事情不如意而情绪低落，精神不振",
    "tx_emulate_02_042_0002_000101": "语气激烈，情绪非常激动，有愤怒的情绪。",
    "tx_emulate_02_042_0003_000154": "声音高扬，语调和缓，语气中带着轻松，心情十分舒畅的感觉，很有信心",
    "tx_emulate_02_042_0004_000065": "声音微挑，语调低缓，话语中带着焦急，心情极度焦躁，非常忧心的感觉",
    "tx_emulate_02_043_0003_000020": "声音高挑，语调渐高，语气中带着狂傲，清高的感觉",
    "tx_emulate_02_044_0001_000032": "激动振奋，带着期待，开心的语气",
    "tx_emulate_02_044_0010_000061": "心情激动，情绪冲动， 笑逐言开，非常感谢",
    "tx_emulate_02_045_0001_000001": "语调下降，语气很坚定温柔，透露着一种真诚的感觉",
    "tx_emulate_02_045_0002_000003": "心情急切，语速很快，感觉焦躁为难",
    "tx_emulate_02_045_0015_000057": "音调很高，声音起伏比较大，声音中透露着一种内心的愤怒",
    "tx_emulate_02_046_0004_000012": "说话语速很快，声音高昂，透露着内心的激动兴奋",
    "tx_emulate_02_046_0022_000084": "在语气中可以感受到一种责备的心情，还有一点埋怨和嗔怪",
    "tx_emulate_02_051_0001_000002": "音调很高，语速很快，内心有点不满的情绪。",
    "tx_emulate_02_052_0011_000081": "语调上扬，情绪激愤，语气愤愤，凸显出心中的不甘和怨恨",
    "tx_emulate_02_052_0016_000106": "情绪激愤，语调上扬，表现出心中的不理解和责怪之情",
    "tx_emulate_02_055_0004_000004": "有些指责的语气，特别的愤怒，怒不可遏，而且十分的失望。",
    "tx_emulate_02_055_0015_000044": "升起的语调中满满的期盼",
    "tx_emulate_02_055_0033_000122": "音调升高，心事重重的情感，带有不高兴的情绪。",
    "tx_emulate_02_056_0005_000027": "前高后低的语调，中间语气逐渐变弱，有些不屑的语气",
    "tx_emulate_02_057_0010_000047": "语速很快，声音激昂，情绪高亢激烈且急切。",
    "tx_emulate_02_057_0034_000160": "声音很重，语气十分愤慨，情绪激动而带有怒气",
    "tx_emulate_02_222_0001_000017": "说话尾音上扬，语气里透露出了浓浓的不悦和不解。",
    "tx_emulate_02_227_0002_000019": "声音重，语带愤怒，有点儿愤然不平的样子。",
    "tx_emulate_02_229_0001_000044": "语调高扬，可谓针锋相对，其内心的愤慨不满和傲然无畏之情显露无遗。",
    "tx_emulate_02_231_0008_000034": "语调高扬，反对的语气，突出内心的不满和愤怒",
    "tx_emulate_02_233_0001_000034": "语气淡淡，情绪不高，心中充满了鄙夷看不起",
    "tx_emulate_02_238_0002_000034": "声音高扬，气势汹汹，语气中带有抱怨，非常不满。",
    "tx_emulate_02_239_0002_000056": "语气温柔耐心，透露着一种劝说的感觉",
    "tx_emulate_02_242_0002_000069": "声音逐渐拔高，语气中十分不满，情绪激动的感觉",
    "tx_emulate_02_242_0002_000072": "声音逐渐高昂，话语中感动情绪非常激动很生气的感觉",
    "tx_emulate_02_245_0001_000034": "声音高亢尖利，语气里带着不悦，情绪激动，非常恼怒的情绪",
    "tx_emulate_02_245_0002_000019": "声音高昂，语速超快，话语中带着咄咄逼人的质问情感，内心特别的气愤。",
    "tx_emulate_02_245_0002_000035": "声音高挑，话语中带着失望，很烦躁，有点生气的情绪",
    "tx_emulate_02_254_0005_000056": "语调冷淡急促，内心充满鄙视，带有些许疑惑",
    "tx_emulate_02_254_0005_000110": "开始时语调波折起伏，带有强烈的无奈，后来加重音调，不爽的情绪流露出来",
    "tx_emulate_02_255_0001_000027": "语调低沉急促到高亢，内心强烈的不满和反对",
    "tx_emulate_02_255_0003_000001": "语调欢快活泼，抑扬顿挫，内心充满惊讶和好奇",
    "tx_emulate_02_257_0001_000007": "语调高亢有力，内心充满自信和坚定，带有些许不耐烦",
    "tx_emulate_02_257_0001_000008": "声音中透露着一种不开心的感觉，语气中透露着不耐烦",
    "tx_emulate_02_257_0002_000038": "语气高亢，带着责骂的声音，怒气和怒火十分明显。",
    "tx_emulate_02_258_0002_000011": "语调急促且高扬，饱含惊讶之情。",
    "tx_emulate_02_259_0003_000006": "语调高昂且急促，自信且含不屑的情绪。",
    "tx_emulate_02_260_0001_000024": "说话快，语调低，抱怨重。",
    "tx_emulate_02_260_0001_000039": "语气中透露着轻视、不屑，心里很看不起",
    "tx_emulate_02_260_0002_000105": "情绪低落，声音拉长，语气中含有一丝无奈、难过",
    "tx_emulate_02_261_0005_000089": "语气充满不耐烦，嫌弃，很讨厌的样子。",
    "tx_emulate_02_261_0005_000107": "语气中充满恼怒，非常的不满，极力指责。",
    "tx_emulate_02_263_0002_000014": "推拒的语气，语气欢乐，突出心情的舒畅开心",
    "tx_emulate_02_265_0001_000066": "语调高扬，情绪激愤，内心很是气愤",
    "tx_emulate_02_265_0002_000026": "语调较高，语速很快地指责，质问的语气中流露出十足的怨气",
    "tx_emulate_02_265_0002_000087": "语气生硬，带有不开心，不愉快的情绪。",
    "tx_emulate_02_265_0006_000056": "语气中带有鄙视，非常的痛恨，带有愤怒。",
    "tx_xiao_0100101000076": "有强烈的自嘲感，对自己的行为等十分不屑却又感到悲伤与无奈",
    "tx_xiao_0100101000302": "低沉的语调透着无限思念的情绪。",
    "tx_xiao_0100101000330": "平缓的语调透出无尽的悲伤，难过与痛苦，无以言表。",
    "tx_xiao_0100101000408": "心情极度悲伤，悲痛欲绝，连呼吸都痛，撕心裂肺。",
    "tx_xiao_0100101000439": "声音较小，语速较慢。充满了难过无奈的语气，喃喃自语的感觉",
    "tx_xiao_0100103000017": "心情满足，语气中充满开心的情感，十分愉悦",
    "tx_xiao_0100103000032": "语气开心，有点儿激动，十分开心和快乐",
    "tx_xiao_0100103000066": "带有一种愉快和高兴的心情，还有一种满足",
    "tx_xiao_0100103000069": "声音亲切，语调平稳，流露出喜爱和宠溺，心情好",
    "tx_xiao_0100103000115": "语气中带有一点愉快，十分开心",
    "tx_xiao_0100103000159": "声音欢喜，语气中带有开心",
    "tx_xiao_0100103000171": "话语带有一种欣喜的心情，可以看出当时的兴奋喜悦心情",
    "tx_xiao_0100103000202": "声音带有一种快乐和幸福",
    "tx_xiao_0100103000268": "语气中充满激动的心情，夹杂着一点惊喜和喜悦",
    "tx_xiao_0100103000272": "声音欢喜，语气中有喜悦和愉快",
    "tx_xiao_0100103000306": "语调急促且高扬，饱含喜气洋洋之情。",
    "tx_xiao_0100103000326": "十分开心的样子，语调上扬，说话间透露出满满的兴奋",
    "tx_xiao_0100103000327": "语调上扬，态度诚恳，表达出真诚、朴实的祝福",
    "tx_xiao_0100103000336": "因一件事心情大好，语调上扬，仿佛能看到脸上洋溢着幸福的笑容",
    "tx_xiao_0100103000344": "语调急促且高扬，饱含欣喜得意之情。",
    "tx_xiao_0100103000354": "轻松快活的语气，说话欢快，体现了乐观的心态",
    "tx_xiao_0100103000364": "轻松愉快的语气表达了内心的激动和期盼",
    "tx_xiao_0100103000368": "表达了激动开心的心情，让人听了都觉得心情舒畅",
    "tx_xiao_0100103000420": "语调上扬，一字一句的透露出吃惊与欢快的心情",
    "tx_xiao_0100103000425": "语气轻快，语速稍快，表现出期待已久的喜悦心情",
    "tx_xiao_0100103000429": "声音平和，语速适中，语调高，表示很开心",
    "tx_xiao_0100103000459": "声音激昂，郑重肯定的表达开心的情感",
    "tx_xiao_0100103000480": "语气欢快，情绪表现出喜悦和祝福。",
    "tx_xiao_0100104000019": "很讽刺、鄙夷的语气，声音里满是指责与不快，语调提升",
    "tx_xiao_0100104000064": "语调高昂且急促，表达生气不满之情。",
    "tx_xiao_0100104000111": "音调逐渐升高，满腔怒火无处发泄，情绪中透露满满的失望和愤怒",
    "tx_xiao_0100104000141": "声音字正腔圆，语速快音调高，难以遏制心中的怒火",
    "tx_xiao_0100104000143": "声音撕心裂肺地吼，音调高语速快，难以遏制心中的怒火，满满的愤怒",
    "tx_xiao_0100104000190": "非常明显的质问表现出情绪的气愤激昂。",
    "tx_xiao_0100104000205": "语调上扬，声音略带嘶哑，表达愤怒和质问的情绪",
    "tx_xiao_0100104000295": "语气坚定，语调很高，透露着生气的情绪",
    "tx_xiao_0100104000354": "语气肯定不容置疑，略带有生气",
    "tx_xiao_0100104000428": "语气中充满不耐烦的意味，内心满是愤怒和不满",
    "tx_xiao_0100105000112": "声音低沉，语速稍微有点慢，音调沉稳，表示难过",
    "tx_xiao_0100105000158": "语速快，声音沉稳，语气发怒的迹象，表达人生气",
    "tx_xiao_0100105000254": "语调上扬，得意的情绪里还带一点怒气。",
    "tx_xiao_0100105000277": "语气很是冷酷，还带着点威胁的感觉，情绪显得比较冷漠。",
    "tx_xiao_0100105000307": "语气中透露出自以为是，得意的情感。",
    "tx_xiao_0100105000327": "大声的责骂，声音大，语速快，给人一种怒火中烧的感觉",
    "tx_xiao_0100105000369": "声音调侃，语气中透露出兴奋与高兴。",
    "tx_xiao_0100105000468": "语调逐渐变高，语气从质问逐渐变得有些激动",
    "tx_xiao_0100106000020": "声调渐高，处于一种疑问但稍带相信的状态。",
    "tx_xiao_0100106000061": "语气轻松，语速较快，内心欢快舒畅。",
    "tx_xiao_0100106000077": "语气戏谑轻松，内心开心且欢快。",
    "tx_xiao_0100106000099": "语气笃定，体现出内心充满诚意。",
    "tx_xiao_0100106000103": "语气上扬，语速适中，声音缓慢，充满疑问",
    "tx_xiao_0100106000111": "语气上扬，语速适中，声音缓慢，感觉说话人十分迷惑不解",
    "tx_xiao_0100106000116": "实在不能理解为什么会这样 十分困惑",
    "tx_xiao_0100106000182": "语气激扬，语速适中，声音缓慢，充满疑惑和诧异",
    "tx_xiao_0100106000278": "语气轻快，情绪里包含着好奇和疑惑。",
    "tx_xiao_0100106000282": "语气激扬，语速适中，声音微弱，充满疑惑和 诧异",
    "tx_xiao_0100106000316": "声音给人一种嗔怪的感觉，同时充满了好奇",
    "tx_xiao_0100106000317": "询问，声音给人一种充满好奇和不解的感觉",
    "tx_xiao_0100106000332": "声音轻柔，语气里透露出猜疑。",
    "tx_xiao_0100107000103": "声音里带有抽泣，情绪里流露出伤心。",
    "tx_xiao_0100107000310": "语速很慢、哽咽，很是痛苦",
    "tx_xiao_0100107000341": "语气较为平缓，担忧中带着难过",
    "tx_xiao_0100107000369": "语气肯定没有怀疑，十分信赖",
    "tx_xiao_0100107000421": "神情低落，带着哀伤痛苦，语音低沉",
    "tx_xiao_0100107000431": "态度痛苦悲伤，郁闷叹息，声音低沉",
    "tx_xiao_0100107000449": "语气较为缓和，声音略有凄凉之感，情绪中满是无法纾解的冤枉。",
    "tx_xiao_0200101000712": "语气失落，声音里透露出自责与懊悔。",
    "tx_xiao_0200101000731": "声音里透露出自责与懊悔。",
    "tx_xiao_0200101000952": "语调平稳，语气中夹杂着一丝忧伤",
    "tx_xiao_0200103000008": "语调渐高，无比惊讶与欣喜。",
    "tx_xiao_0200103000048": "语气激动，感到十分满足和欣喜。",
    "tx_xiao_0200103000053": "语气十分轻松愉快，有安慰之意，情绪开阔疏朗",
    "tx_xiao_0200103000080": "语调高，语气欣喜欢快，情绪十分快活",
    "tx_xiao_0200103000095": "语调高，语气激动，情绪积极向往，十分期盼",
    "tx_xiao_0200103000102": "又惊又喜，语气中充满喜悦和惊讶",
    "tx_xiao_0200103000124": "因为事情最后得到好的结果而高兴，语气中带有欢欣庆幸之情",
    "tx_xiao_0200103000128": "语调高且语速快，表达内心激动开心之情。",
    "tx_xiao_0200103000135": "语调高扬，语速变快，流露兴高采烈之情。",
    "tx_xiao_0200103000148": "语调急促且高扬，饱含欣喜得意之情。",
    "tx_xiao_0200103000153": "语速较快，声音较为高涨充满了开心愉快的情绪",
    "tx_xiao_0200103000162": "语气轻快，情绪流露出喜悦和开心。",
    "tx_xiao_0200103000183": "语气里透着内心的赞叹，感到舒适与满足。",
    "tx_xiao_0200103000184": "语气轻快，语调上扬，情绪表现出兴奋和愉悦。",
    "tx_xiao_0200103000187": "语气里带着感叹，情绪表露出欣慰和开心。",
    "tx_xiao_0200103000192": "激动的语调透出极度的开心，欢喜雀跃。",
    "tx_xiao_0200103000206": "声音激动，语气庆幸中感到非常兴奋。",
    "tx_xiao_0200103000221": "声音雀跃，语气轻快，非常开心欢喜。",
    "tx_xiao_0200103000236": "心情舒畅，语气高兴中透露出一丝感怀。",
    "tx_xiao_0200103000258": "对某件事充满了自豪感，情绪很放松。",
    "tx_xiao_0200103000259": "对某件事很自豪，充满了得意感。",
    "tx_xiao_0200103000289": "语调上扬，情绪欢快，很开心做某事。",
    "tx_xiao_0200103000520": "语气十分轻快，言语中渗透出满足的喜悦，十分高兴，语气激动，言辞风趣自然。",
    "tx_xiao_0200103000535": "感到一种豪放不羁的发自肺腑的快活与满足",
    "tx_xiao_0200103000554": "感觉是自尊心得到了极大的满足，说话语气带有兴奋和得意之感",
    "tx_xiao_0200103000604": "语速较快，声音较为高涨充满了开心愉快的情绪，语气积极",
    "tx_xiao_0200103000640": "语气张扬，语调上扬，体现内心得意满足的情绪。",
    "tx_xiao_0200103000648": "声音欢快，有一些高兴",
    "tx_xiao_0200104000527": "语气激动，充满着怨恨和不满。",
    "tx_xiao_0200104000609": "语调高昂，语速极快，愤怒到了极点。",
    "tx_xiao_0200104000718": "声音急切，语气恼怒中透露出执拗。",
    "tx_xiao_0200104000737": "声音凶狠，语气中透露出恼火与不满。",
    "tx_xiao_0200104000848": "生气极了，反问中透露出愤怒",
    "tx_xiao_0200104000887": "语气中是委屈和责备兼而有之，嗔怪和恼怒埋怨交织",
    "tx_xiao_0200105000529": "语调上扬，情绪较为激烈，又充斥着不满和怨怼。",
    "tx_xiao_0200105000606": "语调高昂且急促，表达生气不满之情。",
    "tx_xiao_0200105000643": "语调高昂，语速极快，流露生气不满之情。",
    "tx_xiao_0200105000715": "声音坚毅，语气愤怒中透露出不满。",
    "tx_xiao_0200105000746": "声音凶狠，语气中透露出及其愤怒的情感。",
    "tx_xiao_0200106000514": "表明内心十分平和，带有些许的轻松随意和宽慰之意",
    "tx_xiao_0200106000526": "语气急促，感到无比好奇且十分担忧。",
    "tx_xiao_0200106000535": "语气轻松，感到十分好奇且期待。",
    "tx_xiao_0200106000563": "语气疑惑但十分理直气壮，情绪厌烦拒绝",
    "tx_xiao_0200106000574": "语气轻佻略有些蔑视，态度高傲，情绪不屑",
    "tx_xiao_0200106000584": "语调略高，语气肯定明确，情绪略有些惊讶与恍惚",
    "tx_xiao_0200106000644": "平缓却逐渐上升语调，表达自己的疑惑之情。",
    "tx_xiao_0200106000737": "声音低沉，内心觉得不理解",
    "tx_xiao_0200106000881": "语气较为调皮，带有甜蜜和喜悦之意的询问",
    "tx_xiao_0200107000662": "语调由低落转为坦然，语气悲伤无奈伴有清楚明了，情绪痛苦但逐渐释怀",
    "tx_xiao_0200107000704": "声音小而弱，隐隐带有抽泣声，表现内心的难过",
    "tx_xiao_0200107000936": "表明内心充满委屈和不满，流露出极度的悲伤",
    "tx_xiao_0200107000972": "带有哭意，语气里饱含着期待恳求和渴望，还有浓浓的卑微之感",
    "tx_xiao_0200107000978": "哭腔明显，感觉十分委屈可怜，带有难过悲伤之情",
    "tx_xiao_0200107001042": "语气中充满了悲伤痛苦，声音较小传递了悲痛欲绝的情绪，有哭泣声"
}

================================================
FILE: dataset/text.txt
================================================
tx_emotion_00201000015 一年又一年,一日复一日,一聚一离别,一生一场梦。 
tx_emotion_00201000107 人生就像剥洋葱,总有一片会让你流泪。 
tx_emotion_00201000148 心痛?要怪就怪自己,有本事爱上别人,没本事让别人爱上自己。 
tx_emotion_00201000209 抱歉让你认识一个这么差劲的我。 
tx_emotion_00201000220 那种感觉,好像自己的心被人千刀万剐那样般的难受。 
tx_emotion_00201000274 他如果真的爱你,你就不会这么难过了。 
tx_emotion_00201000323 孩子流产啦,海鸥痛不欲生。 
tx_emotion_00201000340 娃娃扑过去抱住她,立刻失声痛哭。 
tx_emotion_00201000467 红了鼻头的小丑,眼泪止不住的流,流到嘴边咽下悲伤。 
tx_emotion_00202000047 这,好像不是由方夫人来定的。 
tx_emotion_00202000118 你脑子坏了,人妖,还不是你先踩我。 
tx_emotion_00202000136 你不喜欢来就回客栈呗!你!我这是为公子担心! 
tx_emotion_00202000301 在我改变主意之前,在我眼前消失。 
tx_emotion_00202000395 受到处罚你可不能怨别人,知道吗,臭小子! 
tx_emotion_00202000425 你说谁不要脸呐?你才不要脸呐! 
tx_emotion_00202000429 哈?谁要把它送给我,长大了我就嫁给他! 
tx_emotion_00202000436 那是因为以前他需要你的支持,所以对你特别好。我哥他坏透了! 
tx_emotion_00203000019 终于去看运动会啦,舒畅啊! 
tx_emotion_00203000031 司马鹰扬笑了,笑得有点儿得意。 
tx_emotion_00203000034 终于得以放风,二十分钟就到了效野,畅快呀! 
tx_emotion_00203000050 看,国庆到了,妹妹也开心! 
tx_emotion_00203000065 希研祝你们玩儿得开心哦。 
tx_emotion_00203000066 去豆捞坊每次都是愉快的餐饮体验啊! 
tx_emotion_00203000069 我家宝宝蹦蹦跳跳像个小兔子一样,真是可爱嘤。 
tx_emotion_00203000072 撇下儿子,享受快乐的二人自驾天堂寨游。 
tx_emotion_00203000086 今天很开心,卷卷要很幸福,很快乐! 
tx_emotion_00203000095 我的伙伴,童年的快乐,回想起来依然温暖。 
tx_emotion_00203000111 人民脸上映出欢快的神情。 
tx_emotion_00203000128 京东六幺八买东西真的好便宜呀,促销结束立刻恢复原价,赚到啦! 
tx_emotion_00203000273 等到七月底项目结束,我就可以申请休年假了,好期待哦! 
tx_emotion_00203000352 赵小珍廖小岑十四岁快乐! 
tx_emotion_00203000357 看着逐渐缩小的黑雾太子高兴得又蹦又跳。 
tx_emotion_00203000364 突然发现,俺闺女出牙了,开心,高兴,兴奋! 
tx_emotion_00203000372 今天好饱,也挺开心的,睡觉喽,乌拉拉乌拉拉,晚安。 
tx_emotion_00203000379 抢到了五月天的演唱会门票,还是头排,啊,好幸福! 
tx_emotion_00203000399 这几日蕊蕊同宝贝玩儿得劲开心,多谢宝贝! 
tx_emotion_00203000412 落班之际表弟们到访,开心到晕! 
tx_emotion_00203000425 儿童节快乐,本引大神我披荆斩棘终于回来啦! 
tx_emotion_00203000448 莽莽你又长大了一岁,开心! 
tx_emotion_00203000470 兰文良,陈晓丽新婚快乐! 
tx_emotion_00203000485 聚会、豪饮、畅谈、开心、回味无穷。 
tx_emotion_00203000486 我终于买到了我梦寐以求的限量款的驴家的包,兴奋到失眠! 
tx_emotion_00204000391 昨天晚上的气还没消呢,你少招惹我。 
tx_emotion_00204000426 米琦,你出来,我有话要跟你说。 
tx_emotion_00204000458 严敏寒,你想和阮伟在一起,我就偏不让你如意。 
tx_emotion_00204000494 我就是要让你尝尝我的痛苦!其实我还是有点儿感激你的, 
tx_emotion_00205000062 你们麒麟帮简直找死! 
tx_emotion_00205000069 喂!你想干什么呀!发什么酒疯!喂!不要在发癫啦! 
tx_emotion_00205000105 那我,究竟哪点不如楠傲! 
tx_emotion_00205000110 哪样啊?是她自己神经不好?跟我抢球场,然后跟我拼篮球, 
tx_emotion_00205000119 去他的!那他刚刚叫你留下的东西…不会就是蓝宝石吧? 
tx_emotion_00205000127 我就知道绝不能领他这份“情,他那点子心眼, 
tx_emotion_00205000198 你怎么这么傻!冷冽,你放了她我来当你的仆人! 
tx_emotion_00205000265 没门儿!我以后要靠自己买四轮车。 
tx_emotion_00206000061 要凌哥哥怎么个“疼法儿?是“亲疼你,还是“抱疼你? 
tx_emotion_00206000149 你在这鬼鬼祟祟的,还想干吗? 
tx_emotion_00206000167 妈!怎么了?小夜? 
tx_emotion_00206000186 妈,你在看什么啊?我后面有什么东西吗? 
tx_emotion_00206000235 别呀,就不多陪我聊会儿吗? 
tx_emotion_00206000240 这都不是重点,重点是他们居然在一起看电影,冰块脸哥哥怎么办? 
tx_emotion_00206000315 干吗个个都用这样的眼神望着我啊,很奇怪吗? 
tx_emotion_00206000337 小若,喜欢哥哥,哥哥喜欢小若吗? 
tx_emotion_00206000380 关键是你以后的工作怎么找,找到怎么做好。其实我建议你, 
tx_emotion_00206000458 喜不喜欢我家的小玄? 
tx_emotion_00206000480 刚才洁叫所有人下去为她庆生呐,你怎么还不下去? 
tx_emotion_00207000018 原来父皇什么都知道了,我真笨。 
tx_emotion_00207000080 真的,镜夜、镜夜他真的,真的,真的。 
tx_emotion_00207000113 由于他有心脏病的原因也就去世了,只剩下我妈和我, 
tx_emotion_00207000162 可是,我们就在同一天之内,少了本校的黄金校草和白金校花。 
tx_emotion_00207000211 使得她内心产生了寂寞感以及对我的厌恶,昨天晚上。 
tx_emotion_00207000252 他说他很爱她,他说会守护她。 
tx_emotion_00208000087 说实话!伯父!我不缺钱!不缺钱?呵!世界上有谁不缺钱! 
tx_emotion_00208000257 我根本不认识易青平,爸爸, 
tx_emotion_00208000303 县委招待所当经理,不是一般女人干得了的, 
tx_emotion_00208000327 那些标榜自己是朋克的人,也只不过是欺世盗名的小丑罢了! 
tx_emotion_00208000390 倩倩,你刚刚不应该这样的! 
tx_emotion_00209000005 你知道我们去约会! 
tx_emotion_00209000026 还是你的漫画全啊。 
tx_emotion_00209000060 金族,你怎么和这个人面兽心的家伙走在一起,离他远点儿! 
tx_emotion_00209000215 彼此彼此吧,一向冷漠又孤傲的殿下不也在关心一个陌生人吗? 
tx_emotion_00209000269 哈?在我家里睡?真的吗阿木! 
tx_emotion_00209000346 居然会为了一个陌生人干出那些傻事。 
tx_emotion_00210000049 你…算啦,麻烦你还是去看看有什么可以避雨的地方。 
tx_emotion_00210000124 别哭别哭,告诉发生什么事啦? 
tx_emotion_00210000249 而是她自己的心意。依着她的性子, 
tx_emotion_00210000272 你,我只是担心不知道该用什么脸回去见我的未婚夫嘛! 
tx_emotion_00210000419 那快去吃饭吧,别饿坏了肚子。 
tx_emotion_00210000453 唉,别管这些了,你们的比赛还没结束吧!快点儿带我去看。 
tx_emotion_00210000476 那我们送你去美容院吧,记得吃饭知道吗? 
tx_emotion_00301000019 子琼来了,双眼通红,抱住夏咏大哭。 
tx_emotion_00301000036 我趴在沙发上,额头抵着玛雅的大腿,相当伤感。 
tx_emotion_00301000052 当他死于绞刑架上的时候,比约特再也控制不住自己的悲伤。 
tx_emotion_00301000185 明知会失去自由,为了得到对方,为了令对方快乐,也甘愿做。 
tx_emotion_00301000203 你走啦,我留下的是一辈子的伤心,这个时候,我的心一直在痛! 
tx_emotion_00301000457 伊斯玛仪号啕大哭,母亲伤心落泪,到处奔波,寻找饮食。 
tx_emotion_00301000461 朋友养的小橘猫突然生病死了,真替他伤心啊。 
tx_emotion_00301000494 很多事儿都被慢慢拆下来拼凑在心里,然后物是人非。 
tx_emotion_00302000016 而且我当时也不想再见到你! 
tx_emotion_00302000027 对,就你是知识分子,你好好去你的麻省理工待着,别来烦我。 
tx_emotion_00302000054 这里我真是一分钟也呆不下去了。 
tx_emotion_00302000090 是为你好,是在关心你!你也明知道,学校一定会追究这件事儿的, 
tx_emotion_00302000107 没有谁是非谁不可的,以妍,自己的幸福才是最重要的! 
tx_emotion_00302000216 这位大叔,你好像误会什么了吧!月老大拉着我不是怕我受伤。 
tx_emotion_00302000224 说话客气点儿,你以为老子不打女人? 
tx_emotion_00302000304 说话老是不好儿好儿地讲,还调戏良家妇女,更可恶的是。 
tx_emotion_00302000356 我只有一个愿望,就是杀了你,为我父母报仇! 
tx_emotion_00302000470 只知道玩儿;有的非常花心,不知道谈了多少个啦;有的又太老实啦。 
tx_emotion_00303000023 首先,致此伟大的世纪神棍节,祝光棍儿们光棍儿节快乐! 
tx_emotion_00303000046 带着一窝快乐的小猪准备出发喽。 
tx_emotion_00303000052 那个傻娃儿破蛋快乐哈! 
tx_emotion_00303000056 离开赛场,迎面而来的是同学们的笑容和欢呼,我心里也乐开了花儿。 
tx_emotion_00303000080 花满楼浅饮了口茶,脸上露出云淡风轻的笑容。 
tx_emotion_00303000082 做一只快乐、知足的鸟儿。 
tx_emotion_00303000117 游戏开心就好啦,男或女,让他们揣测去,哈哈。 
tx_emotion_00303000119 最欢乐的一次喜酒,欢乐无极限呀,噢耶! 
tx_emotion_00303000127 祝黄老师跟傲老师新婚快乐! 
tx_emotion_00303000129 小小的人啊,风水起呀,天天就爱穷开心呐啊。 
tx_emotion_00303000130 今天是俺和俺老婆的生日,祝老婆生日快乐! 
tx_emotion_00303000148 约老同学去抚宁的板厂峪,很开心。 
tx_emotion_00303000158 黄磊调戏地问道,教室后面几个混混哈哈大笑。 
tx_emotion_00303000167 祝自己生日快乐,心想事成哦,今天好运! 
tx_emotion_00303000173 花儿在灿烂地微笑,鸟儿在欢乐地欢叫,我的心情,像吃了蜜一样甜。 
tx_emotion_00303000182 想起和你在一起的时候,感觉是无比的幸福和快乐。 
tx_emotion_00303000198 恭喜彭威巍先生与管红霞小姐新婚快乐。 
tx_emotion_00303000211 亲爱的枇杷,十九个月快乐! 
tx_emotion_00303000236 丰盛晚餐,大家双节快乐! 
tx_emotion_00303000260 老婆,祝你福气又安康,幸福快乐! 
tx_emotion_00303000298 看看沿途的风景,心里感觉好轻松、愉快又惬意。 
tx_emotion_00303000310 我们兴奋地跑下去,感受风雪。 
tx_emotion_00303000354 海绵宝宝之所以快乐不是因为他二,而是因为有人跟他一起二。 
tx_emotion_00303000360 今晚的冒演席圆满结束,期待明天更开心的一幕! 
tx_emotion_00303000363 今天状态好了不少,早上去镇北堡影视城,她玩儿的还算开心。 
tx_emotion_00303000389 吃点儿喝点儿,聊聊你不开心的事儿,让我开心开心! 
tx_emotion_00303000396 这一次的帽山之行,很累但是很开心嘛! 
tx_emotion_00303000421 长剑郎当出鞘,云暮笙哈哈大笑,举剑起舞。 
tx_emotion_00303000433 今天看到一个超级好笑的笑话,哈哈哈笑死我了,哈哈哈。 
tx_emotion_00303000457 宝贝,国庆节快乐,嗨爆头呀! 
tx_emotion_00303000462 我在那儿捐了钱,敲了大钟,真开心! 
tx_emotion_00303000465 真心祝愿飞歌与嫂子新婚快乐,百年好合,早生贵子! 
tx_emotion_00303000474 快乐的午后,久违的二一九! 
tx_emotion_00304000049 死女人,敬酒不吃吃罚酒,兄弟们过来,帮我按住她。 
tx_emotion_00304000079 混蛋!上次的账还没跟你算完呢!竟然敢说我运宝儿长得丑? 
tx_emotion_00304000169 你若是刚对她做出什么事儿的话!你一定会后悔的! 
tx_emotion_00304000235 你瞎说什么,怎么这么没有礼貌。 
tx_emotion_00304000459 是你害得我家破人亡!我清清楚楚记得你那张脸,我隐姓埋名, 
tx_emotion_00305000005 虽然我知道你听不懂我说什么,但是我还是要告诉你, 
tx_emotion_00305000009 一切都是狗屁!别人可以受他蒙蔽,我可不会。 
tx_emotion_00305000014 你刚才说的才是实话,你说你有自己的思想, 
tx_emotion_00305000119 去他妈!他刚刚叫你留下的东西…不会就是蓝宝石吧? 
tx_emotion_00305000164 你他妈的!老子不认识单词你不知道啊? 
tx_emotion_00305000183 你,去去去去,别打扰我复习。 
tx_emotion_00305000280 没有谁是非谁不可的,以妍,自己的幸福才是最重要的责任! 
tx_emotion_00305000283 你变了,我记得你以前眼里是最容不下沙子的。 
tx_emotion_00305000308 哼!你仇家也不见得少吧。 
tx_emotion_00305000383 到时候你可别来找我!我可告诉你哎,逸和修对玄儿可都有意思! 
tx_emotion_00306000098 那后来怎么没有去呢? 
tx_emotion_00306000101 我的工作总经理会另有安排的,是吗? 
tx_emotion_00306000202 风月,她说的都是真的吗? 
tx_emotion_00306000261 晨稀是谁,男的还是女的,漂亮吗? 
tx_emotion_00306000405 喂,你这是在夸她呀还是在夸你自己呀? 
tx_emotion_00306000460 有什么问题吗? 
tx_emotion_00309000124 哈,很惊讶吧!你今天真的很像一个白马王子! 
tx_emotion_00309000154 利如歌离开烈火山庄,竟然如此大张旗鼓! 
tx_emotion_00309000160 哇!原来是青梅竹马呀! 
tx_emotion_00309000167 原来那个把你迷得神魂颠倒的小女孩儿就是她呀。 
tx_emotion_00309000172 钟皓文,你不会偏爱这种吧! 
tx_emotion_00309000183 原来你一直戴着护腕儿,就是为了掩盖上面的疤痕呀! 
tx_emotion_00309000269 哈?在我家里睡?真的吗阿木? 
tx_emotion_00309000324 幸好陈老师您及时赶到呀。 
tx_emotion_00309000349 啊,我没事儿啊,有事儿的是你的衣服呀。 
tx_emotion_00309000452 啊!琳,洛特菲尔!小平头! 
tx_emotion_00400000011 稚嫩的嗓音在高音区并不声嘶力竭,似乎还显得游刃有余。 
tx_emotion_00400000026 幺三八二三二。 
tx_emotion_00400000160 经检测,那些白色粉末儿是冰毒。 
tx_emotion_00400000196 您自个儿算一下,怎么样合算些? 
tx_emotion_00400000259 抱歉没找到阿巴嗄山,找到其他的阿巴嗄山。 
tx_emotion_00400000378 我发“撙省”这个词。 
tx_emotion_00401000087 或许这本来没有错的,因为青春本来就是用来怀念的。 
tx_emotion_00401000174 丑小鸭就这样伤心地流着眼泪往回家走了。 
tx_emotion_00401000213 谁带我回到只有童话的日子里? 
tx_emotion_00401000239 阿香小姐一脸悲伤地俯下头,又猛摇头。 
tx_emotion_00401000242 和寂寞对话,只是不愿意相信你已经离开。 
tx_emotion_00401000281 感觉整个世界都抛弃了我,真绝望。 
tx_emotion_00401000297 爱上一个人的重要标志就是:遇上任何美景都在遗憾,为何你不在身边。 
tx_emotion_00401000327 还可以做朋友吗”是一段故事的结束。 
tx_emotion_00401000339 即便是流泪,也是一种纪念。就算是流泪,也回不到童年。 
tx_emotion_00401000440 葬了那红尘三千,葬了那破碎的心,葬了这段情。 
tx_emotion_00402000196 他是你的皇嫂!我最爱的女人,这是不允许任何人侵犯的。 
tx_emotion_00402000283 不要以为,有爷爷保驾护航就不能把你怎么样!我从来不对女生动手。 
tx_emotion_00402000299 不会吧!我受够了这种每天都被人家欺负的日子了! 
tx_emotion_00402000480 夜冰儿,我叫你给我死去的兄弟陪葬。 
tx_emotion_00403000052 那个啥娃儿破蛋快乐哈! 
tx_emotion_00403000060 荧屏上温馨的瞬间好浪漫玫瑰花语,祝你们新婚快乐。 
tx_emotion_00403000077 这几天宝宝特别兴奋,好像很忙一样,这边动动那边动动。 
tx_emotion_00403000099 萌萌跟妈妈一样怕痒,最喜欢挠的她哈哈大笑。 
tx_emotion_00403000131 硕果累累的十月,我敞开怀抱欢迎你,迎接更大的喜悦。 
tx_emotion_00403000143 哥儿几个吃的不是火锅、是快乐和幸福。 
tx_emotion_00403000145 中午和同学喝点儿开心,晚上和家乡伙伴喝点儿更开心! 
tx_emotion_00403000147 和她们叽叽歪歪聊到哈哈大笑不知道自己在掩饰什么。 
tx_emotion_00403000157 有宝宝之后买这些婴儿衣服,婴儿用品的时候各种兴奋! 
tx_emotion_00403000163 和安仔开心地过一个愉快的生日! 
tx_emotion_00403000173 花儿在灿烂地微笑,鸟儿在快乐地欢叫,我的心情,像吃了蜜一样甜。 
tx_emotion_00403000175 又去套圈圈啦,玩儿得很开心,可大的都是和阿姨要的。 
tx_emotion_00403000197 听朋友说是品牌店,兴致勃勃赶来一看,真棒! 
tx_emotion_00403000252 抽奖竟然中了网易云音乐的免费会员,手气真是好得不得了! 
tx_emotion_00403000255 妹妹夫唱妇随偷石榴、香泡、柿子、橘子,我接得忙,笑得开心! 
tx_emotion_00403000359 款式多,各国美食,款式应有尽有,畅快体验! 
tx_emotion_00403000393 最近格瓦拉搞活动搞的很欢快耶。 
tx_emotion_00403000395 刚才买了瓶霸王凉茶,哇再来一瓶,极度开心。 
tx_emotion_00403000432 点菜时服务员推荐活紫胆,我一听超开心! 
tx_emotion_00403000439 福辉辉弟兄新婚快乐! 
tx_emotion_00403000451 今天弟兄生日,开心留再多血也无所谓! 
tx_emotion_00403000482 这一吻,让刘德华兴奋得一阵眩晕。 
tx_emotion_00403000494 从漫展出来一身汗,买了萌妹子抱枕超级开心。 
tx_emotion_00405000239 晚上我把望远镜带过来,我倒要看看到底是谁没眼光? 
tx_emotion_00405000329 你别想骗我,一个星期前,赵睿看见你抱着一个女人回到你的房间。 
tx_emotion_00409000007 第一次在酒吧门口遇到你的时候,我都不敢认啦。 
tx_emotion_00409000081 沫影,你真的和凌翼寒在交往啊? 
tx_emotion_00409000119 你是世界第一黑帮老大兼第一杀手聂。 
tx_emotion_00409000228 天呐,跟着她快五年啦,居然她就是冰凌儿小姐。 
tx_emotion_00409000285 圆圆,你怎么看见漂亮姑娘比我们男孩子还动心?子申你小心点。 
tx_emotion_00409000323 好兄弟,你太有才啦!全班同学被你编的故事闹得天翻地覆! 
tx_emotion_00409000325 太好了,终于不用死啦。 
tx_emotion_00409000441 原来,静羽也会外旋发球啊。 
tx_emotion_00409000462 映然,怎么是你呀? 
tx_emotion_00409000470 什么?雨侨,算了。我们不去了、不去啦! 
tx_emulate_00_060_0001_000030	太正确了，训练有素。
tx_emulate_00_102_0004_000088	一个花心的男人，和一个急着想把自己嫁出去的女人，简直就是两个星球的，撞在一起，一定是悲剧。
tx_emulate_00_103_0001_000017	我终于要去约会了嗯。
tx_emulate_00_104_0001_000113	你就这么随便，随随便便地带了一个随随便便的女人回来啦。
tx_emulate_00_108_0001_000008	史先生约我来这儿干吗？
tx_emulate_00_108_0001_000079	呃哈，我马上走，慢慢睡啊。
tx_emulate_00_108_0002_000070	哈妹，今天是我人生中最重要的时刻，所以你现在，就到阳台上去站岗。
tx_emulate_00_108_0004_000011	跟人打架啦？
tx_emulate_00_108_0004_000039	你也有被关在门外的时候？
tx_emulate_00_109_0004_000042	真的，如果到了二十岁，还没有交得到男朋友的话，就等于进入了欧巴桑的阶段。很丢脸的。
tx_emulate_00_109_0004_000071	我怎么办，你们找龚喜，他只让坐巴士，不让坐计程车，要不叫两辆计程车。
tx_emulate_00_109_0005_000085	你们女人为什么说话都不直说呢？什么这个那个那个的，那个男人是谁？看那样子满脸邪气。
tx_emulate_00_109_0005_000091	我告诉你，我老婆要是被别人拐走的话，我找你算账，东西你拿。
tx_emulate_00_110_0002_000028	王浩，恭喜啦。
tx_emulate_00_110_0002_000038	到男女之间开始讲实话的时候，就代表他们马上要分手，或者已经牵手了。
tx_emulate_00_110_0004_000059	你也是中国人？
tx_emulate_00_110_0004_000099	我去上洗手间啦。
tx_emulate_00_111_0001_000023	你要请我去你家玩啊？就我一个人？
tx_emulate_00_111_0001_000042	我说如果，有人跟我求婚的话，你们相不相信？
tx_emulate_00_112_0001_000066	我知道，你说，你会想我，诶嘿嘿嘿，算你还有一点良心。
tx_emulate_00_112_0002_000016	王浩一个人跑是逃婚，两个人跑可就是私奔了，要赶紧想办法搞清楚小萍到底是什么意思啊。
tx_emulate_00_112_0002_000070	有大明星接待，多留几天都没问题。
tx_emulate_00_112_0005_000089	租什么嘛，你来到我的社区就是我的荣幸。
tx_emulate_00_112_0006_000020	我正在逃难嘛，你留一点钱给我再走哇。
tx_emulate_00_113_0002_000003	日本的空气太干燥了，影响了我的皮肤。
tx_emulate_00_114_0001_000001	我也是，这个统计，肯定有问题。
tx_emulate_00_114_0002_000030	他的这种类型，正好是女人最喜欢的。
tx_emulate_00_114_0003_000026	这个太简单啦，小萍是全中国最好追的女人呐。
tx_emulate_00_115_0002_000007	只能用一句话解释，她是个不正常的女人。
tx_emulate_00_115_0005_000035	快点快点快点上菜，难得看你们开火，弄点好吃的来。
tx_emulate_00_115_0009_000024	广告公司项目经理，他什么都不懂，过两天就被炒鱿鱼了。
tx_emulate_00_115_0010_000012	不可能的，我欠那么多债务。
tx_emulate_00_117_0002_000013	友情算什么呀，友情又不会让一个人，去奴役另外的一个人。
tx_emulate_00_117_0005_000059	那你，你问万玲啊，我。
tx_emulate_00_118_0002_000020	阿呆，大妈咪要去约会了，你好好儿地待在这儿吃蛋糕，小妈咪一会儿就回来，啊。
tx_emulate_00_118_0002_000022	你少来这套。
tx_emulate_00_118_0005_000032	那你自己为什么不登报征婚啊？你比小萍更需要男人啊。
tx_emulate_00_119_0001_000001	男人婆男人婆，快看，快看我有什么变化啊？
tx_emulate_00_119_0001_000034	唉你们不要吵了嘛，两百万是一个很大的数目，你们快来帮王浩想想办法。
tx_emulate_00_119_0001_000049	大家都说，呃他长得像王浩。
tx_emulate_00_120_0002_000071	我们在一起的距离，是不是太近了，我会害怕的。
tx_emulate_00_121_0002_000009	谢谢典哥，谢谢何哥。
tx_emulate_00_122_0001_000025	他约我去看电影。
tx_emulate_00_123_0001_000048	铁蛋也能吃啊？
tx_emulate_00_123_0002_000003	我是来找男朋友的。
tx_emulate_00_123_0009_000012	不要生气了，好不好？
tx_emulate_00_125_0001_000067	但是，这个时候有老巫婆出现啦，她跟公主说，嘿嘿嘿，没有人会来救你，我要把你变成一只汪汪小狗。
tx_emulate_00_125_0002_000049	第三，不可以为了报复也去找外遇，那样只会便宜了其他的臭男人，而活活地糟蹋自己。
tx_emulate_00_125_0002_000058	外面写一个牌子，上面写着，女人的乐园，男人的禁地。
tx_emulate_00_125_0008_000017	每天晚上，你可以看着家人平安地躺着，抱着妻子入眠的时候，嘴角都挂着微笑。
tx_emulate_00_126_0001_000039	我会不会死啊？
tx_emulate_00_127_0001_000084	诶，我们的许愿船都回来了诶。
tx_emulate_00_128_0006_000033	你不是要吃苦吗，那从现在开始吧。
tx_emulate_00_129_0004_000056	这次我不是骗你的哦，这个真的很贵重啊，这是最新的DVD啦，呢，还附送了好多光碟。
tx_emulate_00_129_0004_000057	给你看啊，有第六感生死恋，别闯阴阳界，六月六日断肠时，魂断蓝桥，还有最后的晚。
tx_emulate_00_129_0005_000029	你们有没有要跟我说的？
tx_emulate_00_130_0001_000057	诶，你们怎么都不进去呀？
tx_emulate_00_130_0003_000017	你看你，笨得连汤都不会喝，做错什么事情啦？这叫恶有恶报。
tx_emulate_00_131_0005_000061	哎呦，这河水，太冷了吧。
tx_emulate_00_132_0004_000026	他每天，都是那样紧密地和我贴在一起。
tx_emulate_00_132_0005_000016	呃，让我赔罪，让我赎罪，让我奉献一切好不好？
tx_emulate_00_133_0005_000062	是啊，麻雀变凤凰，她本来只是代万铃的班，谁晓得歪打正着，飞上枝头当凤凰啦。
tx_emulate_00_135_0002_000003	小张，周日带我去采购，好啊，没问题。
tx_emulate_00_135_0004_000015	阿婆你要坚持住，阿婆。
tx_emulate_00_137_0002_000079	今天晚上的星星好多哦，好美，不知道，会不会有流星雨呢？
tx_emulate_00_137_0003_000022	告诉你个好消息。
tx_emulate_00_137_0003_000082	请问，你有什么事？
tx_emulate_00_137_0005_000092	嘿，都没中都没中。
tx_emulate_00_144_0005_000014	让人会有起床气，心情很不好，昨天晚上呢一惊一乍的，我们都没睡好觉是不是？
tx_emulate_00_144_0007_000056	谢谢，谢谢，呵呵，谢谢，你看看爸爸回来没有？
tx_emulate_00_150_0005_000005	你怎么知道我饿了呀啊，这真好吃。
tx_emulate_00_150_0005_000006	我今天加班呢，又被主任看到了，他还表扬我了。
tx_emulate_00_151_0001_000018	谢谢你们热情款待，下次回国我做东。
tx_emulate_00_151_0007_000028	可是中餐贵呀！一个穷学生怎么吃得起？有一次我一个人在宿舍，真是想中餐想疯啦。
tx_emulate_00_153_0001_000033	这现在掉不掉链子，哪里是我说了算的呀。
tx_emulate_00_157_0001_000031	你喜不喜欢姑姑这儿啊？
tx_emulate_00_158_0002_000007	不是，明哲你什么意思啊，啊？
tx_emulate_00_158_0002_000049	诶呀，这日子没法儿过啦。
tx_emulate_00_158_0003_000068	我靠，你们要求也太高了吧。
tx_emulate_00_159_0001_000006	万一将来蒙总还在，众诚完了怎么办？
tx_emulate_00_162_0002_000075	我我我，我在这儿我还怎么住啊？
tx_emulate_00_162_0005_000004	你是家里的老大，明成是你的亲弟弟，你帮帮明成好不好？
tx_emulate_00_171_0006_000036	行了行了，我算是看透啦，你们苏家这三个男人加起来还不如一个明玉。
tx_emulate_00_179_0005_000021	还有礼物呢。
tx_emulate_00_185_0002_000007	你看看，你看看你这人缘，太好啦，太好啦。
tx_emulate_00_186_0001_000023	也给你们一家拜年啦，我还得给小咪发红包呢。
tx_emulate_01_001_0018_000065	你们都上当啦，三爷，他是共跳的水线子。
tx_emulate_01_001_0019_000077	这么说共军三十来人，就把他们威虎山三百人马打的剩下不到五十人，还灭了老五。
tx_emulate_01_002_0005_000008	这杯酒，当祭典韦，祭郭奉孝，祭荀令君，祭庞德，祭夏侯渊，祭孤的子侄，曹昂，曹安民。
tx_emulate_01_002_0007_000012	但是我的头洗到一半，你把水闸关了。
tx_emulate_01_002_0017_000057	她倒是一个奇女子啊。
tx_emulate_01_003_0003_000018	所以，没的麻匪的日子，才是好日子。
tx_emulate_01_004_0006_000023	唉，你必须仔细看才能看懂，我们就在这个位置啊，这是布鲁克林，我们去那儿抓了两条狗。
tx_emulate_01_005_0004_000013	啊我们要得到宝贝，把它给，夺回来。
tx_emulate_01_005_0005_000022	那就问几个有意义的，我生从何来死往何处，我为何要出现在这个世界上？
tx_emulate_01_006_0020_000094	怎么不能实现啊？走，我们一起去，啊。
tx_emulate_01_007_0006_000025	我哪儿知道喊啥呀？
tx_emulate_01_007_0008_000034	铡的是，宵小奸佞，行凶作恶之辈，虎头铡。
tx_emulate_01_007_0021_000120	啊，守口如瓶的一个梦想。
tx_emulate_01_007_0024_000091	哎，死到临头了是吧，你还在这耍贫嘴啊。
tx_emulate_01_007_0025_000099	是铁板钉钉啦。
tx_emulate_01_008_0011_000050	她充满了陌生和恐惧。
tx_emulate_01_008_0017_000077	这便是我们，推行的王化之道。
tx_emulate_01_009_0003_000026	知道吗？你不爱说话，我喜欢你。
tx_emulate_01_010_0012_000085	初而为官，家母便谆谆诲之。
tx_emulate_01_011_0005_000017	唉娘呀，头发都看见啦。
tx_emulate_01_011_0009_000047	那不是方天画戟吗？
tx_emulate_01_011_0010_000050	哼哼，老纪，你是不是想把家里的人都弄成是宫里的呀？
tx_emulate_01_011_0011_000059	我要把事情给闹大了，我要闹得让刑部知道，我要闹得，让万岁爷知道。
tx_emulate_01_011_0013_000073	你怎么知道那是他母亲？
tx_emulate_01_012_0004_000021	看来我该去，拜访他们啦。
tx_emulate_01_012_0023_000084	狮子，不全是那么差劲。
tx_emulate_01_012_0025_000092	哦什么惊喜？
tx_emulate_01_013_0002_000004	妹妹读过书吗？
tx_emulate_01_013_0007_000025	您好好地听着。
tx_emulate_01_013_0007_000032	愿上帝和你同席。
tx_emulate_01_014_0004_000016	你男人死了，何家破败了，你寡妇拉扯着这么一堆孩子，人都是势利眼。
tx_emulate_01_014_0005_000035	图图吃了一排，两排，三排，四排，五排，六排，七排，八排。
tx_emulate_01_014_0011_000061	与糯米一起，炒来炒去。
tx_emulate_01_014_0016_000086	谢谢紫薇格格。
tx_emulate_01_016_0003_000006	等一下等一下，你说，我是你的主人？
tx_emulate_01_016_0007_000035	告诉你，也许，你可以逃跑。
tx_emulate_01_016_0008_000037	把你的给我呜，不，就就，就有了吗？
tx_emulate_01_016_0016_000067	这第三碗酒哇。
tx_emulate_01_016_0023_000150	把这几封信带出去，有烈士的。
tx_emulate_01_018_0003_000014	听到了吗，你也入围了。
tx_emulate_01_020_0001_000004	你看起来还是那么白。
tx_emulate_01_020_0007_000012	智慧大王来啦。
tx_emulate_01_020_0017_000067	两个都得救，两个都得救哇。
tx_emulate_01_021_0003_000015	没有自由的人会幸福吗？
tx_emulate_01_022_0009_000026	才是沛国的天。
tx_emulate_01_022_0022_000108	就只凭一根铁钉。
tx_emulate_01_022_0022_000111	你明明发过誓，会好好保护我，你发过誓。
tx_emulate_01_023_0006_000042	你说凭什么。
tx_emulate_01_023_0010_000063	呃刚才我老猪吃的是快了点儿。
tx_emulate_01_023_0010_000064	也不知道是什么味儿，有籽没籽儿，有核没核。
tx_emulate_01_023_0014_000081	都快睁不开了。
tx_emulate_01_024_0009_000083	你们人多，你们枪多。
tx_emulate_01_024_0010_000114	法租界外边都是中国人的地方。
tx_emulate_01_024_0012_000117	你不做巡捕，以后去做什么呀。
tx_emulate_01_024_0013_000139	如果完不成任务。
tx_emulate_01_026_0005_000031	他又咋啦，嫌你丑，嫌你脏。
tx_emulate_01_028_0008_000035	为什么还要把时间浪费在这个上头哇？
tx_emulate_01_028_0009_000046	你以前，不是最喜欢这样儿称呼的吗？
tx_emulate_01_028_0009_000054	您是因为这个就不爱臣妾了吗，那我不要规矩也不要体统，如果没有皇上的怜惜，这一切，又有什么意义。
tx_emulate_01_029_0013_000037	究竟怎么回事，把我都给弄糊涂了。
tx_emulate_01_030_0014_000035	啊，这睡衣好舒服，再不用化妆穿礼服，还不用和王子约会。
tx_emulate_01_031_0009_000032	身如玄铁，火眼金睛。
tx_emulate_01_031_0011_000045	借此，隆重盛典。
tx_emulate_01_031_0015_000061	臣不敢，臣惶恐。
tx_emulate_01_031_0020_000073	剁了皇帝老儿狗头，给我退下。
tx_emulate_01_032_0011_000017	知识，就是力量。
tx_emulate_01_032_0011_000020	恋爱和战斗，都要勇往直前。
tx_emulate_01_032_0018_000042	嗯我不听我不听我假装家里没电脑。
tx_emulate_01_033_0004_000039	嗯……就是，第一次那什么，呃也是跟高平。
tx_emulate_01_033_0006_000075	我李云龙，不会拿自己的士兵的生命，去换自己的老婆。
tx_emulate_01_034_0003_000030	我喜欢吃很多人做的菜。
tx_emulate_01_034_0005_000040	那你喜欢他吗？
tx_emulate_01_190_0001_000041	清水一杯，聊表寸心。
tx_emulate_01_190_0001_000042	接到新房子里过春节啦。
tx_emulate_01_191_0005_000043	唉我的老婆呀，我的前瞻性怎么跟你就相差那么十万八千里呐？
tx_emulate_01_192_0003_000046	你的工作包括陪聊吗？我已经忍无可忍啦。
tx_emulate_01_196_0001_000024	钱全是借来的。
tx_emulate_01_196_0003_000051	可现在问题是，问题的关键是，你要把我们俩的生活全部压在你姐身上吗？
tx_emulate_01_196_0004_000027	月息百分之十，那一年翻一百二啊。
tx_emulate_01_199_0001_000022	不会知道什么？
tx_emulate_01_200_0002_000001	哎，我说什么了我，你现在可是越来越不纯洁了啊。
tx_emulate_01_202_0001_000005	你给我说说，你给我说说你跟那个，宋什么，啊，那宋什么，到底什么关系呀现在？
tx_emulate_01_202_0001_000036	除了社保和三金扣下来两千八，不到两千八你就想买断我二十四小时？
tx_emulate_01_203_0001_000002	从明天开始起，明天开始起，我就堵在厕所门口，我谁也不让上厕所，我看他们给不给我钱。
tx_emulate_01_203_0008_000028	您想想，现在这人，您开一奥迪出去，他都不搭理你，觉得您没实力。
tx_emulate_01_205_0002_000018	你到底想干什么？
tx_emulate_01_206_0004_000007	你讨厌，你吓死我了，你知不知道？
tx_emulate_01_206_0005_000031	咱干这份工作，挣这份钱，把钱拿到手里头就完事儿了。
tx_emulate_01_209_0001_000021	这万一没人给他送钱，人家又不给他治，他不等于送死吗？
tx_emulate_01_210_0009_000010	房子是保值的诶，钱留在手里，不合算的。
tx_emulate_01_216_0004_000039	这个床，换掉，外面那个餐桌，换掉，通通都不要。
tx_emulate_01_219_0002_000027	事先为什么不告诉我一声？
tx_emulate_02_001_0015_000048	为什么要隐藏隐瞒，为什么要忍痛抚琴，你们早知道她受伤了对不对，为什么联合起来隐瞒我，为什么为什么？
tx_emulate_02_002_0016_000058	论文治武功，是朕开创了贞观之治。
tx_emulate_02_002_0018_000067	可是纪晓岚，纪晓岚他不能老逼着朕做圣人吧？
tx_emulate_02_003_0023_000078	若无，苏先生的麒麟之才，恐怕朕今天，也看不到这个场面吧。
tx_emulate_02_004_0004_000018	今天，就是有天大的事情，有我孝庄来顶着。
tx_emulate_02_005_0005_000018	不，我刚才问的是本命，现在问的是自我。
tx_emulate_02_006_0002_000017	一对叱咤风云的眼睛，那体态不活像一位英勇的神灵，刚刚落到摩天山顶，这幅十全十美的仪表。
tx_emulate_02_006_0009_000053	还有谁？还有谁？还有谁？
tx_emulate_02_006_0009_000056	你们局长都得给我们鳄鱼帮面子，要不然他当不了这个局长，你他妈不认识我？
tx_emulate_02_006_0019_000078	我在这个老人院啊，起码，有人给我料理一下后事。
tx_emulate_02_006_0019_000090	这是我临死之前最后的一个愿望。
tx_emulate_02_006_0020_000097	我眼睁睁地看着一些老伙计呀。
tx_emulate_02_007_0006_000025	我哪知道喊啥呀？
tx_emulate_02_007_0010_000050	都怪你，嗯，看着师傅不好好看着，嗯嗯，非得擒什么妖，嗯嗯，妖怪没擒着，师傅也给弄丢了，还怪我呢。
tx_emulate_02_007_0024_000112	我就问你一句，姚远跟你说的那句话是什么？我们就想知道这一句话。
tx_emulate_02_008_0011_000058	我是世界上第一大糊涂蛋呢。
tx_emulate_02_008_0020_000108	我的货十回有八回被张麻子劫走了，如果你们可以去剿匪的话，钱要多少有多少。
tx_emulate_02_039_0002_000034	和家里的人留一段话。
tx_emulate_02_039_0003_000072	不傻，谁和你做朋友。
tx_emulate_02_041_0002_000076	别怪光江某再说句不客气的话。
tx_emulate_02_042_0001_000128	万一紫薇还来不及禀明身份，皇上就要来个乱点鸳鸯谱了。
tx_emulate_02_042_0002_000072	我答应过大家要改变这里，如果我们现在把咖啡卖了。
tx_emulate_02_042_0002_000101	你回村之后做的每一件事情，哪一个不是我告诉你的？
tx_emulate_02_042_0003_000154	哪知道美梦，不会成真呐。
tx_emulate_02_042_0004_000065	电令完全可能被截获，一旦被破译，命令是袭击明楼座驾，刺杀的却是南田洋子，我们全都得暴露。
tx_emulate_02_043_0003_000020	本人是天生领队，亲自找来一群大坏蛋，组成了怪盗军团阴谋诡计，什么事都干。
tx_emulate_02_044_0001_000032	就说啊，有一种鱼叫鲲，超级大，有天那么大，还能飞，翅膀像云一样大。
tx_emulate_02_044_0010_000061	哦谢谢，你真是热心呐。
tx_emulate_02_045_0001_000001	我愿意为您做任何事情，可是马克队长，那个人救过我一命，我，我不能。
tx_emulate_02_045_0002_000003	马克队长那个人，救过我一命。
tx_emulate_02_045_0015_000057	你不是我哥哥。
tx_emulate_02_046_0004_000012	啊，我是特优生。
tx_emulate_02_046_0022_000084	你练武功那么高，还要她救你？
tx_emulate_02_051_0001_000002	你还记得我们付出了代价吗？你还记得我们曾经的梦想跟誓言了吗？
tx_emulate_02_052_0011_000081	身为三D的我，就连票价都是你们的两。
tx_emulate_02_052_0016_000106	为什么不留下她呀，小玉这一走，你再想见她就难啦。
tx_emulate_02_055_0004_000004	你好狠毒的心啊你。
tx_emulate_02_055_0015_000044	他也许就放过我们啦。
tx_emulate_02_055_0033_000122	好啦，都闭嘴。
tx_emulate_02_056_0005_000027	干什么，我当然不会研究那些浅薄的社会问题。
tx_emulate_02_057_0010_000047	这是我们的使命。
tx_emulate_02_057_0034_000160	你放火烧了我的亲生母亲，你今生今世都要以她的身份活着，所有人都知道，偏偏你在骗自己。
tx_emulate_02_222_0001_000017	怎么就那么不会说人话呢？
tx_emulate_02_227_0002_000019	我天天吃糠咽菜的，我不就是顾着你，顾着这个家，顾着孩子吗。
tx_emulate_02_229_0001_000044	怎么啦，我告诉你，我像日本鬼子扫荡一样，把咱家里里外外扫荡了一遍。
tx_emulate_02_231_0008_000034	不好，你们厂里搞歧视。
tx_emulate_02_233_0001_000034	诶呦，说了一句咱俩有夫妻相，说你有三十五岁，瞧把你委屈的。
tx_emulate_02_238_0002_000034	还拿着白头发呢，神经病呀你。
tx_emulate_02_239_0002_000056	别跟着他学的油腔滑调，拈花惹草的。
tx_emulate_02_242_0002_000069	你说清楚了再走。
tx_emulate_02_242_0002_000072	啊，你想她你去找她去呀，你别以为我看不出来，你天天干吗给我脸色看啊。
tx_emulate_02_245_0001_000034	你胡说什么你？
tx_emulate_02_245_0002_000019	我今儿早上才要多多去你宿舍问了，你宿舍同学说你昨晚根本就没回来，你在哪儿过的夜？
tx_emulate_02_245_0002_000035	你知道我叫你来干什么，你看你这作业做的，你糊弄谁呢。
tx_emulate_02_254_0005_000056	你说，我从小到大，他们俩天天吵就没消停过，这叫爱情？
tx_emulate_02_254_0005_000110	诶呦妈呀，您一回来这叫一乱内。
tx_emulate_02_255_0001_000027	瞧你这说的，唉，人家是两口子。
tx_emulate_02_255_0003_000001	你怎么回来啦，请假回来哒？
tx_emulate_02_257_0001_000007	你没看我在这儿研究李大钊传呢嘛我，我这要出书，我写书我要。
tx_emulate_02_257_0001_000008	我说，你别叨叨了行不行，你这辈子还没叨叨够哇？
tx_emulate_02_257_0002_000038	好高骛远，本事不大心气儿不小，你不说往下扽着他点儿，你倒好。
tx_emulate_02_258_0002_000011	还住在你们家？
tx_emulate_02_259_0003_000006	我问这个干吗呀？又不是调查户口，人离了不就得了呗。
tx_emulate_02_260_0001_000024	你看你这臭小子，奥你爸就不能关心关心你啊？
tx_emulate_02_260_0001_000039	诶呦，我还真没有见过光管接电话的太上皇呢。
tx_emulate_02_260_0002_000105	诶呀，我这次没白去呀，我亲眼盯着那个秦香莲把那手印儿给按了。
tx_emulate_02_261_0005_000089	您甭哭了行吗，别再一吵架您就跟我说这个行吗？
tx_emulate_02_261_0005_000107	是大姐帮了我您知道吗？我那是第一次啊我。
tx_emulate_02_263_0002_000014	不行，不行，我又不做生意。
tx_emulate_02_265_0001_000066	如果她怀的是你的孩子，你就要对她负责任，你就得跟她结婚。
tx_emulate_02_265_0002_000026	你要是但凡把我当人看，他们敢这样儿？
tx_emulate_02_265_0002_000087	你给他点儿自尊心吧，你动不动就打。
tx_emulate_02_265_0006_000056	我们俩从一开始就君子之交，都说好啦，背信弃义出尔反尔的是她，我告诉你这件事我是受害者。
tx_xiao_0100101000076 费尽心思想和自己聊天的样子,我自己都烦。 
tx_xiao_0100101000302 妾住长江头,君住长江尾。日日思君不见君,共饮一江水。 
tx_xiao_0100101000330 她倒吸了一口气,难过地垂下眉尾。 
tx_xiao_0100101000408 心,总像被掏空的感觉,只需要深呼吸,然后活在自己的世界里面。 
tx_xiao_0100101000439 不是我选择的,只是被迫接受的。 
tx_xiao_0100103000017 哥哥很爱我,不肯我受委屈,有你我很开心,快乐! 
tx_xiao_0100103000032 据说今晚还是蛮开心哒有饱耳福即是开心吧。 
tx_xiao_0100103000066 去豆捞坊每次都是愉快的用餐体验啊! 
tx_xiao_0100103000069 我家宝宝蹦蹦跳跳像个小兔子一样,真是可爱嘤。 
tx_xiao_0100103000115 码字真的是件愉快的事情啊,写完本儿感言也是。 
tx_xiao_0100103000159 民哥、衷心地祝福你新婚快乐。 
tx_xiao_0100103000171 收拾衣物,发现十年前的衣裤依然能穿,甚是欣喜。 
tx_xiao_0100103000202 天南地北,年月日时,幸福快乐! 
tx_xiao_0100103000268 他又惊又喜,眼睛像通了电的灯泡,募地亮了,一直沉着的脸上露出了笑容。 
tx_xiao_0100103000272 祖国母亲生日快乐,尕叔生日快乐,小刘生日快乐! 
tx_xiao_0100103000306 闫颖,今天是你的生日,祝你生日快乐。 
tx_xiao_0100103000326 喜悦涌进了她的心中,心仿佛荡漾在春水里。 
tx_xiao_0100103000327 今天老婆生日,祝老婆生日快乐! 
tx_xiao_0100103000336 明儿就能见到老公了,兴奋中! 
tx_xiao_0100103000344 用自己打工的钱买了人生第一辆山地车,美利达勇士五百,加油! 
tx_xiao_0100103000354 海绵宝宝之所以快乐不是因为他二,而是有人跟他一起二。 
tx_xiao_0100103000364 突然发现,俺闺女出牙了,开心,高兴,兴奋! 
tx_xiao_0100103000368 丹霞山偶遇一同事,兴奋、幸福! 
tx_xiao_0100103000420 户外烧烤,还能看鳄鱼,开心开心。 
tx_xiao_0100103000425 儿童节快乐,本引大神我披荆斩棘终于回来啦! 
tx_xiao_0100103000429 唱歌唱到嗓子哑了,开心。 
tx_xiao_0100103000459 哎呀呀呀,国庆节,祖国生日快乐啊! 
tx_xiao_0100103000480 祝你新婚快乐,婚后也不要忘了关注我们老大呀! 
tx_xiao_0100104000019 死人宋明城,看到漂亮一点的女生就把自己姓什么给忘了。 
tx_xiao_0100104000064 别跟我提起她!既然你不爱她,当初为什么要从我身边夺走她? 
tx_xiao_0100104000111 我从不是什么善男信女,别用宗教那一套来感化我。 
tx_xiao_0100104000141 那好吧,我不管你们的事儿,不过我希望你能尽快的把事情讲清楚。 
tx_xiao_0100104000143 你还有什么脸说他是你女朋友!我告诉你,离熙儿远点儿! 
tx_xiao_0100104000190 不说话是吧?说不出来啦,对吧?没话说了,对吧?啊? 
tx_xiao_0100104000205 你是怎么缠上武赫的?你缠着一个江半岛还不够吗? 
tx_xiao_0100104000295 怎么可以骗我们呢,走,我们去找他! 
tx_xiao_0100104000354 就算我从未管过你,这次也绝不允许你和那个人的女儿在一起! 
tx_xiao_0100104000428 你少跟我废话,快告诉我你把惠雅拐到哪里去了! 
tx_xiao_0100105000112 你不要废话!谁会喜欢你?好好照顾我哥!我走了! 
tx_xiao_0100105000158 周傲宇,就因为我不了解你所以我才会跟你结婚。 
tx_xiao_0100105000254 臭丫头,还不是在本少爷的英明领导下,你才有这样的风范! 
tx_xiao_0100105000277 想今天放学后留下来擦窗子的,你就再讲,不想的话。 
tx_xiao_0100105000307 我不管你喜不喜欢我,只要是我想要的东西,没人可以阻止,就算是翼也不行。 
tx_xiao_0100105000327 喂,你什么意思啊?人家女生都没嫌你口臭呢,居然给我漱口… 
tx_xiao_0100105000369 奔奔这才知道害羞?嗯?奔奔你不厚道呦,说好我们一起出嫁的。 
tx_xiao_0100105000468 是我推卸责任吗?如果不是你醋意大发,胡乱猜测。 
tx_xiao_0100106000020 到底怎么回事儿啊?真的吗? 
tx_xiao_0100106000061 要凌哥哥怎么个疼法?是亲疼你,还是抱疼你? 
tx_xiao_0100106000077 你们两个也眼瞅着奔三十了,还没搞定咱们那两个警花姐姐? 
tx_xiao_0100106000099 格琳娜都是我最后一个逢场作戏的女人,相信我,好吗? 
tx_xiao_0100106000103 朋友…朋友需要这样的吗? 
tx_xiao_0100106000111 不知道林大少爷会不会舍命来换回她们两个呢? 
tx_xiao_0100106000116 可为什么他看恋夏时不是那种厌恶与不屑的眼神? 
tx_xiao_0100106000182 一个是资本主义的苗,一个是社会主义的草,我不扶持它谁扶持它? 
tx_xiao_0100106000278 哦,对了,你刚刚在外面求雨做什么? 
tx_xiao_0100106000282 那个…牧野什么的,是你回台的原因? 
tx_xiao_0100106000316 死丫头,喂!你怎么来上海了? 
tx_xiao_0100106000317 封皓阳队长不是只教你数学和英语吗? 
tx_xiao_0100106000332 是的。那个婢女是殿下的侍女吗? 
tx_xiao_0100107000103 那么妈妈现在还陪着我。 
tx_xiao_0100107000310 她是我最好最好最好的朋友,请你答应我…照顾她一辈子好吗? 
tx_xiao_0100107000341 因为天辰他患了一种绝症,所以没办法接管牧氏了。 
tx_xiao_0100107000369 如有违誓,我…不用发誓了,我我相信你。 
tx_xiao_0100107000421 所以她选择一个人去接受这一切。 
tx_xiao_0100107000431 这样我就有了很温暖的感觉…现在我连宝宝都失去了。 
tx_xiao_0100107000449 我每天放学回家都小心翼翼的。 
tx_xiao_0200101000712 我小时候贪玩儿,现在才认识到知识的重要性。 
tx_xiao_0200101000731 当初我要是坚定一些,我们三个人就不会是现在这样的结局。 
tx_xiao_0200101000952 主动久了,每个人都会累,不是不爱了,只是心累了。 
tx_xiao_0200103000008 眼前这亮闪闪的宝石,令兰博基尼太惊喜了。 
tx_xiao_0200103000048 果然能沟通成功,功夫不负有心人呀! 
tx_xiao_0200103000053 尽管果子都是脏兮兮的,但我吃着非常好吃! 
tx_xiao_0200103000080 太好了以后我们可以一起冒险了,来击个掌! 
tx_xiao_0200103000095 有点儿兴奋,太渴望与那些高手交手啦。 
tx_xiao_0200103000102 天呐,我竟然长高了,还真有些不敢相信呐。 
tx_xiao_0200103000124 这回能逃过一劫,是苍天眷顾啊! 
tx_xiao_0200103000128 许多人眼眸内都是精光烁烁,很是兴奋。 
tx_xiao_0200103000135 这辈子最令我开心的事儿就是被人夸你瘦了! 
tx_xiao_0200103000148 不敢相信呐,李子兴奋根本无法言表。 
tx_xiao_0200103000153 这么喜欢喊呀,今天我就跟你一起喊个够哦。 
tx_xiao_0200103000162 太久不见闺蜜好友,电扇有些小喜悦。 
tx_xiao_0200103000183 小草在微风中舞蹈,真是美极啦! 
tx_xiao_0200103000184 喜欢听流行歌曲,一听我就停不下来的兴奋。 
tx_xiao_0200103000187 弟弟主动递了杯水给我,太懂事了。 
tx_xiao_0200103000192 今天出席的明星,都是我梦寐以求的呦。 
tx_xiao_0200103000206 嗯我能重新投入这次科研中,可太好啦。 
tx_xiao_0200103000221 在节日的庆典上,让人感觉喜气冲天! 
tx_xiao_0200103000236 我想先祖还活着的话,一定会为我们高兴的。 
tx_xiao_0200103000258 这回考试顺利无比,花花心中挺高兴的。 
tx_xiao_0200103000259 那儿景色不错吧,我爸亲自打理的呢。 
tx_xiao_0200103000289 哟橘子口感不错呀,我也要买。 
tx_xiao_0200103000520 路上略微兴奋,一直在那儿卖弄手机。 
tx_xiao_0200103000535 随后我对他笑笑,他仰天长啸,亦是好不快活。 
tx_xiao_0200103000554 更让人兴奋的是,不但优秀,而且漂亮! 
tx_xiao_0200103000604 林研希今天和姨姨聊得最久了,姨姨都乐开了花! 
tx_xiao_0200103000640 本仙女宣布,我有男朋友啦,哈哈哈,还是正宗的富二代! 
tx_xiao_0200103000648 二零二零年中国终于实现了共同富裕! 
tx_xiao_0200104000527 就你们喜欢叫人同志,在国外都把同性恋称作同志!靠! 
tx_xiao_0200104000609 你还不快滚,别在这里妨碍本姑娘学习。 
tx_xiao_0200104000718 如果你说的解释是欺骗我的那件事儿,还是不要解释得好。 
tx_xiao_0200104000737 肖童这个混蛋,我恨不得亲手杀了他! 
tx_xiao_0200104000848 臭老头,闭上你的嘴。我劝你们还是不要轻举妄动的好。 
tx_xiao_0200104000887 你知不知道当初那么一声不吭地走了让我有多担心? 
tx_xiao_0200105000529 她身体里流的不是血,是冰! 
tx_xiao_0200105000606 大壁虎发怒了,脸爆炸得通红,像一颗星火,落在一盆汽油上。 
tx_xiao_0200105000643 天气很热,作业很多,心情焦虑和烦躁得什么都不想做。 
tx_xiao_0200105000715 老喜欢出阴招,使手段,太不厚道了! 
tx_xiao_0200105000746 一下子拉过凳子,鱼腥草大爷愤怒地坐了下去。 
tx_xiao_0200106000514 你是不是担心我和紫罗要跑去搅局?放心。 
tx_xiao_0200106000526 怎么约会?去哪里?干吗去了?做姐姐的,我不是关心你嘛。 
tx_xiao_0200106000535 没什么啦!趁现在没什么事儿,说说你爷爷到底跟你讲了些什么啊! 
tx_xiao_0200106000563 难道点了就一定要吃吗?我现在很饱,不需要吃。 
tx_xiao_0200106000574 就你那绣花拳头?呵呵。 
tx_xiao_0200106000584 当年写情书的人应该是你吧? 
tx_xiao_0200106000644 怎么?一提起他你就生气。你们俩闹别扭了啊? 
tx_xiao_0200106000737 你还不是这样爱着他啊。 
tx_xiao_0200106000881 后来怎么看出我这个魔鬼就是你心目中的天使的呢? 
tx_xiao_0200107000662 好,我知道了,谢谢你这么坦诚地回答我。 
tx_xiao_0200107000704 这哥们儿貌似不是南方人。 
tx_xiao_0200107000936 从来没有把我当成恋人看待。 
tx_xiao_0200107000972 就算危险随时会来到你也不会离开的对吗?你会吗。 
tx_xiao_0200107000978 研究所也不跟我续签合同了,我只能回国了,连个工作都没有。 
tx_xiao_0200107001042 能一直选择面对,选择用尽办法来挽救我们的婚姻,我能怪他什么啊。 


================================================
FILE: dataset/wav.scp
================================================
tx_emotion_00403000359 dataset/wav/tx_emotion_00403000359.wav
tx_emulate_00_103_0001_000017 dataset/wav/tx_emulate_00_103_0001_000017.wav
tx_emotion_00203000050 dataset/wav/tx_emotion_00203000050.wav
tx_emotion_00203000095 dataset/wav/tx_emotion_00203000095.wav
tx_emulate_00_127_0001_000084 dataset/wav/tx_emulate_00_127_0001_000084.wav
tx_emulate_00_119_0001_000049 dataset/wav/tx_emulate_00_119_0001_000049.wav
tx_emotion_00403000077 dataset/wav/tx_emotion_00403000077.wav
tx_emotion_00203000031 dataset/wav/tx_emotion_00203000031.wav
tx_emotion_00409000285 dataset/wav/tx_emotion_00409000285.wav
tx_xiao_0200103000095 dataset/wav/tx_xiao_0200103000095.wav
tx_emulate_01_012_0025_000092 dataset/wav/tx_emulate_01_012_0025_000092.wav
tx_xiao_0200103000520 dataset/wav/tx_xiao_0200103000520.wav
tx_emotion_00203000066 dataset/wav/tx_emotion_00203000066.wav
tx_emotion_00403000052 dataset/wav/tx_emotion_00403000052.wav
tx_emotion_00403000255 dataset/wav/tx_emotion_00403000255.wav
tx_emotion_00303000023 dataset/wav/tx_emotion_00303000023.wav
tx_emulate_00_108_0004_000039 dataset/wav/tx_emulate_00_108_0004_000039.wav
tx_xiao_0200103000183 dataset/wav/tx_xiao_0200103000183.wav
tx_emotion_00303000465 dataset/wav/tx_emotion_00303000465.wav
tx_emotion_00303000127 dataset/wav/tx_emotion_00303000127.wav
tx_xiao_0200103000604 dataset/wav/tx_xiao_0200103000604.wav
tx_emotion_00203000065 dataset/wav/tx_emotion_00203000065.wav
tx_xiao_0200103000162 dataset/wav/tx_xiao_0200103000162.wav
tx_emotion_00203000485 dataset/wav/tx_emotion_00203000485.wav
tx_xiao_0100105000254 dataset/wav/tx_xiao_0100105000254.wav
tx_emulate_00_185_0002_000007 dataset/wav/tx_emulate_00_185_0002_000007.wav
tx_emulate_00_157_0001_000031 dataset/wav/tx_emulate_00_157_0001_000031.wav
tx_xiao_0200103000187 dataset/wav/tx_xiao_0200103000187.wav
tx_xiao_0100103000480 dataset/wav/tx_xiao_0100103000480.wav
tx_xiao_0200103000148 dataset/wav/tx_xiao_0200103000148.wav
tx_emulate_01_216_0004_000039 dataset/wav/tx_emulate_01_216_0004_000039.wav
tx_emulate_00_119_0001_000001 dataset/wav/tx_emulate_00_119_0001_000001.wav
tx_emulate_00_144_0007_000056 dataset/wav/tx_emulate_00_144_0007_000056.wav
tx_xiao_0200103000124 dataset/wav/tx_xiao_0200103000124.wav
tx_xiao_0200103000102 dataset/wav/tx_xiao_0200103000102.wav
tx_emulate_00_110_0004_000059 dataset/wav/tx_emulate_00_110_0004_000059.wav
tx_xiao_0100103000069 dataset/wav/tx_xiao_0100103000069.wav
tx_emulate_00_121_0002_000009 dataset/wav/tx_emulate_00_121_0002_000009.wav
tx_xiao_0200103000128 dataset/wav/tx_xiao_0200103000128.wav
tx_emulate_00_151_0001_000018 dataset/wav/tx_emulate_00_151_0001_000018.wav
tx_emotion_00403000432 dataset/wav/tx_emotion_00403000432.wav
tx_emotion_00403000173 dataset/wav/tx_emotion_00403000173.wav
tx_emulate_01_005_0004_000013 dataset/wav/tx_emulate_01_005_0004_000013.wav
tx_emulate_00_123_0002_000003 dataset/wav/tx_emulate_00_123_0002_000003.wav
tx_emotion_00403000099 dataset/wav/tx_emotion_00403000099.wav
tx_emotion_00303000396 dataset/wav/tx_emotion_00303000396.wav
tx_emulate_00_137_0002_000079 dataset/wav/tx_emulate_00_137_0002_000079.wav
tx_xiao_0100103000459 dataset/wav/tx_xiao_0100103000459.wav
tx_xiao_0100103000171 dataset/wav/tx_xiao_0100103000171.wav
tx_xiao_0100103000268 dataset/wav/tx_xiao_0100103000268.wav
tx_xiao_0200103000554 dataset/wav/tx_xiao_0200103000554.wav
tx_emulate_00_129_0004_000056 dataset/wav/tx_emulate_00_129_0004_000056.wav
tx_emulate_01_032_0011_000017 dataset/wav/tx_emulate_01_032_0011_000017.wav
tx_emotion_00303000474 dataset/wav/tx_emotion_00303000474.wav
tx_emotion_00303000462 dataset/wav/tx_emotion_00303000462.wav
tx_emulate_01_006_0020_000094 dataset/wav/tx_emulate_01_006_0020_000094.wav
tx_emulate_01_014_0011_000061 dataset/wav/tx_emulate_01_014_0011_000061.wav
tx_emotion_00303000310 dataset/wav/tx_emotion_00303000310.wav
tx_emotion_00302000107 dataset/wav/tx_emotion_00302000107.wav
tx_emulate_01_003_0003_000018 dataset/wav/tx_emulate_01_003_0003_000018.wav
tx_xiao_0100103000032 dataset/wav/tx_xiao_0100103000032.wav
tx_xiao_0200103000206 dataset/wav/tx_xiao_0200103000206.wav
tx_emulate_00_137_0003_000022 dataset/wav/tx_emulate_00_137_0003_000022.wav
tx_emotion_00403000131 dataset/wav/tx_emotion_00403000131.wav
tx_emulate_00_115_0002_000007 dataset/wav/tx_emulate_00_115_0002_000007.wav
tx_xiao_0200103000289 dataset/wav/tx_xiao_0200103000289.wav
tx_xiao_0100103000306 dataset/wav/tx_xiao_0100103000306.wav
tx_emulate_01_018_0003_000014 dataset/wav/tx_emulate_01_018_0003_000014.wav
tx_emotion_00309000124 dataset/wav/tx_emotion_00309000124.wav
tx_xiao_0200103000048 dataset/wav/tx_xiao_0200103000048.wav
tx_emulate_00_130_0003_000017 dataset/wav/tx_emulate_00_130_0003_000017.wav
tx_xiao_0100103000336 dataset/wav/tx_xiao_0100103000336.wav
tx_xiao_0100106000061 dataset/wav/tx_xiao_0100106000061.wav
tx_emulate_00_118_0002_000020 dataset/wav/tx_emulate_00_118_0002_000020.wav
tx_xiao_0200103000259 dataset/wav/tx_xiao_0200103000259.wav
tx_emotion_00403000451 dataset/wav/tx_emotion_00403000451.wav
tx_xiao_0100103000429 dataset/wav/tx_xiao_0100103000429.wav
tx_xiao_0100103000364 dataset/wav/tx_xiao_0100103000364.wav
tx_emotion_00303000363 dataset/wav/tx_emotion_00303000363.wav
tx_emotion_00403000494 dataset/wav/tx_emotion_00403000494.wav
tx_xiao_0200103000258 dataset/wav/tx_xiao_0200103000258.wav
tx_emotion_00303000211 dataset/wav/tx_emotion_00303000211.wav
tx_emulate_00_112_0002_000070 dataset/wav/tx_emulate_00_112_0002_000070.wav
tx_emotion_00203000399 dataset/wav/tx_emotion_00203000399.wav
tx_emotion_00206000458 dataset/wav/tx_emotion_00206000458.wav
tx_emotion_00203000448 dataset/wav/tx_emotion_00203000448.wav
tx_xiao_0200103000640 dataset/wav/tx_xiao_0200103000640.wav
tx_emotion_00403000157 dataset/wav/tx_emotion_00403000157.wav
tx_xiao_0100105000369 dataset/wav/tx_xiao_0100105000369.wav
tx_emotion_00303000158 dataset/wav/tx_emotion_00303000158.wav
tx_emotion_00203000034 dataset/wav/tx_emotion_00203000034.wav
tx_xiao_0100103000066 dataset/wav/tx_xiao_0100103000066.wav
tx_xiao_0100103000368 dataset/wav/tx_xiao_0100103000368.wav
tx_xiao_0100103000420 dataset/wav/tx_xiao_0100103000420.wav
tx_emulate_01_190_0001_000042 dataset/wav/tx_emulate_01_190_0001_000042.wav
tx_xiao_0200103000184 dataset/wav/tx_xiao_0200103000184.wav
tx_emulate_00_151_0007_000028 dataset/wav/tx_emulate_00_151_0007_000028.wav
tx_emulate_00_122_0001_000025 dataset/wav/tx_emulate_00_122_0001_000025.wav
tx_emulate_01_034_0003_000030 dataset/wav/tx_emulate_01_034_0003_000030.wav
tx_emotion_00403000395 dataset/wav/tx_emotion_00403000395.wav
tx_emotion_00303000130 dataset/wav/tx_emotion_00303000130.wav
tx_xiao_0100103000159 dataset/wav/tx_xiao_0100103000159.wav
tx_emulate_00_135_0002_000003 dataset/wav/tx_emulate_00_135_0002_000003.wav
tx_emotion_00203000128 dataset/wav/tx_emotion_00203000128.wav
tx_emotion_00403000252 dataset/wav/tx_emotion_00403000252.wav
tx_emulate_00_125_0002_000058 dataset/wav/tx_emulate_00_125_0002_000058.wav
tx_emulate_01_032_0011_000020 dataset/wav/tx_emulate_01_032_0011_000020.wav
tx_emotion_00203000086 dataset/wav/tx_emotion_00203000086.wav
tx_emotion_00403000163 dataset/wav/tx_emotion_00403000163.wav
tx_emotion_00203000379 dataset/wav/tx_emotion_00203000379.wav
tx_xiao_0200103000236 dataset/wav/tx_xiao_0200103000236.wav
tx_emotion_00203000019 dataset/wav/tx_emotion_00203000019.wav
tx_emotion_00203000352 dataset/wav/tx_emotion_00203000352.wav
tx_emotion_00203000273 dataset/wav/tx_emotion_00203000273.wav
tx_emotion_00403000060 dataset/wav/tx_emotion_00403000060.wav
tx_emotion_00403000482 dataset/wav/tx_emotion_00403000482.wav
tx_xiao_0200103000053 dataset/wav/tx_xiao_0200103000053.wav
tx_emotion_00203000069 dataset/wav/tx_emotion_00203000069.wav
tx_emulate_00_186_0001_000023 dataset/wav/tx_emulate_00_186_0001_000023.wav
tx_emulate_00_108_0002_000070 dataset/wav/tx_emulate_00_108_0002_000070.wav
tx_emulate_00_060_0001_000030 dataset/wav/tx_emulate_00_060_0001_000030.wav
tx_emulate_00_179_0005_000021 dataset/wav/tx_emulate_00_179_0005_000021.wav
tx_emulate_00_110_0002_000028 dataset/wav/tx_emulate_00_110_0002_000028.wav
tx_emotion_00403000393 dataset/wav/tx_emotion_00403000393.wav
tx_emulate_02_255_0003_000001 dataset/wav/tx_emulate_02_255_0003_000001.wav
tx_xiao_0100103000344 dataset/wav/tx_xiao_0100103000344.wav
tx_xiao_0100106000077 dataset/wav/tx_xiao_0100106000077.wav
tx_emulate_02_044_0010_000061 dataset/wav/tx_emulate_02_044_0010_000061.wav
tx_emotion_00309000349 dataset/wav/tx_emotion_00309000349.wav
tx_emotion_00203000072 dataset/wav/tx_emotion_00203000072.wav
tx_emotion_00303000052 dataset/wav/tx_emotion_00303000052.wav
tx_emotion_00203000425 dataset/wav/tx_emotion_00203000425.wav
tx_emotion_00303000182 dataset/wav/tx_emotion_00303000182.wav
tx_emotion_00409000325 dataset/wav/tx_emotion_00409000325.wav
tx_xiao_0100103000272 dataset/wav/tx_xiao_0100103000272.wav
tx_emotion_00303000360 dataset/wav/tx_emotion_00303000360.wav
tx_emotion_00303000198 dataset/wav/tx_emotion_00303000198.wav
tx_emotion_00303000046 dataset/wav/tx_emotion_00303000046.wav
tx_emotion_00403000197 dataset/wav/tx_emotion_00403000197.wav
tx_emotion_00303000236 dataset/wav/tx_emotion_00303000236.wav
tx_xiao_0200106000514 dataset/wav/tx_xiao_0200106000514.wav
tx_emotion_00203000470 dataset/wav/tx_emotion_00203000470.wav
tx_xiao_0100103000115 dataset/wav/tx_xiao_0100103000115.wav
tx_emotion_00203000486 dataset/wav/tx_emotion_00203000486.wav
tx_xiao_0100103000017 dataset/wav/tx_xiao_0100103000017.wav
tx_xiao_0100103000202 dataset/wav/tx_xiao_0100103000202.wav
tx_emotion_00209000026 dataset/wav/tx_emotion_00209000026.wav
tx_xiao_0200103000535 dataset/wav/tx_xiao_0200103000535.wav
tx_emotion_00303000389 dataset/wav/tx_emotion_00303000389.wav
tx_emulate_01_031_0009_000032 dataset/wav/tx_emulate_01_031_0009_000032.wav
tx_emulate_00_150_0005_000005 dataset/wav/tx_emulate_00_150_0005_000005.wav
tx_emulate_01_020_0007_000012 dataset/wav/tx_emulate_01_020_0007_000012.wav
tx_xiao_0200103000192 dataset/wav/tx_xiao_0200103000192.wav
tx_emotion_00303000173 dataset/wav/tx_emotion_00303000173.wav
tx_emotion_00403000175 dataset/wav/tx_emotion_00403000175.wav
tx_emulate_01_190_0001_000041 dataset/wav/tx_emulate_01_190_0001_000041.wav
tx_emulate_01_030_0014_000035 dataset/wav/tx_emulate_01_030_0014_000035.wav
tx_xiao_0100103000326 dataset/wav/tx_xiao_0100103000326.wav
tx_xiao_0100103000354 dataset/wav/tx_xiao_0100103000354.wav
tx_emotion_00303000260 dataset/wav/tx_emotion_00303000260.wav
tx_xiao_0200106000881 dataset/wav/tx_xiao_0200106000881.wav
tx_emotion_00303000457 dataset/wav/tx_emotion_00303000457.wav
tx_emotion_00203000412 dataset/wav/tx_emotion_00203000412.wav
tx_emotion_00403000143 dataset/wav/tx_emotion_00403000143.wav
tx_xiao_0200103000648 dataset/wav/tx_xiao_0200103000648.wav
tx_emotion_00209000269 dataset/wav/tx_emotion_00209000269.wav
tx_emotion_00303000129 dataset/wav/tx_emotion_00303000129.wav
tx_emotion_00309000324 dataset/wav/tx_emotion_00309000324.wav
tx_emulate_00_137_0005_000092 dataset/wav/tx_emulate_00_137_0005_000092.wav
tx_emotion_00303000056 dataset/wav/tx_emotion_00303000056.wav
tx_emotion_00203000372 dataset/wav/tx_emotion_00203000372.wav
tx_emotion_00303000117 dataset/wav/tx_emotion_00303000117.wav
tx_xiao_0200103000153 dataset/wav/tx_xiao_0200103000153.wav
tx_emotion_00303000298 dataset/wav/tx_emotion_00303000298.wav
tx_emotion_00303000167 dataset/wav/tx_emotion_00303000167.wav
tx_xiao_0100103000425 dataset/wav/tx_xiao_0100103000425.wav
tx_xiao_0200103000221 dataset/wav/tx_xiao_0200103000221.wav
tx_emulate_00_132_0004_000026 dataset/wav/tx_emulate_00_132_0004_000026.wav
tx_emotion_00303000148 dataset/wav/tx_emotion_00303000148.wav
tx_emotion_00303000119 dataset/wav/tx_emotion_00303000119.wav
tx_xiao_0200103000080 dataset/wav/tx_xiao_0200103000080.wav
tx_emulate_00_150_0005_000006 dataset/wav/tx_emulate_00_150_0005_000006.wav
tx_emulate_00_112_0001_000066 dataset/wav/tx_emulate_00_112_0001_000066.wav
tx_emulate_00_111_0001_000042 dataset/wav/tx_emulate_00_111_0001_000042.wav
tx_emotion_00403000439 dataset/wav/tx_emotion_00403000439.wav
tx_xiao_0100105000307 dataset/wav/tx_xiao_0100105000307.wav
tx_xiao_0100103000327 dataset/wav/tx_xiao_0100103000327.wav
tx_emotion_00303000082 dataset/wav/tx_emotion_00303000082.wav
tx_emulate_00_112_0005_000089 dataset/wav/tx_emulate_00_112_0005_000089.wav
tx_emotion_00403000145 dataset/wav/tx_emotion_00403000145.wav
tx_emotion_00203000364 dataset/wav/tx_emotion_00203000364.wav
tx_emulate_02_263_0002_000014 dataset/wav/tx_emulate_02_263_0002_000014.wav
tx_emotion_00303000354 dataset/wav/tx_emotion_00303000354.wav
tx_emulate_02_044_0001_000032 dataset/wav/tx_emulate_02_044_0001_000032.wav
tx_emotion_00203000357 dataset/wav/tx_emotion_00203000357.wav
tx_emotion_00303000421 dataset/wav/tx_emotion_00303000421.wav
tx_emulate_02_042_0003_000154 dataset/wav/tx_emulate_02_042_0003_000154.wav
tx_emotion_00303000433 dataset/wav/tx_emotion_00303000433.wav
tx_emulate_01_004_0006_000023 dataset/wav/tx_emulate_01_004_0006_000023.wav
tx_emulate_01_020_0001_000004 dataset/wav/tx_emulate_01_020_0001_000004.wav
tx_xiao_0200104000737 dataset/wav/tx_xiao_0200104000737.wav
tx_xiao_0200103000135 dataset/wav/tx_xiao_0200103000135.wav
tx_emulate_02_254_0005_000056 dataset/wav/tx_emulate_02_254_0005_000056.wav
tx_emulate_01_007_0021_000120 dataset/wav/tx_emulate_01_007_0021_000120.wav
tx_emotion_00302000027 dataset/wav/tx_emotion_00302000027.wav
tx_emulate_02_261_0005_000107 dataset/wav/tx_emulate_02_261_0005_000107.wav
tx_emulate_01_031_0011_000045 dataset/wav/tx_emulate_01_031_0011_000045.wav
tx_emulate_01_205_0002_000018 dataset/wav/tx_emulate_01_205_0002_000018.wav
tx_emulate_00_162_0005_000004 dataset/wav/tx_emulate_00_162_0005_000004.wav
tx_xiao_0200105000746 dataset/wav/tx_xiao_0200105000746.wav
tx_emulate_00_115_0010_000012 dataset/wav/tx_emulate_00_115_0010_000012.wav
tx_emotion_00202000429 dataset/wav/tx_emotion_00202000429.wav
tx_emotion_00302000090 dataset/wav/tx_emotion_00302000090.wav
tx_emotion_00400000011 dataset/wav/tx_emotion_00400000011.wav
tx_emulate_00_125_0008_000017 dataset/wav/tx_emulate_00_125_0008_000017.wav
tx_emotion_00306000460 dataset/wav/tx_emotion_00306000460.wav
tx_emulate_00_128_0006_000033 dataset/wav/tx_emulate_00_128_0006_000033.wav
tx_xiao_0200106000644 dataset/wav/tx_xiao_0200106000644.wav
tx_emotion_00400000160 dataset/wav/tx_emotion_00400000160.wav
tx_emotion_00210000419 dataset/wav/tx_emotion_00210000419.wav
tx_emulate_01_023_0010_000063 dataset/wav/tx_emulate_01_023_0010_000063.wav
tx_emotion_00402000196 dataset/wav/tx_emotion_00402000196.wav
tx_emotion_00206000149 dataset/wav/tx_emotion_00206000149.wav
tx_emotion_00309000452 dataset/wav/tx_emotion_00309000452.wav
tx_emulate_00_108_0004_000011 dataset/wav/tx_emulate_00_108_0004_000011.wav
tx_emulate_01_009_0003_000026 dataset/wav/tx_emulate_01_009_0003_000026.wav
tx_emulate_00_110_0004_000099 dataset/wav/tx_emulate_00_110_0004_000099.wav
tx_xiao_0200106000535 dataset/wav/tx_xiao_0200106000535.wav
tx_emulate_02_046_0004_000012 dataset/wav/tx_emulate_02_046_0004_000012.wav
tx_emulate_00_132_0005_000016 dataset/wav/tx_emulate_00_132_0005_000016.wav
tx_emotion_00206000315 dataset/wav/tx_emotion_00206000315.wav
tx_emulate_01_023_0010_000064 dataset/wav/tx_emulate_01_023_0010_000064.wav
tx_emulate_01_001_0019_000077 dataset/wav/tx_emulate_01_001_0019_000077.wav
tx_emulate_01_021_0003_000015 dataset/wav/tx_emulate_01_021_0003_000015.wav
tx_xiao_0100107000341 dataset/wav/tx_xiao_0100107000341.wav
tx_emotion_00306000405 dataset/wav/tx_emotion_00306000405.wav
tx_emotion_00205000265 dataset/wav/tx_emotion_00205000265.wav
tx_emotion_00301000185 dataset/wav/tx_emotion_00301000185.wav
tx_xiao_0100105000327 dataset/wav/tx_xiao_0100105000327.wav
tx_emotion_00403000147 dataset/wav/tx_emotion_00403000147.wav
tx_emotion_00302000016 dataset/wav/tx_emotion_00302000016.wav
tx_emulate_01_031_0015_000061 dataset/wav/tx_emulate_01_031_0015_000061.wav
tx_emotion_00206000186 dataset/wav/tx_emotion_00206000186.wav
tx_emotion_00302000304 dataset/wav/tx_emotion_00302000304.wav
tx_xiao_0100106000282 dataset/wav/tx_xiao_0100106000282.wav
tx_emulate_02_055_0015_000044 dataset/wav/tx_emulate_02_055_0015_000044.wav
tx_emulate_02_257_0001_000007 dataset/wav/tx_emulate_02_257_0001_000007.wav
tx_emulate_01_033_0004_000039 dataset/wav/tx_emulate_01_033_0004_000039.wav
tx_emotion_00206000235 dataset/wav/tx_emotion_00206000235.wav
tx_emotion_00309000154 dataset/wav/tx_emotion_00309000154.wav
tx_emotion_00309000183 dataset/wav/tx_emotion_00309000183.wav
tx_emotion_00202000136 dataset/wav/tx_emotion_00202000136.wav
tx_emotion_00409000081 dataset/wav/tx_emotion_00409000081.wav
tx_emulate_02_043_0003_000020 dataset/wav/tx_emulate_02_043_0003_000020.wav
tx_emotion_00305000005 dataset/wav/tx_emotion_00305000005.wav
tx_emulate_02_260_0001_000024 dataset/wav/tx_emulate_02_260_0001_000024.wav
tx_emulate_01_007_0025_000099 dataset/wav/tx_emulate_01_007_0025_000099.wav
tx_emotion_00305000308 dataset/wav/tx_emotion_00305000308.wav
tx_emulate_00_123_0001_000048 dataset/wav/tx_emulate_00_123_0001_000048.wav
tx_emotion_00203000111 dataset/wav/tx_emotion_00203000111.wav
tx_emulate_01_013_0002_000004 dataset/wav/tx_emulate_01_013_0002_000004.wav
tx_emotion_00202000395 dataset/wav/tx_emotion_00202000395.wav
tx_emotion_00305000183 dataset/wav/tx_emotion_00305000183.wav
tx_emotion_00209000346 dataset/wav/tx_emotion_00209000346.wav
tx_xiao_0100106000111 dataset/wav/tx_xiao_0100106000111.wav
tx_emotion_00209000005 dataset/wav/tx_emotion_00209000005.wav
tx_emulate_00_109_0004_000071 dataset/wav/tx_emulate_00_109_0004_000071.wav
tx_emotion_00303000080 dataset/wav/tx_emotion_00303000080.wav
tx_emulate_00_114_0003_000026 dataset/wav/tx_emulate_00_114_0003_000026.wav
tx_emulate_01_209_0001_000021 dataset/wav/tx_emulate_01_209_0001_000021.wav
tx_emotion_00302000470 dataset/wav/tx_emotion_00302000470.wav
tx_emotion_00309000172 dataset/wav/tx_emotion_00309000172.wav
tx_emulate_00_129_0004_000057 dataset/wav/tx_emulate_00_129_0004_000057.wav
tx_emotion_00306000101 dataset/wav/tx_emotion_00306000101.wav
tx_xiao_0200106000526 dataset/wav/tx_xiao_0200106000526.wav
tx_emulate_00_108_0001_000008 dataset/wav/tx_emulate_00_108_0001_000008.wav
tx_emotion_00409000323 dataset/wav/tx_emotion_00409000323.wav
tx_emotion_00400000259 dataset/wav/tx_emotion_00400000259.wav
tx_emulate_01_014_0016_000086 dataset/wav/tx_emulate_01_014_0016_000086.wav
tx_emulate_00_117_0002_000013 dataset/wav/tx_emulate_00_117_0002_000013.wav
tx_emulate_00_112_0002_000016 dataset/wav/tx_emulate_00_112_0002_000016.wav
tx_emulate_02_258_0002_000011 dataset/wav/tx_emulate_02_258_0002_000011.wav
tx_emulate_00_129_0005_000029 dataset/wav/tx_emulate_00_129_0005_000029.wav
tx_emulate_02_057_0010_000047 dataset/wav/tx_emulate_02_057_0010_000047.wav
tx_emulate_01_210_0009_000010 dataset/wav/tx_emulate_01_210_0009_000010.wav
tx_emulate_02_045_0002_000003 dataset/wav/tx_emulate_02_045_0002_000003.wav
tx_xiao_0100106000316 dataset/wav/tx_xiao_0100106000316.wav
tx_emulate_01_007_0008_000034 dataset/wav/tx_emulate_01_007_0008_000034.wav
tx_xiao_0100105000468 dataset/wav/tx_xiao_0100105000468.wav
tx_emulate_00_109_0004_000042 dataset/wav/tx_emulate_00_109_0004_000042.wav
tx_xiao_0100104000190 dataset/wav/tx_xiao_0100104000190.wav
tx_emotion_00305000009 dataset/wav/tx_emotion_00305000009.wav
tx_emotion_00206000167 dataset/wav/tx_emotion_00206000167.wav
tx_emulate_02_006_0002_000017 dataset/wav/tx_emulate_02_006_0002_000017.wav
tx_emotion_00206000240 dataset/wav/tx_emotion_00206000240.wav
tx_emulate_02_046_0022_000084 dataset/wav/tx_emulate_02_046_0022_000084.wav
tx_emotion_00409000007 dataset/wav/tx_emotion_00409000007.wav
tx_emulate_02_260_0001_000039 dataset/wav/tx_emulate_02_260_0001_000039.wav
tx_emulate_02_245_0002_000019 dataset/wav/tx_emulate_02_245_0002_000019.wav
tx_emulate_01_034_0005_000040 dataset/wav/tx_emulate_01_034_0005_000040.wav
tx_emulate_00_130_0001_000057 dataset/wav/tx_emulate_00_130_0001_000057.wav
tx_xiao_0100107000369 dataset/wav/tx_xiao_0100107000369.wav
tx_xiao_0200106000574 dataset/wav/tx_xiao_0200106000574.wav
tx_emulate_02_056_0005_000027 dataset/wav/tx_emulate_02_056_0005_000027.wav
tx_emotion_00306000261 dataset/wav/tx_emotion_00306000261.wav
tx_xiao_0100106000332 dataset/wav/tx_xiao_0100106000332.wav
tx_emulate_02_045_0001_000001 dataset/wav/tx_emulate_02_045_0001_000001.wav
tx_emulate_02_257_0002_000038 dataset/wav/tx_emulate_02_257_0002_000038.wav
tx_emulate_00_109_0005_000085 dataset/wav/tx_emulate_00_109_0005_000085.wav
tx_xiao_0100106000020 dataset/wav/tx_xiao_0100106000020.wav
tx_emotion_00206000480 dataset/wav/tx_emotion_00206000480.wav
tx_emulate_01_016_0023_000150 dataset/wav/tx_emulate_01_016_0023_000150.wav
tx_emulate_01_005_0005_000022 dataset/wav/tx_emulate_01_005_0005_000022.wav
tx_xiao_0200104000848 dataset/wav/tx_xiao_0200104000848.wav
tx_emotion_00206000061 dataset/wav/tx_emotion_00206000061.wav
tx_emulate_01_011_0011_000059 dataset/wav/tx_emulate_01_011_0011_000059.wav
tx_emotion_00400000196 dataset/wav/tx_emotion_00400000196.wav
tx_xiao_0100106000103 dataset/wav/tx_xiao_0100106000103.wav
tx_emotion_00409000119 dataset/wav/tx_emotion_00409000119.wav
tx_emotion_00204000494 dataset/wav/tx_emotion_00204000494.wav
tx_emulate_01_011_0005_000017 dataset/wav/tx_emulate_01_011_0005_000017.wav
tx_xiao_0100106000317 dataset/wav/tx_xiao_0100106000317.wav
tx_emulate_01_033_0006_000075 dataset/wav/tx_emulate_01_033_0006_000075.wav
tx_emulate_02_039_0002_000034 dataset/wav/tx_emulate_02_039_0002_000034.wav
tx_emulate_01_002_0007_000012 dataset/wav/tx_emulate_01_002_0007_000012.wav
tx_emulate_00_108_0001_000079 dataset/wav/tx_emulate_00_108_0001_000079.wav
tx_emulate_01_010_0012_000085 dataset/wav/tx_emulate_01_010_0012_000085.wav
tx_xiao_0100105000158 dataset/wav/tx_xiao_0100105000158.wav
tx_emulate_00_131_0005_000061 dataset/wav/tx_emulate_00_131_0005_000061.wav
tx_emulate_02_259_0003_000006 dataset/wav/tx_emulate_02_259_0003_000006.wav
tx_xiao_0200106000563 dataset/wav/tx_xiao_0200106000563.wav
tx_xiao_0100105000277 dataset/wav/tx_xiao_0100105000277.wav
tx_emotion_00409000228 dataset/wav/tx_emotion_00409000228.wav
tx_emotion_00202000436 dataset/wav/tx_emotion_00202000436.wav
tx_emulate_00_114_0001_000001 dataset/wav/tx_emulate_00_114_0001_000001.wav
tx_emulate_02_042_0004_000065 dataset/wav/tx_emulate_02_042_0004_000065.wav
tx_emulate_01_013_0007_000032 dataset/wav/tx_emulate_01_013_0007_000032.wav
tx_xiao_0100104000295 dataset/wav/tx_xiao_0100104000295.wav
tx_emulate_02_006_0009_000056 dataset/wav/tx_emulate_02_006_0009_000056.wav
tx_xiao_0100104000354 dataset/wav/tx_xiao_0100104000354.wav
tx_emotion_00309000167 dataset/wav/tx_emotion_00309000167.wav
tx_emotion_00401000087 dataset/wav/tx_emotion_00401000087.wav
tx_emulate_01_011_0009_000047 dataset/wav/tx_emulate_01_011_0009_000047.wav
tx_emulate_00_110_0002_000038 dataset/wav/tx_emulate_00_110_0002_000038.wav
tx_emulate_01_202_0001_000005 dataset/wav/tx_emulate_01_202_0001_000005.wav
tx_emotion_00206000337 dataset/wav/tx_emotion_00206000337.wav
tx_xiao_0200106000584 dataset/wav/tx_xiao_0200106000584.wav
tx_emulate_01_022_0022_000108 dataset/wav/tx_emulate_01_022_0022_000108.wav
tx_emulate_01_016_0007_000035 dataset/wav/tx_emulate_01_016_0007_000035.wav
tx_emulate_02_002_0016_000058 dataset/wav/tx_emulate_02_002_0016_000058.wav
tx_emotion_00205000127 dataset/wav/tx_emotion_00205000127.wav
tx_emulate_01_016_0008_000037 dataset/wav/tx_emulate_01_016_0008_000037.wav
tx_emulate_01_023_0014_000081 dataset/wav/tx_emulate_01_023_0014_000081.wav
tx_emotion_00409000441 dataset/wav/tx_emotion_00409000441.wav
tx_emotion_00400000026 dataset/wav/tx_emotion_00400000026.wav
tx_emulate_02_004_0004_000018 dataset/wav/tx_emulate_02_004_0004_000018.wav
tx_xiao_0100106000278 dataset/wav/tx_xiao_0100106000278.wav
tx_emulate_00_123_0009_000012 dataset/wav/tx_emulate_00_123_0009_000012.wav
tx_emulate_02_265_0002_000026 dataset/wav/tx_emulate_02_265_0002_000026.wav
tx_emotion_00309000269 dataset/wav/tx_emotion_00309000269.wav
tx_emulate_02_239_0002_000056 dataset/wav/tx_emulate_02_239_0002_000056.wav
tx_emulate_00_159_0001_000006 dataset/wav/tx_emulate_00_159_0001_000006.wav
tx_emotion_00209000060 dataset/wav/tx_emotion_00209000060.wav
tx_emotion_00306000202 dataset/wav/tx_emotion_00306000202.wav
tx_emulate_01_032_0018_000042 dataset/wav/tx_emulate_01_032_0018_000042.wav
tx_emotion_00306000098 dataset/wav/tx_emotion_00306000098.wav
tx_emulate_01_008_0017_000077 dataset/wav/tx_emulate_01_008_0017_000077.wav
tx_emulate_02_008_0020_000108 dataset/wav/tx_emulate_02_008_0020_000108.wav
tx_emulate_00_114_0002_000030 dataset/wav/tx_emulate_00_114_0002_000030.wav
tx_xiao_0100106000182 dataset/wav/tx_xiao_0100106000182.wav
tx_emulate_02_042_0001_000128 dataset/wav/tx_emulate_02_042_0001_000128.wav
tx_emulate_00_162_0002_000075 dataset/wav/tx_emulate_00_162_0002_000075.wav
tx_emotion_00305000280 dataset/wav/tx_emotion_00305000280.wav
tx_emulate_01_016_0016_000067 dataset/wav/tx_emulate_01_016_0016_000067.wav
tx_emulate_00_113_0002_000003 dataset/wav/tx_emulate_00_113_0002_000003.wav
tx_emulate_00_126_0001_000039 dataset/wav/tx_emulate_00_126_0001_000039.wav
tx_emulate_00_133_0005_000062 dataset/wav/tx_emulate_00_133_0005_000062.wav
tx_emulate_00_120_0002_000071 dataset/wav/tx_emulate_00_120_0002_000071.wav
tx_emotion_00202000047 dataset/wav/tx_emotion_00202000047.wav
tx_emulate_01_016_0003_000006 dataset/wav/tx_emulate_01_016_0003_000006.wav
tx_emulate_00_137_0003_000082 dataset/wav/tx_emulate_00_137_0003_000082.wav
tx_emulate_01_013_0007_000025 dataset/wav/tx_emulate_01_013_0007_000025.wav
tx_emulate_02_261_0005_000089 dataset/wav/tx_emulate_02_261_0005_000089.wav
tx_emotion_00302000224 dataset/wav/tx_emotion_00302000224.wav
tx_emulate_01_007_0024_000091 dataset/wav/tx_emulate_01_007_0024_000091.wav
tx_xiao_0100106000099 dataset/wav/tx_xiao_0100106000099.wav
tx_emulate_00_118_0005_000032 dataset/wav/tx_emulate_00_118_0005_000032.wav
tx_emulate_02_229_0001_000044 dataset/wav/tx_emulate_02_229_0001_000044.wav
tx_emotion_00204000426 dataset/wav/tx_emotion_00204000426.wav
tx_xiao_0100104000205 dataset/wav/tx_xiao_0100104000205.wav
tx_emotion_00305000383 dataset/wav/tx_emotion_00305000383.wav
tx_emotion_00302000216 dataset/wav/tx_emotion_00302000216.wav
tx_emulate_02_238_0002_000034 dataset/wav/tx_emulate_02_238_0002_000034.wav
tx_emotion_00400000378 dataset/wav/tx_emotion_00400000378.wav
tx_emulate_00_125_0002_000049 dataset/wav/tx_emulate_00_125_0002_000049.wav
tx_xiao_0200101000952 dataset/wav/tx_xiao_0200101000952.wav
tx_xiao_0200104000718 dataset/wav/tx_xiao_0200104000718.wav
tx_emotion_00206000380 dataset/wav/tx_emotion_00206000380.wav
tx_emulate_01_199_0001_000022 dataset/wav/tx_emulate_01_199_0001_000022.wav
tx_xiao_0200105000715 dataset/wav/tx_xiao_0200105000715.wav
tx_emulate_02_051_0001_000002 dataset/wav/tx_emulate_02_051_0001_000002.wav
tx_emulate_01_203_0001_000002 dataset/wav/tx_emulate_01_203_0001_000002.wav
tx_emulate_00_158_0002_000049 dataset/wav/tx_emulate_00_158_0002_000049.wav
tx_emotion_00401000297 dataset/wav/tx_emotion_00401000297.wav
tx_xiao_0100104000428 dataset/wav/tx_xiao_0100104000428.wav
tx_emulate_00_119_0001_000034 dataset/wav/tx_emulate_00_119_0001_000034.wav
tx_emulate_01_200_0002_000001 dataset/wav/tx_emulate_01_200_0002_000001.wav
tx_emotion_00301000494 dataset/wav/tx_emotion_00301000494.wav
tx_emotion_00304000459 dataset/wav/tx_emotion_00304000459.wav
tx_emulate_00_112_0006_000020 dataset/wav/tx_emulate_00_112_0006_000020.wav
tx_xiao_0200107001042 dataset/wav/tx_xiao_0200107001042.wav
tx_emotion_00201000467 dataset/wav/tx_emotion_00201000467.wav
tx_emotion_00401000440 dataset/wav/tx_emotion_00401000440.wav
tx_emulate_01_196_0004_000027 dataset/wav/tx_emulate_01_196_0004_000027.wav
tx_emulate_02_245_0001_000034 dataset/wav/tx_emulate_02_245_0001_000034.wav
tx_emotion_00309000160 dataset/wav/tx_emotion_00309000160.wav
tx_emotion_00205000198 dataset/wav/tx_emotion_00205000198.wav
tx_emulate_02_052_0016_000106 dataset/wav/tx_emulate_02_052_0016_000106.wav
tx_emotion_00301000019 dataset/wav/tx_emotion_00301000019.wav
tx_emulate_00_104_0001_000113 dataset/wav/tx_emulate_00_104_0001_000113.wav
tx_emotion_00405000329 dataset/wav/tx_emotion_00405000329.wav
tx_emotion_00201000340 dataset/wav/tx_emotion_00201000340.wav
tx_emulate_00_102_0004_000088 dataset/wav/tx_emulate_00_102_0004_000088.wav
tx_emulate_01_028_0008_000035 dataset/wav/tx_emulate_01_028_0008_000035.wav
tx_xiao_0200107000662 dataset/wav/tx_xiao_0200107000662.wav
tx_emotion_00201000107 dataset/wav/tx_emotion_00201000107.wav
tx_emotion_00208000327 dataset/wav/tx_emotion_00208000327.wav
tx_emotion_00210000249 dataset/wav/tx_emotion_00210000249.wav
tx_emotion_00409000470 dataset/wav/tx_emotion_00409000470.wav
tx_emotion_00210000049 dataset/wav/tx_emotion_00210000049.wav
tx_emotion_00304000235 dataset/wav/tx_emotion_00304000235.wav
tx_emulate_01_219_0002_000027 dataset/wav/tx_emulate_01_219_0002_000027.wav
tx_xiao_0200107000704 dataset/wav/tx_xiao_0200107000704.wav
tx_emotion_00401000174 dataset/wav/tx_emotion_00401000174.wav
tx_xiao_0100107000431 dataset/wav/tx_xiao_0100107000431.wav
tx_emotion_00409000462 dataset/wav/tx_emotion_00409000462.wav
tx_emotion_00209000215 dataset/wav/tx_emotion_00209000215.wav
tx_emotion_00210000124 dataset/wav/tx_emotion_00210000124.wav
tx_emulate_02_045_0015_000057 dataset/wav/tx_emulate_02_045_0015_000057.wav
tx_emulate_01_022_0022_000111 dataset/wav/tx_emulate_01_022_0022_000111.wav
tx_emulate_01_012_0023_000084 dataset/wav/tx_emulate_01_012_0023_000084.wav
tx_emulate_01_014_0004_000016 dataset/wav/tx_emulate_01_014_0004_000016.wav
tx_emulate_00_117_0005_000059 dataset/wav/tx_emulate_00_117_0005_000059.wav
tx_emotion_00402000299 dataset/wav/tx_emotion_00402000299.wav
tx_emotion_00205000105 dataset/wav/tx_emotion_00205000105.wav
tx_emotion_00401000239 dataset/wav/tx_emotion_00401000239.wav
tx_emulate_01_020_0017_000067 dataset/wav/tx_emulate_01_020_0017_000067.wav
tx_emotion_00208000087 dataset/wav/tx_emotion_00208000087.wav
tx_emotion_00205000119 dataset/wav/tx_emotion_00205000119.wav
tx_emotion_00401000281 dataset/wav/tx_emotion_00401000281.wav
tx_emotion_00304000079 dataset/wav/tx_emotion_00304000079.wav
tx_emulate_01_024_0010_000114 dataset/wav/tx_emulate_01_024_0010_000114.wav
tx_emulate_02_245_0002_000035 dataset/wav/tx_emulate_02_245_0002_000035.wav
tx_emulate_02_242_0002_000072 dataset/wav/tx_emulate_02_242_0002_000072.wav
tx_emulate_02_007_0024_000112 dataset/wav/tx_emulate_02_007_0024_000112.wav
tx_emulate_02_006_0019_000090 dataset/wav/tx_emulate_02_006_0019_000090.wav
tx_emulate_02_260_0002_000105 dataset/wav/tx_emulate_02_260_0002_000105.wav
tx_emulate_00_135_0004_000015 dataset/wav/tx_emulate_00_135_0004_000015.wav
tx_xiao_0100106000116 dataset/wav/tx_xiao_0100106000116.wav
tx_emotion_00207000113 dataset/wav/tx_emotion_00207000113.wav
tx_emulate_02_008_0011_000058 dataset/wav/tx_emulate_02_008_0011_000058.wav
tx_emotion_00208000303 dataset/wav/tx_emotion_00208000303.wav
tx_emulate_01_001_0018_000065 dataset/wav/tx_emulate_01_001_0018_000065.wav
tx_emulate_00_144_0005_000014 dataset/wav/tx_emulate_00_144_0005_000014.wav
tx_emulate_02_005_0005_000018 dataset/wav/tx_emulate_02_005_0005_000018.wav
tx_emulate_00_115_0009_000024 dataset/wav/tx_emulate_00_115_0009_000024.wav
tx_emotion_00207000080 dataset/wav/tx_emotion_00207000080.wav
tx_xiao_0100107000103 dataset/wav/tx_xiao_0100107000103.wav
tx_xiao_0200107000978 dataset/wav/tx_xiao_0200107000978.wav
tx_emotion_00301000203 dataset/wav/tx_emotion_00301000203.wav
tx_emotion_00210000272 dataset/wav/tx_emotion_00210000272.wav
tx_xiao_0100104000111 dataset/wav/tx_xiao_0100104000111.wav
tx_emulate_00_153_0001_000033 dataset/wav/tx_emulate_00_153_0001_000033.wav
tx_xiao_0100101000439 dataset/wav/tx_xiao_0100101000439.wav
tx_emotion_00402000480 dataset/wav/tx_emotion_00402000480.wav
tx_emotion_00208000390 dataset/wav/tx_emotion_00208000390.wav
tx_xiao_0200104000527 dataset/wav/tx_xiao_0200104000527.wav
tx_xiao_0100107000310 dataset/wav/tx_xiao_0100107000310.wav
tx_emotion_00401000242 dataset/wav/tx_emotion_00401000242.wav
tx_emotion_00305000164 dataset/wav/tx_emotion_00305000164.wav
tx_emotion_00401000327 dataset/wav/tx_emotion_00401000327.wav
tx_emulate_01_024_0013_000139 dataset/wav/tx_emulate_01_024_0013_000139.wav
tx_emulate_00_158_0002_000007 dataset/wav/tx_emulate_00_158_0002_000007.wav
tx_emulate_01_206_0004_000007 dataset/wav/tx_emulate_01_206_0004_000007.wav
tx_emotion_00207000252 dataset/wav/tx_emotion_00207000252.wav
tx_xiao_0100105000112 dataset/wav/tx_xiao_0100105000112.wav
tx_emotion_00204000391 dataset/wav/tx_emotion_00204000391.wav
tx_xiao_0200107000936 dataset/wav/tx_xiao_0200107000936.wav
tx_emulate_01_002_0017_000057 dataset/wav/tx_emulate_01_002_0017_000057.wav
tx_emulate_02_265_0001_000066 dataset/wav/tx_emulate_02_265_0001_000066.wav
tx_emulate_01_196_0001_000024 dataset/wav/tx_emulate_01_196_0001_000024.wav
tx_xiao_0200101000712 dataset/wav/tx_xiao_0200101000712.wav
tx_emulate_02_052_0011_000081 dataset/wav/tx_emulate_02_052_0011_000081.wav
tx_emulate_01_024_0009_000083 dataset/wav/tx_emulate_01_024_0009_000083.wav
tx_emotion_00301000052 dataset/wav/tx_emotion_00301000052.wav
tx_xiao_0200104000887 dataset/wav/tx_xiao_0200104000887.wav
tx_emulate_02_257_0001_000008 dataset/wav/tx_emulate_02_257_0001_000008.wav
tx_xiao_0100101000302 dataset/wav/tx_xiao_0100101000302.wav
tx_emulate_02_227_0002_000019 dataset/wav/tx_emulate_02_227_0002_000019.wav
tx_emotion_00201000015 dataset/wav/tx_emotion_00201000015.wav
tx_emotion_00208000257 dataset/wav/tx_emotion_00208000257.wav
tx_emulate_01_028_0009_000046 dataset/wav/tx_emulate_01_028_0009_000046.wav
tx_emotion_00305000014 dataset/wav/tx_emotion_00305000014.wav
tx_xiao_0200103000008 dataset/wav/tx_xiao_0200103000008.wav
tx_xiao_0200105000643 dataset/wav/tx_xiao_0200105000643.wav
tx_emotion_00301000461 dataset/wav/tx_emotion_00301000461.wav
tx_emulate_01_191_0005_000043 dataset/wav/tx_emulate_01_191_0005_000043.wav
tx_emulate_00_171_0006_000036 dataset/wav/tx_emulate_00_171_0006_000036.wav
tx_emulate_02_001_0015_000048 dataset/wav/tx_emulate_02_001_0015_000048.wav
tx_emulate_02_002_0018_000067 dataset/wav/tx_emulate_02_002_0018_000067.wav
tx_xiao_0100104000143 dataset/wav/tx_xiao_0100104000143.wav
tx_emulate_02_057_0034_000160 dataset/wav/tx_emulate_02_057_0034_000160.wav
tx_emulate_01_014_0005_000035 dataset/wav/tx_emulate_01_014_0005_000035.wav
tx_xiao_0100104000019 dataset/wav/tx_xiao_0100104000019.wav
tx_emotion_00305000283 dataset/wav/tx_emotion_00305000283.wav
tx_emotion_00302000356 dataset/wav/tx_emotion_00302000356.wav
tx_xiao_0100101000330 dataset/wav/tx_xiao_0100101000330.wav
tx_emulate_00_115_0005_000035 dataset/wav/tx_emulate_00_115_0005_000035.wav
tx_emulate_02_007_0010_000050 dataset/wav/tx_emulate_02_007_0010_000050.wav
tx_emulate_01_203_0008_000028 dataset/wav/tx_emulate_01_203_0008_000028.wav
tx_emotion_00201000148 dataset/wav/tx_emotion_00201000148.wav
tx_emulate_02_265_0006_000056 dataset/wav/tx_emulate_02_265_0006_000056.wav
tx_emotion_00401000213 dataset/wav/tx_emotion_00401000213.wav
tx_emulate_02_003_0023_000078 dataset/wav/tx_emulate_02_003_0023_000078.wav
tx_emotion_00205000069 dataset/wav/tx_emotion_00205000069.wav
tx_emotion_00401000339 dataset/wav/tx_emotion_00401000339.wav
tx_emulate_02_265_0002_000087 dataset/wav/tx_emulate_02_265_0002_000087.wav
tx_emulate_01_192_0003_000046 dataset/wav/tx_emulate_01_192_0003_000046.wav
tx_emotion_00301000036 dataset/wav/tx_emotion_00301000036.wav
tx_emulate_01_206_0005_000031 dataset/wav/tx_emulate_01_206_0005_000031.wav
tx_emulate_00_158_0003_000068 dataset/wav/tx_emulate_00_158_0003_000068.wav
tx_xiao_0200105000606 dataset/wav/tx_xiao_0200105000606.wav
tx_emotion_00204000458 dataset/wav/tx_emotion_00204000458.wav
tx_emulate_02_006_0019_000078 dataset/wav/tx_emulate_02_006_0019_000078.wav
tx_emotion_00301000457 dataset/wav/tx_emotion_00301000457.wav
tx_emulate_02_006_0009_000053 dataset/wav/tx_emulate_02_006_0009_000053.wav
tx_emulate_01_012_0004_000021 dataset/wav/tx_emulate_01_012_0004_000021.wav
tx_emotion_00304000169 dataset/wav/tx_emotion_00304000169.wav
tx_emotion_00205000110 dataset/wav/tx_emotion_00205000110.wav
tx_emulate_02_055_0004_000004 dataset/wav/tx_emulate_02_055_0004_000004.wav
tx_emulate_01_011_0013_000073 dataset/wav/tx_emulate_01_011_0013_000073.wav
tx_emulate_02_254_0005_000110 dataset/wav/tx_emulate_02_254_0005_000110.wav
tx_emotion_00302000054 dataset/wav/tx_emotion_00302000054.wav
tx_emotion_00402000283 dataset/wav/tx_emotion_00402000283.wav
tx_emulate_01_008_0011_000050 dataset/wav/tx_emulate_01_008_0011_000050.wav
tx_emulate_01_007_0006_000025 dataset/wav/tx_emulate_01_007_0006_000025.wav
tx_emulate_01_202_0001_000036 dataset/wav/tx_emulate_01_202_0001_000036.wav
tx_emulate_02_222_0001_000017 dataset/wav/tx_emulate_02_222_0001_000017.wav
tx_emulate_01_011_0010_000050 dataset/wav/tx_emulate_01_011_0010_000050.wav
tx_emotion_00202000118 dataset/wav/tx_emotion_00202000118.wav
tx_emulate_01_024_0012_000117 dataset/wav/tx_emulate_01_024_0012_000117.wav
tx_emulate_00_118_0002_000022 dataset/wav/tx_emulate_00_118_0002_000022.wav
tx_xiao_0100101000076 dataset/wav/tx_xiao_0100101000076.wav
tx_emotion_00305000119 dataset/wav/tx_emotion_00305000119.wav
tx_emulate_01_028_0009_000054 dataset/wav/tx_emulate_01_028_0009_000054.wav
tx_emotion_00207000162 dataset/wav/tx_emotion_00207000162.wav
tx_emulate_00_125_0001_000067 dataset/wav/tx_emulate_00_125_0001_000067.wav
tx_emulate_02_233_0001_000034 dataset/wav/tx_emulate_02_233_0001_000034.wav
tx_emulate_02_055_0033_000122 dataset/wav/tx_emulate_02_055_0033_000122.wav
tx_emulate_01_031_0020_000073 dataset/wav/tx_emulate_01_031_0020_000073.wav
tx_emulate_02_041_0002_000076 dataset/wav/tx_emulate_02_041_0002_000076.wav
tx_emulate_02_007_0006_000025 dataset/wav/tx_emulate_02_007_0006_000025.wav
tx_emotion_00207000211 dataset/wav/tx_emotion_00207000211.wav
tx_emulate_01_022_0009_000026 dataset/wav/tx_emulate_01_022_0009_000026.wav
tx_emotion_00201000209 dataset/wav/tx_emotion_00201000209.wav
tx_emulate_02_255_0001_000027 dataset/wav/tx_emulate_02_255_0001_000027.wav
tx_emotion_00201000220 dataset/wav/tx_emotion_00201000220.wav
tx_xiao_0200105000529 dataset/wav/tx_xiao_0200105000529.wav
tx_emotion_00207000018 dataset/wav/tx_emotion_00207000018.wav
tx_emulate_01_002_0005_000008 dataset/wav/tx_emulate_01_002_0005_000008.wav
tx_emulate_02_042_0002_000072 dataset/wav/tx_emulate_02_042_0002_000072.wav
tx_emotion_00304000049 dataset/wav/tx_emotion_00304000049.wav
tx_xiao_0100104000141 dataset/wav/tx_xiao_0100104000141.wav
tx_xiao_0100107000421 dataset/wav/tx_xiao_0100107000421.wav
tx_emotion_00210000476 dataset/wav/tx_emotion_00210000476.wav
tx_emulate_01_023_0006_000042 dataset/wav/tx_emulate_01_023_0006_000042.wav
tx_xiao_0200104000609 dataset/wav/tx_xiao_0200104000609.wav
tx_emotion_00201000274 dataset/wav/tx_emotion_00201000274.wav
tx_xiao_0200107000972 dataset/wav/tx_xiao_0200107000972.wav
tx_emulate_00_109_0005_000091 dataset/wav/tx_emulate_00_109_0005_000091.wav
tx_emotion_00202000301 dataset/wav/tx_emotion_00202000301.wav
tx_emulate_02_231_0008_000034 dataset/wav/tx_emulate_02_231_0008_000034.wav
tx_xiao_0100104000064 dataset/wav/tx_xiao_0100104000064.wav
tx_xiao_0200101000731 dataset/wav/tx_xiao_0200101000731.wav
tx_emulate_01_196_0003_000051 dataset/wav/tx_emulate_01_196_0003_000051.wav
tx_emotion_00210000453 dataset/wav/tx_emotion_00210000453.wav
tx_emulate_00_111_0001_000023 dataset/wav/tx_emulate_00_111_0001_000023.wav
tx_emotion_00201000323 dataset/wav/tx_emotion_00201000323.wav
tx_emulate_02_039_0003_000072 dataset/wav/tx_emulate_02_039_0003_000072.wav
tx_xiao_0100107000449 dataset/wav/tx_xiao_0100107000449.wav
tx_emulate_02_006_0020_000097 dataset/wav/tx_emulate_02_006_0020_000097.wav
tx_emulate_01_026_0005_000031 dataset/wav/tx_emulate_01_026_0005_000031.wav
tx_emotion_00205000062 dataset/wav/tx_emotion_00205000062.wav
tx_emotion_00405000239 dataset/wav/tx_emotion_00405000239.wav
tx_emotion_00202000425 dataset/wav/tx_emotion_00202000425.wav
tx_xiao_0200106000737 dataset/wav/tx_xiao_0200106000737.wav
tx_emulate_01_029_0013_000037 dataset/wav/tx_emulate_01_029_0013_000037.wav
tx_emulate_02_242_0002_000069 dataset/wav/tx_emulate_02_242_0002_000069.wav
tx_xiao_0100101000408 dataset/wav/tx_xiao_0100101000408.wav
tx_emulate_02_042_0002_000101 dataset/wav/tx_emulate_02_042_0002_000101.wav


================================================
FILE: environment.yml
================================================
name: secap
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - appdirs=1.4.4=pyhd3eb1b0_0
  - asttokens=2.0.5=pyhd3eb1b0_0
  - backcall=0.2.0=pyhd3eb1b0_0
  - blas=1.0=openblas
  - boltons=23.0.0=py310h06a4308_0
  - brotlipy=0.7.0=py310h7f8727e_1002
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.01.10=h06a4308_0
  - certifi=2022.12.7=py310h06a4308_0
  - cffi=1.15.1=py310h5eee18b_3
  - charset-normalizer=2.0.4=pyhd3eb1b0_0
  - conda=23.3.1=py310h06a4308_0
  - conda-content-trust=0.1.3=py310h06a4308_0
  - conda-package-handling=2.0.2=py310h06a4308_0
  - conda-package-streaming=0.7.0=py310h06a4308_0
  - cryptography=38.0.4=py310h9ce1e76_0
  - decorator=5.1.1=pyhd3eb1b0_0
  - executing=0.8.3=pyhd3eb1b0_0
  - idna=3.4=py310h06a4308_0
  - intel-openmp=2023.0.0=h9e868ea_25371
  - ipython=8.10.0=py310h06a4308_0
  - jedi=0.18.1=py310h06a4308_1
  - jsonpatch=1.32=pyhd3eb1b0_0
  - jsonpointer=2.1=pyhd3eb1b0_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.2=h6a678d5_6
  - libgcc-ng=11.2.0=h1234567_1
  - libgfortran-ng=11.2.0=h00389a5_1
  - libgfortran5=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libopenblas=0.3.21=h043d6bf_0
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - matplotlib-inline=0.1.6=py310h06a4308_0
  - mkl=2023.0.0=h6d00ec8_25399
  - mkl-include=2023.0.0=h06a4308_25399
  - ncurses=6.4=h6a678d5_0
  - ninja-base=1.10.2=hd09550d_5
  - numpy=1.23.5=py310hac523dd_0
  - numpy-base=1.23.5=py310h375b286_0
  - openssl=1.1.1t=h7f8727e_0
  - packaging=23.0=py310h06a4308_0
  - parso=0.8.3=pyhd3eb1b0_0
  - pexpect=4.8.0=pyhd3eb1b0_3
  - pickleshare=0.7.5=pyhd3eb1b0_1003
  - pip=22.3.1=py310h06a4308_0
  - pluggy=1.0.0=py310h06a4308_1
  - pooch=1.4.0=pyhd3eb1b0_0
  - prompt-toolkit=3.0.36=py310h06a4308_0
  - ptyprocess=0.7.0=pyhd3eb1b0_2
  - pure_eval=0.2.2=pyhd3eb1b0_0
  - pycosat=0.6.4=py310h5eee18b_0
  - pycparser=2.21=pyhd3eb1b0_0
  - pyopenssl=22.0.0=pyhd3eb1b0_0
  - pysocks=1.7.1=py310h06a4308_0
  - python=3.10.9=h7a1cb2a_2
  - pyyaml=6.0=py310h5eee18b_1
  - readline=8.2=h5eee18b_0
  - requests=2.28.1=py310h06a4308_0
  - ruamel.yaml=0.17.21=py310h5eee18b_0
  - ruamel.yaml.clib=0.2.6=py310h5eee18b_1
  - scipy=1.10.0=py310heeff2f4_1
  - setuptools=65.6.3=py310h06a4308_0
  - six=1.16.0=pyhd3eb1b0_1
  - sqlite=3.40.1=h5082296_0
  - stack_data=0.2.0=pyhd3eb1b0_0
  - tbb=2021.8.0=hdb19cb5_0
  - tk=8.6.12=h1ccaba5_0
  - toolz=0.12.0=py310h06a4308_0
  - tqdm=4.64.1=py310h06a4308_0
  - traitlets=5.7.1=py310h06a4308_0
  - typing=3.10.0.0=py310h06a4308_0
  - urllib3=1.26.14=py310h06a4308_0
  - wcwidth=0.2.5=pyhd3eb1b0_0
  - wheel=0.37.1=pyhd3eb1b0_0
  - xz=5.2.10=h5eee18b_1
  - yaml=0.2.5=h7b6447c_0
  - zlib=1.2.13=h5eee18b_0
  - zstandard=0.18.0=py310h5eee18b_0
  - pip:
      - absl-py==1.4.0
      - accelerate==0.19.0
      - accelerator==2023.3.10.dev1
      - aiohttp==3.8.4
      - aiosignal==1.3.1
      - alfred-py==3.0.7
      - anyio==3.6.2
      - arrow==1.2.3
      - async-timeout==4.0.2
      - attrs==23.1.0
      - audioread==3.0.0
      - beautifulsoup4==4.12.2
      - blessed==1.20.0
      - bottle==0.12.25
      - cachetools==5.3.0
      - click==8.1.3
      - cmake==3.26.1
      - contourpy==1.1.0
      - coverage==7.2.5
      - croniter==1.3.14
      - cycler==0.11.0
      - cython==0.29.34
      - darr==0.5.4
      - dateutils==0.6.12
      - deepdiff==6.3.0
      - deepspeed==0.8.3
      - deprecated==1.2.13
      - diffusers==0.16.1
      - exceptiongroup==1.1.1
      - fastapi==0.88.0
      - filelock==3.10.7
      - fonttools==4.41.0
      - frozenlist==1.3.3
      - fsspec==2023.5.0
      - ftfy==6.1.1
      - funcy==2.0
      - future==0.18.3
      - gensim==4.3.1
      - google-auth==2.18.0
      - google-auth-oauthlib==1.0.0
      - grpcio==1.54.2
      - gtrending==0.4.0
      - h11==0.14.0
      - hjson==3.1.0
      - huggingface-hub==0.13.3
      - importlib-metadata==6.1.0
      - iniconfig==2.0.0
      - inquirer==3.1.3
      - itsdangerous==2.1.2
      - jieba==0.42.1
      - jinja2==3.1.2
      - joblib==1.2.0
      - jsons==1.6.3
      - kiwisolver==1.4.4
      - lark==1.1.5
      - lazy-loader==0.2
      - levenshtein==0.21.0
      - librosa==0.10.0.post2
      - lightning==2.0.2
      - lightning-cloud==0.5.36
      - lightning-utilities==0.8.0
      - lit==16.0.0
      - llvmlite==0.40.0
      - loguru==0.7.0
      - markdown==3.4.3
      - markdown-it-py==2.2.0
      - markupsafe==2.1.2
      - matplotlib==3.7.2
      - mdurl==0.1.2
      - mpmath==1.3.0
      - msgpack==1.0.5
      - multidict==6.0.4
      - natsort==8.3.1
      - netifaces==0.11.0
      - networkx==3.0
      - ninja==1.11.1
      - nltk==3.8.1
      - numba==0.57.0
      - nvidia-cublas-cu11==11.10.3.66
      - nvidia-cuda-cupti-cu11==11.7.101
      - nvidia-cuda-nvrtc-cu11==11.7.99
      - nvidia-cuda-runtime-cu11==11.7.99
      - nvidia-cudnn-cu11==8.5.0.96
      - nvidia-cufft-cu11==10.9.0.58
      - nvidia-curand-cu11==10.2.10.91
      - nvidia-cusolver-cu11==11.4.0.1
      - nvidia-cusparse-cu11==11.7.4.91
      - nvidia-nccl-cu11==2.14.3
      - nvidia-nvtx-cu11==11.7.91
      - oauthlib==3.2.2
      - opencv-python==4.7.0.72
      - opentracing==2.4.0
      - ordered-set==4.1.0
      - pandas==2.0.1
      - pascal-voc-writer==0.1.4
      - pillow==9.5.0
      - portalocker==2.7.0
      - protobuf==4.23.0
      - psutil==5.9.4
      - py-cpuinfo==9.0.0
      - pyasn1==0.5.0
      - pyasn1-modules==0.3.0
      - pydantic==1.10.7
      - pygments==2.15.1
      - pyjwt==2.7.0
      - pyparsing==3.0.9
      - pyquaternion==0.9.9
      - pytest==7.3.1
      - pytest-asyncio==0.21.0
      - pytest-cov==4.0.0
      - pytest-mock==3.10.0
      - python-dateutil==2.8.2
      - python-editor==1.0.4
      - python-levenshtein==0.21.0
      - python-multipart==0.0.6
      - pytorch-lightning==2.0.2
      - pytz==2023.3
      - rapidfuzz==3.0.0
      - readchar==4.0.5
      - regex==2023.3.23
      - requests-oauthlib==1.3.1
      - rich==13.3.5
      - rsa==4.9
      - scikit-learn==1.2.2
      - sentencepiece==0.1.99
      - setproctitle==1.3.2
      - smart-open==6.3.0
      - sniffio==1.3.0
      - soundfile==0.12.1
      - soupsieve==2.4.1
      - soxr==0.3.5
      - starlette==0.22.0
      - starsessions==1.3.0
      - sympy==1.11.1
      - tensorboard==2.13.0
      - tensorboard-data-server==0.7.0
      - threadpoolctl==3.1.0
      - timm==0.6.13
      - tokenizers==0.13.2
      - tomli==2.0.1
      - torch==2.0.0
      - torchaudio==2.0.1
      - torchlibrosa==0.1.0
      - torchmetrics==0.11.4
      - torchvision==0.15.1
      - transformers==4.29.0
      - triton==2.0.0
      - typing-extensions==4.5.0
      - typish==1.9.3
      - tzdata==2023.3
      - uvicorn==0.22.0
      - uvloop==0.17.0
      - waitress==2.1.2
      - websocket-client==1.5.1
      - websockets==11.0.3
      - werkzeug==2.3.4
      - wrapt==1.15.0
      - xdg==6.0.0
      - yarl==1.9.2
      - zipp==3.15.0
prefix: /opt/conda


================================================
FILE: model2.py
================================================
import torch
import torch.nn as nn
import lightning.pytorch as pl
from module.Qformer import BertConfig, BertLMHeadModel
from transformers import (
    Wav2Vec2FeatureExtractor,
    HubertModel,
    BertTokenizer, 
    BertModel,
    LlamaTokenizer
)
from module.modeling_llama import LlamaForCausalLM
from CLUB_modules.mi_estimators import *
from tool.get_sentence_simi import SimiCal
import torch.nn.functional as F
from transformers import StoppingCriteria, StoppingCriteriaList
import numpy as np
import os

class KeywordsStoppingCriteria(StoppingCriteria):
    """Stop generation once a chosen token id has just been produced.

    Only the newest token of the first sequence in the batch is inspected.
    """

    def __init__(self, keywords_ids:list):
        # Token ids whose appearance should end generation.
        self.keywords = keywords_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        newest_token = input_ids[0][-1]
        return newest_token in self.keywords

class MotionAudio(pl.LightningModule):
    def __init__(
        self,
        hubert_ckpt="weights/models--TencentGameMate--chinese-hubert-large/snapshots/90cb660492214f687e60f5ca509b20edae6e75bd",
        text2vec_ckpt="weights/models--shibing624--text2vec-base-chinese/snapshots/26420fdf61ddfd92fafbaf3bc21a7c06b1812248",
        llama_ckpt="weights/models--minlik--chinese-llama-7b-merged/snapshots/1ca4d87576f1fef4d44a949fb65bbe6b96675872"):
        """Load the frozen backbones and create the trainable audio bridge.

        Args:
            hubert_ckpt: HuBERT checkpoint directory, joined onto the directory
                that contains this file.
            text2vec_ckpt: text2vec (BERT) checkpoint directory, resolved the
                same way.
            llama_ckpt: LLaMA checkpoint directory, resolved the same way.
        """
        super(MotionAudio,self).__init__()
        
        # Resolve every checkpoint path against this file's directory so the
        # model can be built regardless of the current working directory.
        current_directory = os.path.dirname(os.path.abspath(__file__))
        hubert_ckpt = os.path.join(current_directory, hubert_ckpt)
        text2vec_ckpt = os.path.join(current_directory, text2vec_ckpt)
        llama_ckpt = os.path.join(current_directory, llama_ckpt)

        # HuBERT audio encoder and its waveform feature extractor
        self.hubert_model=HubertModel.from_pretrained(hubert_ckpt)
        self.hubert_feature_extractor=Wav2Vec2FeatureExtractor.from_pretrained(hubert_ckpt)
        # text2vec sentence encoder and tokenizer
        self.text2vec_model=BertModel.from_pretrained(text2vec_ckpt)
        self.text2vec_tokenizer=BertTokenizer.from_pretrained(text2vec_ckpt)


        # LLaMA decoder; torch_dtype="auto" keeps the dtype stored in the checkpoint
        self.llama_model=LlamaForCausalLM.from_pretrained(llama_ckpt, torch_dtype="auto")
        #self.llama_model = self.llama_model.to(torch.float32)
        self.llama_tokenizer=LlamaTokenizer.from_pretrained(llama_ckpt)
        # Fall back to the unknown token for padding when the tokenizer defines no pad token.
        if self.llama_tokenizer.pad_token_id is None:
            self.llama_tokenizer.pad_token = self.llama_tokenizer.unk_token
        #self.llama_model.model.resize_token_embeddings(len(self.llama_tokenizer))

        # Freeze everything registered so far (HuBERT, text2vec, LLaMA). This
        # must run BEFORE the modules below are created so that they stay trainable.
        for p in self.parameters():
            p.requires_grad = False
        # Q-Former with 32 learnable queries attending to 768-wide audio features
        self.audio_Qformer,self.audio_query_tokens=self.init_Qformer(num_query_token=32,vision_width=768)
        # Remove the parts of the Q-Former that forward()/inference() never call:
        # the LM head, the word/position embeddings, and each layer's
        # `output`/`intermediate` sub-modules.
        # NOTE(review): presumably these are the text-branch feed-forward blocks
        # and the query branch uses separate modules; confirm in module/Qformer.py.
        self.audio_Qformer.cls = None
        self.audio_Qformer.bert.embeddings.word_embeddings = None
        self.audio_Qformer.bert.embeddings.position_embeddings = None
        for layer in self.audio_Qformer.bert.encoder.layer:
            layer.output = None
            layer.intermediate = None
        
        # Projects HuBERT features (1024, presumably the HuBERT-large hidden
        # size; confirm) down to the Q-Former encoder width.
        self.audio_project=nn.Linear(1024,768)

        # Projects Q-Former outputs to 4096, presumably the LLaMA-7B embedding
        # size; confirm against the checkpoint config.
        self.audio_llama_project=nn.Linear(768,4096)

        
        
    def init_Qformer(self,num_query_token, vision_width, cross_attention_freq=2):
        """Build the Q-Former and its learnable query tokens.

        The Q-Former is a ``BertLMHeadModel`` configured with cross-attention
        and initialised (non-strictly) from the bert-base-chinese weights
        stored under ``weights/`` next to this file.

        Args:
            num_query_token: number of learnable query vectors.
            vision_width: feature width of the encoder states the queries
                cross-attend to.
            cross_attention_freq: a cross-attention layer is inserted every
                this many blocks.

        Returns:
            Tuple ``(Qformer, query_tokens)`` where ``query_tokens`` is an
            ``nn.Parameter`` of shape ``(1, num_query_token, hidden_size)``.
        """
        path=os.path.dirname(os.path.abspath(__file__))
        bert_dir=os.path.join(path,"weights/models--bert-base-chinese/snapshots/8d2a91f91cc38c96bb8b4556ba70c392f8d5ee55")

        encoder_config = BertConfig.from_pretrained(bert_dir)
        encoder_config.encoder_width = vision_width
        # insert cross-attention layer every other block
        encoder_config.add_cross_attention = True
        encoder_config.cross_attention_freq = cross_attention_freq
        encoder_config.query_length = num_query_token
        Qformer = BertLMHeadModel(config=encoder_config)

        ckpt=os.path.join(bert_dir,"pytorch_model.bin")
        # Load onto CPU so the checkpoint can be read no matter which device it
        # was saved from; load_state_dict copies the values into the module.
        # strict=False because the Q-Former has extra modules (e.g. cross-attention)
        # that the plain BERT checkpoint does not contain.
        # NOTE: torch.load unpickles the file; only point this at trusted weights.
        state_dict = torch.load(ckpt, map_location="cpu")
        Qformer.load_state_dict(state_dict,strict=False)

        query_tokens = nn.Parameter(
            torch.zeros(1, num_query_token, encoder_config.hidden_size)
        )
        query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range)
        return Qformer, query_tokens
    def mean_pooling(self,model_output, attention_mask):
        """Average token embeddings over the positions enabled by the mask.

        Args:
            model_output: sequence whose first element holds the per-token
                embeddings.
            attention_mask: mask with one entry per token; masked-out
                positions (0) do not contribute to the average.

        Returns:
            The masked mean over dimension 1 of the token embeddings.
        """
        hidden_states = model_output[0]
        # Broadcast the mask over the embedding dimension.
        mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
        masked_sum = (hidden_states * mask).sum(1)
        # Clamp so a fully masked row divides by a tiny number instead of zero.
        token_count = mask.sum(1).clamp(min=1e-9)
        return masked_sum / token_count
    


    
    def forward(self, audio, describtion):
        """Compute the captioning loss for a batch of audio clips.

        The LLaMA input sequence is ``[BOS] + audio query embeddings + prompt
        + caption``; the prompt is drawn at random from a fixed pool on every
        call.

        Args:
            audio: raw waveforms accepted by the HuBERT feature extractor;
                they are processed as 16 kHz audio.
            describtion: iterable of caption strings, one per clip.

        Returns:
            The ``loss`` attribute of the LLaMA model output.
        """
        # HuBERT (frozen): waveform -> frame-level features
        with torch.no_grad():
            audio_feature=self.hubert_feature_extractor(audio, padding=True,return_tensors="pt",sampling_rate=16000).input_values.to(self.device)
            # NOTE(review): the input is cast to fp16 unconditionally while
            # inference() uses float32; presumably training always runs with
            # fp16 / mixed precision. Confirm against scripts/train.py.
            audio_feature = audio_feature.half()
            audio_feature=self.hubert_model(audio_feature).last_hidden_state
        audio_feature=self.audio_project(audio_feature)

        # Terminate every caption with the end-of-sentence marker so the
        # decoder is trained to stop.
        describtion=[s+"</s>" for s in describtion]

        # Q-Former: the learnable queries cross-attend to the audio frames.
        audio_query_tokens=self.audio_query_tokens.expand(audio_feature.shape[0], -1, -1)
        # Every frame is attended to (padding frames are not masked out).
        frame_atts = torch.ones(audio_feature.size()[:-1], dtype=torch.long).to(audio_feature.device)
        audio_query_output=self.audio_Qformer.bert(
            query_embeds=audio_query_tokens,
            encoder_hidden_states=audio_feature,
            encoder_attention_mask=frame_atts,
            return_dict=True,
            )
        audio_hidden=audio_query_output.last_hidden_state

        text_tokens=self.llama_tokenizer(describtion, padding="longest", truncation=True, return_tensors='pt',add_special_tokens=False).to(self.device)

        audio_input=self.audio_llama_project(audio_hidden)
        batchsize=audio_input.shape[0]
        bos=torch.ones([batchsize, 1],dtype=text_tokens.input_ids.dtype).to(self.device) * self.llama_tokenizer.bos_token_id
        bos_embeds=self.llama_model.model.embed_tokens(bos)

        # In training a different prompt is used for each call.
        prompts=[
            "请用一句话用中文表述音频中说话人的情感状态：",
            "请用一句中文概括音频中讲话者的情感：",
            "请用一句中文简述音频里说话者的情感表现：",
            "请用一句中文概述所给音频中说话人的情感：",
            "请用一句话用中文描述音频中说话人的情感：",
            "请用一句中文描绘音频中说话者的情感：",
            "请用一句中文描述所给音频中说话人的情感：",
            "请用一句中文简要表述音频中说话人的情感：",
            "请用一句中文概括所给音频中说话者的情感：",
            "请用一句话用中文描述所给音频中说话人的情感：",
            "请用一句中文简述所给音频里说话者的情感：",
            "请用一句中文描述音频中讲话者的情感：",
            "请用一句中文概述音频中说话人的情感：",
            "请用一句话用中文表达音频中说话者的情感：",
            "请用一句中文简要描述音频中说话人的情感：",
            "请用一句中文概括音频中说话人的情感：",
            "请用一句中文描述所给音频中讲话者的情感：",
            "请用一句中文简述音频中说话者的情感：",
            "请用一句中文概述所给音频中讲话者的情感：",
            "请用一句话用中文描述音频中讲话者的情感：",
            "请用一句中文描述音频中说话人的情感状态：",
            "请用一句中文概括所给音频里说话者的情感：",
            "请用一句中文简述所给音频中说话人的情感表现：",
            "请用一句中文概述音频里说话者的情感：",
            "请用一句话用中文描述音频中说话人的情感表现：",
            "请用一句中文描绘所给音频中说话者的情感：",
            "请用一句中文描述音频里讲话者的情感：",
            "请用一句中文简要表述所给音频中说话人的情感：",
            "请用一句中文概括音频里说话者的情感：",
            "请用一句话用中文描述所给音频中讲话者的情感：",
        ]
        import random
        prompt=random.choice(prompts)
        prompts_id=self.llama_tokenizer(prompt,return_tensors='pt').input_ids.to(self.device)
        prompts_id=prompts_id.expand(batchsize,-1)
        prompts_embeds=self.llama_model.model.embed_tokens(prompts_id)

        # Padding positions in the caption are excluded from the loss.
        # NOTE(review): `targets` covers only the caption tokens, while
        # `input_embeds` also holds the BOS, audio and prompt positions. This
        # presumably relies on module/modeling_llama.py aligning the shorter
        # labels with the logits; confirm before swapping in a stock LLaMA.
        targets=text_tokens.input_ids.masked_fill(
            text_tokens.input_ids==self.llama_tokenizer.pad_token_id,-100
        )
        text_embeds=self.llama_model.model.embed_tokens(text_tokens.input_ids)
        input_embeds=torch.cat([bos_embeds,audio_input,prompts_embeds,text_embeds],dim=1)

        # Attention mask laid out in the same order as input_embeds.
        atts_audio=torch.ones(audio_input.size()[:-1], dtype=torch.long).to(audio_input.device)
        attns_text=text_tokens.attention_mask
        attns_bos=atts_audio[:,:1]
        attns_prompt=torch.ones(prompts_embeds.size()[:-1], dtype=torch.long).to(prompts_embeds.device)
        attns=torch.cat([attns_bos,atts_audio,attns_prompt,attns_text],dim=1)

        outputs=self.llama_model(
            inputs_embeds=input_embeds,
            attention_mask=attns,
            labels=targets,
            return_dict=True,
        )
        return outputs.loss
    def training_step(self, batch, batch_idx):
        """Run one training batch and log its loss as ``train_loss``.

        ``batch`` must unpack into exactly three items; the first two are the
        audio and the captions, the third is ignored.
        """
        audio, describtion, _ignored = batch
        loss = self.forward(audio, describtion)
        self.log(
            'train_loss',
            loss,
            on_step=True,
            on_epoch=True,
            prog_bar=True,
            logger=True,
            batch_size=len(audio),
            sync_dist=True,
        )
        return loss
    def validation_step(self, batch, batch_idx):
        """Run one validation batch and log its loss as ``val_loss``.

        ``batch`` must unpack into exactly three items; the first two are the
        audio and the captions, the third is ignored.
        """
        audio, describtion, _ignored = batch
        loss = self.forward(audio, describtion)
        self.log(
            'val_loss',
            loss,
            on_step=True,
            on_epoch=True,
            prog_bar=True,
            logger=True,
            batch_size=len(audio),
            sync_dist=True,
        )
        return loss
    def configure_optimizers(self):
        """Return an AdamW optimizer over the parameters that require gradients."""
        # Frozen parameters are left out so the optimizer keeps no state for them.
        trainable_params = (param for param in self.parameters() if param.requires_grad)
        return torch.optim.AdamW(
            trainable_params,
            lr=0.000013,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=1e-6,
        )
    def inference(self, audio):
        """Generate emotion captions for an audio clip.

        Eight candidate sentences are sampled from LLaMA and then filtered by
        ``post_processing``. The whole encoding and generation pipeline runs
        under ``torch.no_grad()`` so that no autograd graph is built.

        Only the first decoded sequence of every generation call is kept, so
        the method is effectively meant for a batch of one clip.

        Args:
            audio: raw waveform(s) accepted by the HuBERT feature extractor;
                they are processed as 16 kHz audio.

        Returns:
            Tuple ``(sentences, prompt)``: the candidates kept by
            ``post_processing`` and the prompt that was used.
        """
        # In inference the same prompt is used for every clip.
        prompt="请用一句中文简述音频里说话者的情感表现："
        candidates=[]

        with torch.no_grad():
            # HuBERT: waveform -> frame-level features -> Q-Former width
            audio_feature=self.hubert_feature_extractor(audio, padding=True,return_tensors="pt",sampling_rate=16000).input_values.to(self.device)
            audio_feature = audio_feature.float()
            audio_feature=self.hubert_model(audio_feature).last_hidden_state
            audio_feature=self.audio_project(audio_feature)

            # Q-Former: the learnable queries cross-attend to the audio frames.
            audio_query_tokens=self.audio_query_tokens.expand(audio_feature.shape[0], -1, -1)
            frame_atts = torch.ones(audio_feature.size()[:-1], dtype=torch.long).to(audio_feature.device)
            audio_query_output=self.audio_Qformer.bert(
                query_embeds=audio_query_tokens,
                encoder_hidden_states=audio_feature,
                encoder_attention_mask=frame_atts,
                return_dict=True,
                )
            audio_hidden=audio_query_output.last_hidden_state
            audio_input=self.audio_llama_project(audio_hidden)

            batchsize=audio_input.shape[0]
            prompts_id=self.llama_tokenizer(prompt,return_tensors='pt').input_ids.to(self.device)
            prompts_id=prompts_id.expand(batchsize,-1)
            prompts_embeds=self.llama_model.model.embed_tokens(prompts_id)

            bos=torch.ones([batchsize, 1],dtype=torch.int64).to(self.device) * self.llama_tokenizer.bos_token_id
            bos_embeds=self.llama_model.model.embed_tokens(bos)
            embeds=torch.cat([bos_embeds,audio_input,prompts_embeds],dim=1)
            # NOTE(review): assumes the LLaMA weights are fp16; confirm against
            # the dtype picked by torch_dtype="auto" when the model is loaded.
            embeds=embeds.half()

            # You may change the number of generated sentences here and tune the
            # LLaMA sampling parameters (top_k, top_p, num_beams) for better results.
            # To reduce randomness, eight sentences are sampled and
            # post_processing removes the three with the lowest average
            # similarity to the others.
            for _ in range(8):
                outputs=self.llama_model.generate(
                    inputs_embeds=embeds,
                    max_new_tokens=50,
                    min_new_tokens=3,
                    do_sample=True,
                    top_k=10,
                    top_p=0.95,
                    num_beams=5,
                    repetition_penalty=10.0,
                    pad_token_id=self.llama_tokenizer.pad_token_id,
                    eos_token_id=self.llama_tokenizer.eos_token_id,
                    early_stopping=True,
                    num_return_sequences=1,
                    no_repeat_ngram_size=2,
                )
                output_tokens=self.llama_tokenizer.batch_decode(outputs,skip_special_tokens=True)
                print(output_tokens)
                candidates.append(output_tokens[0])

        outputs1=self.post_processing(candidates,self.device)

        return outputs1,prompt
    
    def post_processing(self, sentences,device):
        """Drop the three sentences that agree least with the others.

        Every ordered pair of sentences (including each sentence with itself)
        is scored with ``SimiCal``; the three sentences with the lowest mean
        score are removed.

        Args:
            sentences: candidate sentences.
            device: device handed to ``SimiCal``.

        Returns:
            The remaining sentences in their original order.
        """
        count = len(sentences)
        score_matrix = np.zeros((count, count))
        scorer = SimiCal(device=device)
        for row, left in enumerate(sentences):
            for col, right in enumerate(sentences):
                score_matrix[row, col] = scorer(left, right)

        mean_scores = score_matrix.mean(axis=1)
        # Indices of the three lowest mean similarities.
        rejected = set(mean_scores.argsort()[:3].tolist())
        return [
            sentence
            for position, sentence in enumerate(sentences)
            if position not in rejected
        ]
    def test_step(self, batch, batch_idx):
        """Caption one test batch and append the result to ``result/result_1.txt``.

        ``batch`` must unpack into four items: audio, an ignored item, the
        reference captions and the file paths. Only the first file path and
        first reference caption of the batch are written, followed by every
        candidate returned by ``inference`` (labelled ``result``, ``result2``,
        ``result3``, ...).
        """
        audio,_,describtion,fpath=batch
        output_tokens,_prompt=self.inference(audio)

        path=os.path.dirname(os.path.abspath(__file__))
        test_file=os.path.join(path,"result/result_1.txt")
        # Make sure the result directory exists before appending to the file.
        os.makedirs(os.path.dirname(test_file),exist_ok=True)

        lines=[
            "file: "+fpath[0]+"\n",
            "origin: "+describtion[0]+"\n",
        ]
        # Write however many candidates were returned rather than indexing a
        # fixed five, which would raise IndexError on a shorter list.
        for rank,sentence in enumerate(output_tokens,start=1):
            label="result" if rank==1 else "result"+str(rank)
            lines.append(label+": "+sentence+"\n")
        lines.append("\n")

        with open(test_file,"a",encoding="utf-8") as f:
            f.writelines(lines)
        

def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

if __name__ == "__main__":
    # Build the model with its default checkpoints and report how many
    # parameters are trainable.
    print(count_parameters(MotionAudio()))



================================================
FILE: module/Qformer.py
================================================
import math
import os
import warnings
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, Any

import torch
from torch import Tensor, device, dtype, nn
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F

from transformers.activations import ACT2FN
from transformers.file_utils import (
    ModelOutput,
)
from transformers.modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    CausalLMOutputWithCrossAttentions,
    MaskedLMOutput,
    MultipleChoiceModelOutput,
    NextSentencePredictorOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from transformers.modeling_utils import (
    PreTrainedModel,
    apply_chunking_to_forward,
    find_pruneable_heads_and_indices,
    prune_linear_layer,
)
from transformers.utils import logging
from transformers.models.bert.configuration_bert import BertConfig

logger = logging.get_logger(__name__)


class BertEmbeddings(nn.Module):
    """Embed token ids (word + position) and optionally prepend query embeddings."""

    def __init__(self, config):
        super().__init__()
        hidden_size = config.hidden_size
        max_positions = config.max_position_embeddings

        self.word_embeddings = nn.Embedding(
            config.vocab_size, hidden_size, padding_idx=config.pad_token_id
        )
        self.position_embeddings = nn.Embedding(max_positions, hidden_size)

        # The attribute keeps the TensorFlow-style name "LayerNorm" (not
        # snake_case) so that TensorFlow checkpoint variable names still match.
        self.LayerNorm = nn.LayerNorm(hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # position_ids has shape (1, max positions) and is saved with the module.
        self.register_buffer(
            "position_ids", torch.arange(max_positions).expand((1, -1))
        )
        self.position_embedding_type = getattr(
            config, "position_embedding_type", "absolute"
        )

        self.config = config

    def forward(
        self,
        input_ids=None,
        position_ids=None,
        query_embeds=None,
        past_key_values_length=0,
    ):
        """Return normalised, dropout-regularised embeddings.

        With ``input_ids`` the word embeddings (plus absolute position
        embeddings when enabled) are computed and ``query_embeds``, if given,
        is concatenated in front of them along dimension 1. Without
        ``input_ids`` the ``query_embeds`` are used on their own.
        """
        seq_length = 0 if input_ids is None else input_ids.size()[1]

        if position_ids is None:
            # Default positions continue after any cached prefix.
            first = past_key_values_length
            position_ids = self.position_ids[:, first : first + seq_length].clone()

        if input_ids is None:
            embeddings = query_embeds
        else:
            embeddings = self.word_embeddings(input_ids)
            if self.position_embedding_type == "absolute":
                embeddings = embeddings + self.position_embeddings(position_ids)
            if query_embeds is not None:
                embeddings = torch.cat((query_embeds, embeddings), dim=1)

        return self.dropout(self.LayerNorm(embeddings))


class BertSelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The same module is used for self-attention (keys/values projected from
    ``hidden_states``) and for cross-attention (keys/values projected from
    ``encoder_hidden_states``, whose feature width is ``config.encoder_width``).
    When ``config.position_embedding_type`` is ``"relative_key"`` or
    ``"relative_key_query"`` a learned relative-position term is added to the
    raw attention scores.
    """

    def __init__(self, config, is_cross_attention):
        """Create the query/key/value projections.

        Args:
            config: configuration providing ``hidden_size``,
                ``num_attention_heads``, ``attention_probs_dropout_prob`` and,
                for cross-attention, ``encoder_width``.
            is_cross_attention: when truthy, keys and values are projected from
                inputs of width ``config.encoder_width``.

        Raises:
            ValueError: if ``hidden_size`` is not a multiple of
                ``num_attention_heads`` and the config has no
                ``embedding_size`` attribute.
        """
        super().__init__()
        self.config = config
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
            config, "embedding_size"
        ):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        if is_cross_attention:
            self.key = nn.Linear(config.encoder_width, self.all_head_size)
            self.value = nn.Linear(config.encoder_width, self.all_head_size)
        else:
            self.key = nn.Linear(config.hidden_size, self.all_head_size)
            self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.position_embedding_type = getattr(
            config, "position_embedding_type", "absolute"
        )
        if (
            self.position_embedding_type == "relative_key"
            or self.position_embedding_type == "relative_key_query"
        ):
            self.max_position_embeddings = config.max_position_embeddings
            # One embedding per signed distance in [-(max - 1), max - 1].
            self.distance_embedding = nn.Embedding(
                2 * config.max_position_embeddings - 1, self.attention_head_size
            )
        # When True, cross-attention maps (and their gradients, if autograd is
        # active) are stored on the module during forward().
        self.save_attention = False

    def save_attn_gradients(self, attn_gradients):
        """Store the gradient of the attention probabilities (used as a hook)."""
        self.attn_gradients = attn_gradients

    def get_attn_gradients(self):
        """Return the gradients stored by :meth:`save_attn_gradients`."""
        return self.attn_gradients

    def save_attention_map(self, attention_map):
        """Store the attention probabilities of the latest forward pass."""
        self.attention_map = attention_map

    def get_attention_map(self):
        """Return the map stored by :meth:`save_attention_map`."""
        return self.attention_map

    def transpose_for_scores(self, x):
        """Split the last dim into heads: ``[b, s, h*d] -> [b, h, s, d]``."""
        new_x_shape = x.size()[:-1] + (
            self.num_attention_heads,
            self.attention_head_size,
        )
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_value=None,
        output_attentions=False,
    ):
        """Run attention over ``hidden_states``.

        Args:
            hidden_states: query-side inputs, ``[batch, query_len, hidden]``.
            attention_mask: additive mask applied to the scores in
                self-attention mode.
            head_mask: optional multiplicative mask on the attention
                probabilities.
            encoder_hidden_states: when given, the call is a cross-attention
                and keys/values are projected from this tensor.
            encoder_attention_mask: additive mask used instead of
                ``attention_mask`` in cross-attention mode.
            past_key_value: optional ``(key, value)`` pair that is prepended to
                the freshly projected keys/values in self-attention mode.
            output_attentions: whether to include the attention probabilities
                (before dropout) in the result.

        Returns:
            ``(context, [attention_probs], (key_layer, value_layer))``.
        """

        # If this is instantiated as a cross-attention module, the keys
        # and values come from an encoder; the attention mask needs to be
        # such that the encoder's padding tokens are not attended to.
        is_cross_attention = encoder_hidden_states is not None

        if is_cross_attention:
            key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
            value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
            attention_mask = encoder_attention_mask
        elif past_key_value is not None:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))
            key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
            value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
        else:
            key_layer = self.transpose_for_scores(self.key(hidden_states))
            value_layer = self.transpose_for_scores(self.value(hidden_states))

        mixed_query_layer = self.query(hidden_states)

        query_layer = self.transpose_for_scores(mixed_query_layer)

        past_key_value = (key_layer, value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))

        if (
            self.position_embedding_type == "relative_key"
            or self.position_embedding_type == "relative_key_query"
        ):
            # The number of keys can differ from the number of queries (cached
            # keys or cross-attention), so the two axes are sized separately;
            # otherwise the relative term would not match attention_scores.
            query_length = query_layer.shape[2]
            key_length = key_layer.shape[2]
            # In self-attention with cached keys the current queries are the
            # last `query_length` positions of the full key sequence.
            query_offset = 0 if is_cross_attention else key_length - query_length
            position_ids_l = torch.arange(
                query_offset,
                query_offset + query_length,
                dtype=torch.long,
                device=hidden_states.device,
            ).view(-1, 1)
            position_ids_r = torch.arange(
                key_length, dtype=torch.long, device=hidden_states.device
            ).view(1, -1)
            distance = position_ids_l - position_ids_r
            positional_embedding = self.distance_embedding(
                distance + self.max_position_embeddings - 1
            )
            positional_embedding = positional_embedding.to(
                dtype=query_layer.dtype
            )  # fp16 compatibility

            if self.position_embedding_type == "relative_key":
                relative_position_scores = torch.einsum(
                    "bhld,lrd->bhlr", query_layer, positional_embedding
                )
                attention_scores = attention_scores + relative_position_scores
            elif self.position_embedding_type == "relative_key_query":
                relative_position_scores_query = torch.einsum(
                    "bhld,lrd->bhlr", query_layer, positional_embedding
                )
                relative_position_scores_key = torch.einsum(
                    "bhrd,lrd->bhlr", key_layer, positional_embedding
                )
                attention_scores = (
                    attention_scores
                    + relative_position_scores_query
                    + relative_position_scores_key
                )

        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # The mask is additive (large negative values at masked positions).
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = nn.Softmax(dim=-1)(attention_scores)

        if is_cross_attention and self.save_attention:
            self.save_attention_map(attention_probs)
            # A hook can only be attached to a tensor that takes part in
            # autograd; skip it for inference / torch.no_grad() passes instead
            # of raising a RuntimeError.
            if attention_probs.requires_grad:
                attention_probs.register_hook(self.save_attn_gradients)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs_dropped = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs_dropped = attention_probs_dropped * head_mask

        context_layer = torch.matmul(attention_probs_dropped, value_layer)

        # Merge the heads back: [b, h, s, d] -> [b, s, h*d].
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        outputs = (
            (context_layer, attention_probs) if output_attentions else (context_layer,)
        )

        outputs = outputs + (past_key_value,)
        return outputs


class BertSelfOutput(nn.Module):
    """Output projection of an attention block: dense, dropout, residual add, LayerNorm."""

    def __init__(self, config):
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        self.LayerNorm = nn.LayerNorm(width, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        """Project ``hidden_states`` and normalize its sum with ``input_tensor``."""
        projected = self.dropout(self.dense(hidden_states))
        # Residual connection around the attention sub-layer.
        return self.LayerNorm(projected + input_tensor)


class BertAttention(nn.Module):
    """Attention sub-layer: a ``BertSelfAttention`` followed by ``BertSelfOutput``."""

    def __init__(self, config, is_cross_attention=False):
        super().__init__()
        self.self = BertSelfAttention(config, is_cross_attention)
        self.output = BertSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads):
        """Remove the given attention heads from the projections.

        Does nothing when ``heads`` is empty. Otherwise the query/key/value
        projections and the output projection are replaced by pruned layers,
        and the head bookkeeping is updated.
        """
        if len(heads) == 0:
            return

        attn = self.self
        heads, index = find_pruneable_heads_and_indices(
            heads,
            attn.num_attention_heads,
            attn.attention_head_size,
            self.pruned_heads,
        )

        # Prune the three input projections, then the output projection
        # (pruned along dim 1).
        for proj_name in ("query", "key", "value"):
            setattr(attn, proj_name, prune_linear_layer(getattr(attn, proj_name), index))
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Keep the hyper-parameters in sync and remember what was pruned.
        attn.num_attention_heads = attn.num_attention_heads - len(heads)
        attn.all_head_size = attn.attention_head_size * attn.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_value=None,
        output_attentions=False,
    ):
        """Apply attention and the output projection.

        Returns:
            A tuple whose first element is the projected attention output,
            followed by every extra element returned by the inner attention
            module (attention probabilities if requested, key/value pair).
        """
        attn_results = self.self(
            hidden_states,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            past_key_value,
            output_attentions,
        )
        projected = self.output(attn_results[0], hidden_states)
        return (projected,) + attn_results[1:]


class BertIntermediate(nn.Module):
    """First half of the feed-forward block: expansion to ``intermediate_size`` plus activation."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        # ``hidden_act`` is either a key into ACT2FN or the activation itself.
        activation = config.hidden_act
        self.intermediate_act_fn = (
            ACT2FN[activation] if isinstance(activation, str) else activation
        )

    def forward(self, hidden_states):
        """Return ``act(dense(hidden_states))``."""
        return self.intermediate_act_fn(self.dense(hidden_states))


class BertOutput(nn.Module):
    """Second half of the feed-forward block: projection back to ``hidden_size``, dropout, residual add, LayerNorm."""

    def __init__(self, config):
        super().__init__()
        out_width = config.hidden_size
        self.dense = nn.Linear(config.intermediate_size, out_width)
        self.LayerNorm = nn.LayerNorm(out_width, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        """Project ``hidden_states`` down and normalize its sum with ``input_tensor``."""
        contracted = self.dropout(self.dense(hidden_states))
        # Residual connection around the feed-forward sub-layer.
        return self.LayerNorm(contracted + input_tensor)


class BertLayer(nn.Module):
    """One transformer layer with separate feed-forward paths for query and text tokens.

    Self-attention runs over the whole sequence. The first ``query_length``
    positions may additionally cross-attend to ``encoder_hidden_states`` (only
    in layers that own a cross-attention module) and go through the
    ``*_query`` feed-forward modules; the remaining positions use the regular
    feed-forward modules.
    """

    def __init__(self, config, layer_num):
        super().__init__()
        self.config = config
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = BertAttention(config)
        self.layer_num = layer_num

        # Cross-attention exists only every `cross_attention_freq`-th layer,
        # and only when the config enables it at all.
        self.has_cross_attention = bool(
            config.add_cross_attention
            and layer_num % config.cross_attention_freq == 0
        )
        if self.has_cross_attention:
            self.crossattention = BertAttention(
                config, is_cross_attention=config.add_cross_attention
            )

        # Feed-forward modules for the text positions ...
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)

        # ... and a second set for the query positions.
        self.intermediate_query = BertIntermediate(config)
        self.output_query = BertOutput(config)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_value=None,
        output_attentions=False,
        query_length=0,
    ):
        """Run the layer.

        Returns:
            ``(layer_output, [self_attn], [cross_attn], present_key_value)``
            where the bracketed items are whatever extra elements the
            attention modules returned between their first and last outputs.
        """
        # Only the first two cached entries belong to self-attention.
        cached_self_kv = None if past_key_value is None else past_key_value[:2]
        self_attn_results = self.attention(
            hidden_states,
            attention_mask,
            head_mask,
            output_attentions=output_attentions,
            past_key_value=cached_self_kv,
        )
        attention_output = self_attn_results[0]
        extras = self_attn_results[1:-1]
        present_key_value = self_attn_results[-1]

        if query_length > 0:
            query_states = attention_output[:, :query_length, :]

            if self.has_cross_attention and encoder_hidden_states is not None:
                assert (
                    encoder_hidden_states is not None
                ), "encoder_hidden_states must be given for cross-attention layers"
                cross_results = self.crossattention(
                    query_states,
                    attention_mask,
                    head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    output_attentions=output_attentions,
                )
                query_states = cross_results[0]
                # Append cross-attention weights when they were requested.
                extras = extras + cross_results[1:-1]

            layer_output = apply_chunking_to_forward(
                self.feed_forward_chunk_query,
                self.chunk_size_feed_forward,
                self.seq_len_dim,
                query_states,
            )
            if attention_output.shape[1] > query_length:
                # Positions after the queries take the text feed-forward path.
                text_output = apply_chunking_to_forward(
                    self.feed_forward_chunk,
                    self.chunk_size_feed_forward,
                    self.seq_len_dim,
                    attention_output[:, query_length:, :],
                )
                layer_output = torch.cat([layer_output, text_output], dim=1)
        else:
            layer_output = apply_chunking_to_forward(
                self.feed_forward_chunk,
                self.chunk_size_feed_forward,
                self.seq_len_dim,
                attention_output,
            )

        return (layer_output,) + extras + (present_key_value,)

    def feed_forward_chunk(self, attention_output):
        """Feed-forward path for text positions."""
        expanded = self.intermediate(attention_output)
        return self.output(expanded, attention_output)

    def feed_forward_chunk_query(self, attention_output):
        """Feed-forward path for query positions."""
        expanded = self.intermediate_query(attention_output)
        return self.output_query(expanded, attention_output)


class BertEncoder(nn.Module):
    """Stack of ``config.num_hidden_layers`` :class:`BertLayer` modules."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList(
            [BertLayer(config, i) for i in range(config.num_hidden_layers)]
        )

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_values=None,
        use_cache=None,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=True,
        query_length=0,
    ):
        """Run all layers in sequence.

        Args:
            hidden_states: input to the first layer.
            attention_mask: mask forwarded unchanged to every layer.
            head_mask: optional per-layer head masks, indexed by layer.
            encoder_hidden_states: forwarded to every layer for cross-attention.
            encoder_attention_mask: mask forwarded with ``encoder_hidden_states``.
            past_key_values: optional per-layer cached key/values.
            use_cache: when truthy, the last output of every layer is collected
                and returned as the new cache.
            output_attentions: collect attention weights.
            output_hidden_states: collect the input of every layer plus the
                final output.
            return_dict: return a
                ``BaseModelOutputWithPastAndCrossAttentions`` instead of a tuple.
            query_length: number of leading query positions, forwarded to the
                layers.

        Returns:
            Either the model-output object or a tuple of the non-``None``
            values among (last hidden state, cache, hidden states, self
            attentions, cross attentions). ``cross_attentions`` holds one entry
            per layer that actually produced a cross-attention map.
        """
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None
        all_cross_attentions = (
            () if output_attentions and self.config.add_cross_attention else None
        )

        next_decoder_cache = () if use_cache else None

        for i in range(self.config.num_hidden_layers):
            layer_module = self.layer[i]
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None
            past_key_value = past_key_values[i] if past_key_values is not None else None

            if getattr(self.config, "gradient_checkpointing", False) and self.training:

                if use_cache:
                    logger.warning(
                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                    )
                    use_cache = False

                def create_custom_forward(module):
                    # The non-tensor arguments are bound through the closure;
                    # only the tensors are passed through checkpoint().
                    def custom_forward(*inputs):
                        return module(
                            *inputs, past_key_value, output_attentions, query_length
                        )

                    return custom_forward

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(layer_module),
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    encoder_hidden_states,
                    encoder_attention_mask,
                    past_key_value,
                    output_attentions,
                    query_length,
                )

            hidden_states = layer_outputs[0]
            if use_cache:
                next_decoder_cache += (layer_outputs[-1],)
            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)
                # With attentions requested a layer returns
                # (hidden, self_attn, [cross_attn], present_key_value). The
                # cross-attention slot exists only when the layer really ran
                # cross-attention; otherwise index 2 is the key/value cache and
                # must not be reported as an attention map.
                if all_cross_attentions is not None and len(layer_outputs) > 3:
                    all_cross_attentions = all_cross_attentions + (layer_outputs[2],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [
                    hidden_states,
                    next_decoder_cache,
                    all_hidden_states,
                    all_self_attentions,
                    all_cross_attentions,
                ]
                if v is not None
            )
        return BaseModelOutputWithPastAndCrossAttentions(
            last_hidden_state=hidden_states,
            past_key_values=next_decoder_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
            cross_attentions=all_cross_attentions,
        )


class BertPooler(nn.Module):
    """Pool a sequence into one vector: dense + tanh on the first position."""

    def __init__(self, config):
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        """Return ``tanh(dense(hidden_states[:, 0]))``."""
        # "Pooling" here simply means picking the hidden state at index 0
        # along dim 1 and transforming it.
        leading_state = hidden_states[:, 0]
        return self.activation(self.dense(leading_state))


class BertPredictionHeadTransform(nn.Module):
    """Transform applied before the vocabulary projection: dense, activation, LayerNorm."""

    def __init__(self, config):
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        # ``hidden_act`` is either a key into ACT2FN or the activation itself.
        activation = config.hidden_act
        self.transform_act_fn = (
            ACT2FN[activation] if isinstance(activation, str) else activation
        )
        self.LayerNorm = nn.LayerNorm(width, eps=config.layer_norm_eps)

    def forward(self, hidden_states):
        """Return ``LayerNorm(act(dense(hidden_states)))``."""
        activated = self.transform_act_fn(self.dense(hidden_states))
        return self.LayerNorm(activated)


class BertLMPredictionHead(nn.Module):
    """Language-modeling head: hidden-state transform followed by a projection to the vocabulary."""

    def __init__(self, config):
        super().__init__()
        self.transform = BertPredictionHeadTransform(config)

        # The projection itself is created without a bias; a separate
        # per-token bias parameter is attached to it right below.
        vocab = config.vocab_size
        self.decoder = nn.Linear(config.hidden_size, vocab, bias=False)
        self.bias = nn.Parameter(torch.zeros(vocab))

        # Share the parameter object so the bias is correctly resized with
        # `resize_token_embeddings`.
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        """Return vocabulary scores for ``hidden_states``."""
        return self.decoder(self.transform(hidden_states))


class BertOnlyMLMHead(nn.Module):
    """Thin wrapper exposing the LM prediction head under the ``predictions`` attribute."""

    def __init__(self, config):
        super().__init__()
        self.predictions = BertLMPredictionHead(config)

    def forward(self, sequence_output):
        """Return the prediction scores for ``sequence_output``."""
        return self.predictions(sequence_output)


class BertPreTrainedModel(PreTrainedModel):
    """
    Abstract base class that takes care of weight initialization and provides a simple interface for downloading and
    loading pretrained models.
    """

    config_class = BertConfig
    base_model_prefix = "bert"
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def _init_weights(self, module):
        """Initialize the weights of a single sub-module.

        Linear and embedding weights are drawn from a normal distribution with
        std ``config.initializer_range``; linear biases are zeroed; LayerNorm
        is reset to weight 1 and bias 0.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version, which uses truncated_normal
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


class BertModel(BertPreTrainedModel):
    """
    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in `Attention is
    all you need <https://arxiv.org/abs/1706.03762>`__ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
    Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

    In this implementation causal (decoder-style) masking is requested per call through the ``is_decoder`` argument
    of :meth:`forward`. Cross-attention uses the :obj:`encoder_hidden_states` passed to the forward pass, in the
    layers that were built with a cross-attention module (``config.add_cross_attention``). Optional
    :obj:`query_embeds` are placed in front of the token embeddings.
    """

    def __init__(self, config, add_pooling_layer=False):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)

        self.encoder = BertEncoder(config)

        self.pooler = BertPooler(config) if add_pooling_layer else None

        self.init_weights()

    def get_input_embeddings(self):
        """Return the word-embedding module."""
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        """Replace the word-embedding module."""
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    def get_extended_attention_mask(
        self,
        attention_mask: Tensor,
        input_shape: Tuple[int],
        device: device,
        is_decoder: bool,
        has_query: bool = False,
    ) -> Tensor:
        """
        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.

        Arguments:
            attention_mask (:obj:`torch.Tensor`):
                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
            input_shape (:obj:`Tuple[int]`):
                The shape of the input to the model. In decoder mode it is unpacked as
                ``(batch_size, seq_length)`` and sizes the causal mask.
            device: (:obj:`torch.device`):
                The device of the input to the model.
            is_decoder (:obj:`bool`):
                Whether a causal mask is combined with a 2D padding mask.
            has_query (:obj:`bool`):
                In decoder mode, when the padding mask is longer than ``seq_length``: if set, rows of zeros are added
                for the extra prefix positions so that they do not attend to the causal part (UniLM-style mask).

        Returns:
            :obj:`torch.Tensor` The extended additive attention mask, cast to :obj:`self.dtype`: ``0.0`` at positions
            to attend to and ``-10000.0`` at masked positions.

        Raises:
            ValueError: if :obj:`attention_mask` is neither 2D nor 3D.
        """
        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        if attention_mask.dim() == 3:
            extended_attention_mask = attention_mask[:, None, :, :]
        elif attention_mask.dim() == 2:
            # Provided a padding mask of dimensions [batch_size, seq_length]
            # - if the model is a decoder, apply a causal mask in addition to the padding mask
            # - if the model is an encoder, make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length]
            if is_decoder:
                batch_size, seq_length = input_shape

                seq_ids = torch.arange(seq_length, device=device)
                # Lower-triangular mask: position i may attend to positions <= i.
                causal_mask = (
                    seq_ids[None, None, :].repeat(batch_size, seq_length, 1)
                    <= seq_ids[None, :, None]
                )

                # add a prefix ones mask to the causal mask
                # causal and attention masks must have same type with pytorch version < 1.3
                causal_mask = causal_mask.to(attention_mask.dtype)

                if causal_mask.shape[1] < attention_mask.shape[1]:
                    # The padding mask covers more positions than the causal
                    # mask: the difference is treated as a prefix.
                    prefix_seq_len = attention_mask.shape[1] - causal_mask.shape[1]
                    if has_query:  # UniLM style attention mask
                        # Extra rows of zeros: prefix positions do not attend
                        # to the causal (text) columns.
                        causal_mask = torch.cat(
                            [
                                torch.zeros(
                                    (batch_size, prefix_seq_len, seq_length),
                                    device=device,
                                    dtype=causal_mask.dtype,
                                ),
                                causal_mask,
                            ],
                            dim=1,
                        )
                    # Extra columns of ones: every row may attend to the prefix.
                    causal_mask = torch.cat(
                        [
                            torch.ones(
                                (batch_size, causal_mask.shape[1], prefix_seq_len),
                                device=device,
                                dtype=causal_mask.dtype,
                            ),
                            causal_mask,
                        ],
                        dim=-1,
                    )
                extended_attention_mask = (
                    causal_mask[:, None, :, :] * attention_mask[:, None, None, :]
                )
            else:
                extended_attention_mask = attention_mask[:, None, None, :]
        else:
            raise ValueError(
                "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format(
                    input_shape, attention_mask.shape
                )
            )

        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
        # masked positions, this operation will create a tensor which is 0.0 for
        # positions we want to attend and -10000.0 for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        extended_attention_mask = extended_attention_mask.to(
            dtype=self.dtype
        )  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
        return extended_attention_mask

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        position_ids=None,
        head_mask=None,
        query_embeds=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        past_key_values=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        is_decoder=False,
    ):
        r"""
        encoder_hidden_states  (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
            the model is configured as a decoder.
        encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
            the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``:
            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        past_key_values (:obj:`tuple(tuple(torch.FloatTensor))` of length :obj:`config.n_layers` with each tuple having 4 tensors of shape :obj:`(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
            Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
            If :obj:`past_key_values` are used, the user can optionally input only the last :obj:`decoder_input_ids`
            (those that don't have their past key value states given to this model) of shape :obj:`(batch_size, 1)`
            instead of all :obj:`decoder_input_ids` of shape :obj:`(batch_size, sequence_length)`.
        use_cache (:obj:`bool`, `optional`):
            If set to :obj:`True`, :obj:`past_key_values` key value states are returned and can be used to speed up
            decoding (see :obj:`past_key_values`).
        query_embeds (:obj:`torch.FloatTensor`, `optional`):
            Embeddings placed in front of the token embeddings; required when :obj:`input_ids` is :obj:`None`.
        is_decoder (:obj:`bool`, `optional`):
            If set, a causal self-attention mask sized from :obj:`input_ids` is used; :obj:`input_ids` is then
            required.
        """
        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        # use_cache = use_cache if use_cache is not None else self.config.use_cache

        if input_ids is None:
            assert (
                query_embeds is not None
            ), "You have to specify query_embeds when input_ids is None"

        # Number of already-cached text positions: the cached key length minus
        # the configured number of query positions.
        past_key_values_length = (
            past_key_values[0][0].shape[2] - self.config.query_length
            if past_key_values is not None
            else 0
        )

        query_length = query_embeds.shape[1] if query_embeds is not None else 0

        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            query_embeds=query_embeds,
            past_key_values_length=past_key_values_length,
        )

        input_shape = embedding_output.size()[:-1]
        batch_size, seq_length = input_shape
        device = embedding_output.device

        if attention_mask is None:
            attention_mask = torch.ones(
                ((batch_size, seq_length + past_key_values_length)), device=device
            )

        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        if is_decoder:
            # The causal mask is sized from the text tokens only, so they must
            # be present; fail with a clear message instead of an
            # AttributeError on `None.shape`.
            if input_ids is None:
                raise ValueError(
                    "input_ids must be provided when is_decoder=True "
                    "(its shape defines the causal attention mask)"
                )
            extended_attention_mask = self.get_extended_attention_mask(
                attention_mask,
                input_ids.shape,
                device,
                is_decoder,
                has_query=(query_embeds is not None),
            )
        else:
            extended_attention_mask = self.get_extended_attention_mask(
                attention_mask, input_shape, device, is_decoder
            )

        # If a 2D or 3D attention mask is provided for the cross-attention
        # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if encoder_hidden_states is not None:
            if isinstance(encoder_hidden_states, list):
                encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states[
                    0
                ].size()
            else:
                (
                    encoder_batch_size,
                    encoder_sequence_length,
                    _,
                ) = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)

            if isinstance(encoder_attention_mask, list):
                encoder_extended_attention_mask = [
                    self.invert_attention_mask(mask) for mask in encoder_attention_mask
                ]
            elif encoder_attention_mask is None:
                # No mask given: attend to every encoder position.
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
                encoder_extended_attention_mask = self.invert_attention_mask(
                    encoder_attention_mask
                )
            else:
                encoder_extended_attention_mask = self.invert_attention_mask(
                    encoder_attention_mask
                )
        else:
            encoder_extended_attention_mask = None

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        encoder_outputs = self.encoder(
            embedding_output,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_extended_attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            query_length=query_length,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = (
            self.pooler(sequence_output) if self.pooler is not None else None
        )

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            past_key_values=encoder_outputs.past_key_values,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            cross_attentions=encoder_outputs.cross_attentions,
        )


class BertLMHeadModel(BertPreTrainedModel):
    """BERT backbone (built without a pooling layer) topped with an LM prediction head.

    The backbone is stored as ``self.bert`` and the head as ``self.cls``;
    ``forward`` runs the backbone, optionally strips the leading query
    positions, and scores the remaining positions with the head.
    """

    _keys_to_ignore_on_load_unexpected = [r"pooler"]
    _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"]

    def __init__(self, config):
        super().__init__(config)

        # Backbone first, head second: keep the module registration order.
        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)

        self.init_weights()

    def get_output_embeddings(self):
        """Return the decoder module of the prediction head."""
        head = self.cls.predictions
        return head.decoder

    def set_output_embeddings(self, new_embeddings):
        """Replace the decoder module of the prediction head."""
        head = self.cls.predictions
        head.decoder = new_embeddings

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        position_ids=None,
        head_mask=None,
        query_embeds=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        labels=None,
        past_key_values=None,
        use_cache=True,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        return_logits=False,
        is_decoder=True,
        reduction="mean",
    ):
        r"""
        Run the backbone and the LM head, optionally computing a next-token loss.

        Args:
            input_ids, attention_mask, position_ids, head_mask:
                Forwarded unchanged to ``self.bert``.
            query_embeds:
                Forwarded to ``self.bert``. When given, the first
                ``query_embeds.shape[1]`` positions of the backbone output are
                dropped before the LM head is applied. Discarded (set to
                ``None``) whenever ``past_key_values`` is supplied.
            encoder_hidden_states (``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)``, optional):
                Hidden states at the output of the last encoder layer, used by
                cross-attention when the model is configured as a decoder.
            encoder_attention_mask (``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``, optional):
                Mask for the encoder input used in cross-attention:
                1 for tokens that are **not masked**, 0 for **masked** tokens.
            labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, optional):
                Targets for the left-to-right language-modeling loss. Positions
                set to ``-100`` are ignored. Supplying labels forces
                ``use_cache`` to ``False``.
            past_key_values:
                Precomputed key/value states of the attention blocks, used to
                speed up decoding; with them only the last input ids need to be
                passed.
            use_cache (bool, optional):
                Whether key/value states should be returned for reuse.
            output_attentions, output_hidden_states:
                Forwarded unchanged to ``self.bert``.
            return_dict:
                Falls back to ``self.config.use_return_dict`` when ``None``.
            return_logits:
                When truthy, return only the prediction scores with the last
                position removed.
            is_decoder:
                Forwarded unchanged to ``self.bert``.
            reduction:
                Passed to ``CrossEntropyLoss``. With ``"none"`` the per-token
                losses are reshaped to ``(batch_size, -1)`` and summed over the
                second dimension.

        Returns:
            The truncated prediction scores when ``return_logits`` is set;
            otherwise a ``CausalLMOutputWithCrossAttentions`` when
            ``return_dict`` is truthy, else a tuple starting with the loss
            (only if computed) followed by the prediction scores and
            ``outputs[2:]`` of the backbone.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Training with labels never uses the key/value cache.
        use_cache = use_cache if labels is None else False
        # Once cached states exist, the query embeddings are not fed again.
        query_embeds = query_embeds if past_key_values is None else None

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            query_embeds=query_embeds,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            is_decoder=is_decoder,
        )

        hidden = outputs[0]
        if query_embeds is not None:
            # Skip the leading query positions; only the text part is scored.
            num_query = query_embeds.shape[1]
            hidden = hidden[:, num_query:, :]

        prediction_scores = self.cls(hidden)

        if return_logits:
            return prediction_scores[:, :-1, :].contiguous()

        lm_loss = None
        if labels is not None:
            # Next-token prediction: scores at step t are compared to the label at t + 1.
            scores_for_loss = prediction_scores[:, :-1, :].contiguous()
            targets = labels[:, 1:].contiguous()
            criterion = CrossEntropyLoss(reduction=reduction, label_smoothing=0.1)
            lm_loss = criterion(
                scores_for_loss.view(-1, self.config.vocab_size),
                targets.view(-1),
            )
            if reduction == "none":
                batch_size = prediction_scores.size(0)
                lm_loss = lm_loss.view(batch_size, -1).sum(1)

        if return_dict:
            return CausalLMOutputWithCrossAttentions(
                loss=lm_loss,
                logits=prediction_scores,
                past_key_values=outputs.past_key_values,
                hidden_states=outputs.hidden_states,
                attentions=outputs.attentions,
                cross_attentions=outputs.cross_attentions,
            )

        output = (prediction_scores,) + outputs[2:]
        if lm_loss is None:
            return output
        return (lm_loss,) + output

    def prepare_inputs_for_generation(
        self, input_ids, query_embeds, past=None, attention_mask=None, **model_kwargs
    ):
        """Assemble the keyword arguments for one decoding step.

        An all-ones mask is created when ``attention_mask`` is missing, and an
        all-ones mask covering the query positions is prepended to it. When
        ``past`` is given, only the last column of ``input_ids`` is kept.
        """
        # If no mask was supplied, attend to every input position.
        if attention_mask is None:
            attention_mask = input_ids.new_ones(input_ids.shape)
        query_mask = input_ids.new_ones(query_embeds.shape[:-1])
        full_mask = torch.cat([query_mask, attention_mask], dim=-1)

        # With cached states only the newest token has to be fed.
        step_ids = input_ids if past is None else input_ids[:, -1:]

        return {
            "input_ids": step_ids,
            "query_embeds": query_embeds,
            "attention_mask": full_mask,
            "past_key_values": past,
            "encoder_hidden_states": model_kwargs.get("encoder_hidden_states"),
            "encoder_attention_mask": model_kwargs.get("encoder_attention_mask"),
            "is_decoder": True,
        }

    def _reorder_cache(self, past, beam_idx):
        """Select, along dim 0, the ``beam_idx`` entries of every cached state."""
        return tuple(
            tuple(state.index_select(0, beam_idx) for state in layer_past)
            for layer_past in past
        )


class BertForMaskedLM(BertPreTrainedModel):

    _keys_to_ignore_on_load_unexpected = [r"pooler"]
    _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"]

    def __init__(self, config):
        super().__init__(config)

        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)

        self.init_weights()

    def get_output_embeddings(self):
        return self.cls.predictions.decoder

    def set_output_embeddings(self, new_embeddings):
        self.cls.predictions.decoder = new_embeddings

    def forward(
        self,

Download .txt

gitextract_kdqa59bx/

├── .gitignore
├── CLUB_modules/
│   ├── __init__.py
│   ├── mi_estimators.py
│   └── mi_estimators_dist.py
├── dataloader/
│   └── dataloader.py
├── dataset/
│   ├── fid2captions.json
│   ├── text.txt
│   └── wav.scp
├── environment.yml
├── model2.py
├── module/
│   ├── Qformer.py
│   └── modeling_llama.py
├── readme.md
├── result/
│   └── result.txt
├── scripts/
│   ├── inference.py
│   ├── test.py
│   └── train.py
└── tool/
    └── get_sentence_simi.py

Download .txt

SYMBOL INDEX (199 symbols across 7 files)

FILE: CLUB_modules/mi_estimators.py
  class CLUBVec2Seq (line 13) | class CLUBVec2Seq(nn.Module):
    method __init__ (line 16) | def __init__(
    method temporal_avg_pool (line 47) | def temporal_avg_pool(self, x, mask=None):
    method get_mu_logvar (line 62) | def get_mu_logvar(self, seq, mask):
    method loglikeli (line 71) | def loglikeli(self, seq, vec, mask=None):
    method forward (line 82) | def forward(self, seq, vec, mask=None):
    method learning_loss (line 118) | def learning_loss(self, seq, vec, mask=None):
  class CLUBForCategorical (line 122) | class CLUBForCategorical(nn.Module): # Update 04/27/2022
    method __init__ (line 128) | def __init__(self, input_dim, label_num, hidden_size=None):
    method forward (line 144) | def forward(self, inputs, labels):
    method loglikeli (line 172) | def loglikeli(self, inputs, labels):
    method learning_loss (line 176) | def learning_loss(self, inputs, labels):
  class CLUB (line 180) | class CLUB(nn.Module):  # CLUB: Mutual Information Contrastive Learning ...
    method __init__ (line 191) | def __init__(self, x_dim, y_dim, hidden_size, is_sampled_version=False):
    method get_mu_logvar (line 205) | def get_mu_logvar(self, x_samples):
    method forward (line 210) | def forward(self, x_samples, y_samples):
    method loglikeli (line 237) | def loglikeli(self, x_samples, y_samples): # unnormalized loglikelihood
    method learning_loss (line 241) | def learning_loss(self, x_samples, y_samples):
  class MINE (line 245) | class MINE(nn.Module):
    method __init__ (line 246) | def __init__(self, x_dim, y_dim, hidden_size):
    method forward (line 252) | def forward(self, x_samples, y_samples):  # samples have shape [sample...
    method learning_loss (line 267) | def learning_loss(self, x_samples, y_samples):
  class NWJ (line 271) | class NWJ(nn.Module):
    method __init__ (line 272) | def __init__(self, x_dim, y_dim, hidden_size):
    method forward (line 278) | def forward(self, x_samples, y_samples):
    method learning_loss (line 291) | def learning_loss(self, x_samples, y_samples):
  class InfoNCE (line 295) | class InfoNCE(nn.Module):
    method __init__ (line 296) | def __init__(self, x_dim, y_dim, hidden_size):
    method forward (line 303) | def forward(self, x_samples, y_samples):  # samples have shape [sample...
    method learning_loss (line 316) | def learning_loss(self, x_samples, y_samples):
  function log_sum_exp (line 320) | def log_sum_exp(value, dim=None, keepdim=False):
  class L1OutUB (line 341) | class L1OutUB(nn.Module):  # naive upper bound
    method __init__ (line 342) | def __init__(self, x_dim, y_dim, hidden_size):
    method get_mu_logvar (line 353) | def get_mu_logvar(self, x_samples):
    method forward (line 358) | def forward(self, x_samples, y_samples):
    method loglikeli (line 374) | def loglikeli(self, x_samples, y_samples):
    method learning_loss (line 378) | def learning_loss(self, x_samples, y_samples):
  class VarUB (line 382) | class VarUB(nn.Module):  #    variational upper bound
    method __init__ (line 383) | def __init__(self, x_dim, y_dim, hidden_size):
    method get_mu_logvar (line 394) | def get_mu_logvar(self, x_samples):
    method forward (line 399) | def forward(self, x_samples, y_samples): #[nsample, 1]
    method loglikeli (line 403) | def loglikeli(self, x_samples, y_samples):
    method learning_loss (line 407) | def learning_loss(self, x_samples, y_samples):

FILE: CLUB_modules/mi_estimators_dist.py
  class CLUBVec2Seq (line 14) | class CLUBVec2Seq(nn.Module):
    method __init__ (line 17) | def __init__(
    method temporal_avg_pool (line 48) | def temporal_avg_pool(self, x, mask=None):
    method get_mu_logvar (line 63) | def get_mu_logvar(self, seq, mask):
    method loglikeli (line 72) | def loglikeli(self, seq, vec, mask=None):
    method forward (line 83) | def forward(self, seq, vec, mask=None):
    method learning_loss (line 126) | def learning_loss(self, seq, vec, mask=None):
  class CLUBForCategorical (line 130) | class CLUBForCategorical(nn.Module):
    method __init__ (line 138) | def __init__(self, input_dim, label_num, hidden_size=None):
    method forward (line 154) | def forward(self, inputs, labels):
    method loglikeli (line 201) | def loglikeli(self, inputs, labels):
    method learning_loss (line 205) | def learning_loss(self, inputs, labels):
  class CLUB (line 209) | class CLUB(nn.Module):  # CLUB: Mutual Information Contrastive Learning ...
    method __init__ (line 220) | def __init__(self, x_dim, y_dim, hidden_size, is_sampled_version=False):
    method get_mu_logvar (line 234) | def get_mu_logvar(self, x_samples):
    method forward (line 239) | def forward(self, x_samples, y_samples):
    method loglikeli (line 275) | def loglikeli(self, x_samples, y_samples): # unnormalized loglikelihood
    method learning_loss (line 279) | def learning_loss(self, x_samples, y_samples):

FILE: dataloader/dataloader.py
  class AudioMotionDataset (line 7) | class AudioMotionDataset(Dataset):
    method __init__ (line 8) | def __init__(self, text_file, wav_scp_file,description_file):
    method __getitem__ (line 30) | def __getitem__(self, index):
    method __len__ (line 37) | def __len__(self):
  function collate_fn (line 42) | def collate_fn(batch):

FILE: model2.py
  class KeywordsStoppingCriteria (line 20) | class KeywordsStoppingCriteria(StoppingCriteria):
    method __init__ (line 21) | def __init__(self, keywords_ids:list):
    method __call__ (line 24) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  class MotionAudio (line 29) | class MotionAudio(pl.LightningModule):
    method __init__ (line 30) | def __init__(
    method init_Qformer (line 76) | def init_Qformer(self,num_query_token, vision_width, cross_attention_f...
    method mean_pooling (line 94) | def mean_pooling(self,model_output, attention_mask):
    method forward (line 102) | def forward(self, audio, describtion):
    method training_step (line 170) | def training_step(self, batch, batch_idx):
    method validation_step (line 175) | def validation_step(self, batch, batch_idx):
    method configure_optimizers (line 180) | def configure_optimizers(self):
    method inference (line 183) | def inference(self, audio):
    method post_processing (line 256) | def post_processing(self, sentences,device):
    method test_step (line 268) | def test_step(self, batch, batch_idx):
  function count_parameters (line 286) | def count_parameters(model):

FILE: module/Qformer.py
  class BertEmbeddings (line 41) | class BertEmbeddings(nn.Module):
    method __init__ (line 44) | def __init__(self, config):
    method forward (line 68) | def forward(
  class BertSelfAttention (line 101) | class BertSelfAttention(nn.Module):
    method __init__ (line 102) | def __init__(self, config, is_cross_attention):
    method save_attn_gradients (line 139) | def save_attn_gradients(self, attn_gradients):
    method get_attn_gradients (line 142) | def get_attn_gradients(self):
    method save_attention_map (line 145) | def save_attention_map(self, attention_map):
    method get_attention_map (line 148) | def get_attention_map(self):
    method transpose_for_scores (line 151) | def transpose_for_scores(self, x):
    method forward (line 159) | def forward(
  class BertSelfOutput (line 268) | class BertSelfOutput(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 275) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 282) | class BertAttention(nn.Module):
    method __init__ (line 283) | def __init__(self, config, is_cross_attention=False):
    method prune_heads (line 289) | def prune_heads(self, heads):
    method forward (line 312) | def forward(
  class BertIntermediate (line 339) | class BertIntermediate(nn.Module):
    method __init__ (line 340) | def __init__(self, config):
    method forward (line 348) | def forward(self, hidden_states):
  class BertOutput (line 354) | class BertOutput(nn.Module):
    method __init__ (line 355) | def __init__(self, config):
    method forward (line 361) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 368) | class BertLayer(nn.Module):
    method __init__ (line 369) | def __init__(self, config, layer_num):
    method forward (line 392) | def forward(
    method feed_forward_chunk (line 466) | def feed_forward_chunk(self, attention_output):
    method feed_forward_chunk_query (line 471) | def feed_forward_chunk_query(self, attention_output):
  class BertEncoder (line 477) | class BertEncoder(nn.Module):
    method __init__ (line 478) | def __init__(self, config):
    method forward (line 485) | def forward(
  class BertPooler (line 582) | class BertPooler(nn.Module):
    method __init__ (line 583) | def __init__(self, config):
    method forward (line 588) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 597) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 598) | def __init__(self, config):
    method forward (line 607) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 614) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 615) | def __init__(self, config):
    method forward (line 628) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 634) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 635) | def __init__(self, config):
    method forward (line 639) | def forward(self, sequence_output):
  class BertPreTrainedModel (line 644) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 654) | def _init_weights(self, module):
  class BertModel (line 667) | class BertModel(BertPreTrainedModel):
    method __init__ (line 677) | def __init__(self, config, add_pooling_layer=False):
    method get_input_embeddings (line 689) | def get_input_embeddings(self):
    method set_input_embeddings (line 692) | def set_input_embeddings(self, value):
    method _prune_heads (line 695) | def _prune_heads(self, heads_to_prune):
    method get_extended_attention_mask (line 703) | def get_extended_attention_mask(
    method forward (line 794) | def forward(
  class BertLMHeadModel (line 958) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 963) | def __init__(self, config):
    method get_output_embeddings (line 971) | def get_output_embeddings(self):
    method set_output_embeddings (line 974) | def set_output_embeddings(self, new_embeddings):
    method forward (line 977) | def forward(
    method prepare_inputs_for_generation (line 1087) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1110) | def _reorder_cache(self, past, beam_idx):
  class BertForMaskedLM (line 1121) | class BertForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1126) | def __init__(self, config):
    method get_output_embeddings (line 1134) | def get_output_embeddings(self):
    method set_output_embeddings (line 1137) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1140) | def forward(

FILE: module/modeling_llama.py
  function _make_causal_mask (line 42) | def _make_causal_mask(
  function _expand_mask (line 60) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  class LlamaRMSNorm (line 74) | class LlamaRMSNorm(nn.Module):
    method __init__ (line 75) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 83) | def forward(self, hidden_states):
  class LlamaRotaryEmbedding (line 91) | class LlamaRotaryEmbedding(torch.nn.Module):
    method __init__ (line 92) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 107) | def forward(self, x, seq_len=None):
  function rotate_half (line 124) | def rotate_half(x):
  function apply_rotary_pos_emb (line 131) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
  class LlamaMLP (line 142) | class LlamaMLP(nn.Module):
    method __init__ (line 143) | def __init__(
    method forward (line 155) | def forward(self, x):
  class LlamaAttention (line 159) | class LlamaAttention(nn.Module):
    method __init__ (line 162) | def __init__(self, config: LlamaConfig):
    method _shape (line 181) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 184) | def forward(
  class LlamaDecoderLayer (line 253) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 254) | def __init__(self, config: LlamaConfig):
    method forward (line 266) | def forward(
  class LlamaPreTrainedModel (line 342) | class LlamaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 349) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 360) | def _set_gradient_checkpointing(self, module, value=False):
  class LlamaModel (line 433) | class LlamaModel(LlamaPreTrainedModel):
    method __init__ (line 441) | def __init__(self, config: LlamaConfig):
    method get_input_embeddings (line 454) | def get_input_embeddings(self):
    method set_input_embeddings (line 457) | def set_input_embeddings(self, value):
    method _prepare_decoder_attention_mask (line 461) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape,...
    method forward (line 485) | def forward(
  class LlamaForCausalLM (line 613) | class LlamaForCausalLM(LlamaPreTrainedModel):
    method __init__ (line 616) | def __init__(self, config):
    method get_input_embeddings (line 625) | def get_input_embeddings(self):
    method set_input_embeddings (line 628) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 631) | def get_output_embeddings(self):
    method set_output_embeddings (line 634) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 637) | def set_decoder(self, decoder):
    method get_decoder (line 640) | def get_decoder(self):
    method forward (line 645) | def forward(
    method prepare_inputs_for_generation (line 732) | def prepare_inputs_for_generation(
    method _reorder_cache (line 763) | def _reorder_cache(past_key_values, beam_idx):
  class LlamaForSequenceClassification (line 787) | class LlamaForSequenceClassification(LlamaPreTrainedModel):
    method __init__ (line 788) | def __init__(self, config):
    method get_input_embeddings (line 797) | def get_input_embeddings(self):
    method set_input_embeddings (line 800) | def set_input_embeddings(self, value):
    method forward (line 804) | def forward(

FILE: tool/get_sentence_simi.py
  class SimiCal (line 13) | class SimiCal():
    method __init__ (line 14) | def __init__(self, device=torch.device('cuda')):
    method mean_pooling (line 25) | def mean_pooling(self, model_output, attention_mask):
    method cos_sim (line 30) | def cos_sim(self, a: Union[torch.Tensor, np.ndarray], b: Union[torch.T...
    method __call__ (line 51) | def __call__(self,inp1,inp2):
  function test_SimiCal (line 61) | def test_SimiCal():
  function calculate_mean_variance (line 66) | def calculate_mean_variance(lst):
  function predictSimiWrapper (line 75) | def predictSimiWrapper(fpath):

Download .json

Condensed preview — 18 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (538K chars).

[
  {
    "path": ".gitignore",
    "chars": 60,
    "preview": "model.ckpt\nweights/\nft_local/\n__pycache__/\n*.pyc\n*.pyo\n*.pyd"
  },
  {
    "path": "CLUB_modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "CLUB_modules/mi_estimators.py",
    "chars": 15991,
    "preview": "\"\"\"\nAdapted from https://github.com/Linear95/CLUB/blob/master/mi_estimators.py\n\"\"\"\n\nimport numpy as np\nimport math\n\nimpo"
  },
  {
    "path": "CLUB_modules/mi_estimators_dist.py",
    "chars": 11101,
    "preview": "\"\"\"\nAdapted from https://github.com/Linear95/CLUB/blob/master/mi_estimators.py\n\"\"\"\n\nimport numpy as np\nimport math\n\nimpo"
  },
  {
    "path": "dataloader/dataloader.py",
    "chars": 2532,
    "preview": "import torch\nimport torchaudio\nfrom torch.utils.data import Dataset, DataLoader\nimport json\nimport random\nimport os\nclas"
  },
  {
    "path": "dataset/fid2captions.json",
    "chars": 32493,
    "preview": "{\n    \"tx_emotion_00201000015\": \"悲伤逆流成河\",\n    \"tx_emotion_00201000107\": \"伤心难过，声音颤抖，情绪激动失望\",\n    \"tx_emotion_00201000148\""
  },
  {
    "path": "dataset/text.txt",
    "chars": 28290,
    "preview": "tx_emotion_00201000015 一年又一年,一日复一日,一聚一离别,一生一场梦。 \ntx_emotion_00201000107 人生就像剥洋葱,总有一片会让你流泪。 \ntx_emotion_00201000148 心痛?要怪"
  },
  {
    "path": "dataset/wav.scp",
    "chars": 40468,
    "preview": "tx_emotion_00403000359 dataset/wav/tx_emotion_00403000359.wav\ntx_emulate_00_103_0001_000017 dataset/wav/tx_emulate_00_10"
  },
  {
    "path": "environment.yml",
    "chars": 7002,
    "preview": "name: secap\nchannels:\n  - defaults\ndependencies:\n  - _libgcc_mutex=0.1=main\n  - _openmp_mutex=5.1=1_gnu\n  - appdirs=1.4."
  },
  {
    "path": "model2.py",
    "chars": 14984,
    "preview": "import torch\nimport torch.nn as nn\nimport lightning.pytorch as pl\nfrom module.Qformer import BertConfig, BertLMHeadModel"
  },
  {
    "path": "module/Qformer.py",
    "chars": 48055,
    "preview": "import math\nimport os\nimport warnings\nfrom dataclasses import dataclass\nfrom typing import Optional, Tuple, Dict, Any\n\ni"
  },
  {
    "path": "module/modeling_llama.py",
    "chars": 39313,
    "preview": "# coding=utf-8\n# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.\n#\n# This code is based on"
  },
  {
    "path": "readme.md",
    "chars": 4890,
    "preview": "# SECAP: Speech Emotion Captioning with Large Language Model\r\nSECap: [Paper](https://ojs.aaai.org/index.php/AAAI/article"
  },
  {
    "path": "result/result.txt",
    "chars": 111401,
    "preview": "file: tx_emotion_00303000260.wav\norigin: 心情快乐舒畅\nresult: 快乐而愉悦，心情舒畅\nresult2: 情绪很舒畅\nresult3: 情绪很舒畅\nresult4: 快乐而愉悦，心情舒畅\nres"
  },
  {
    "path": "scripts/inference.py",
    "chars": 1518,
    "preview": "import sys\n\nsys.path.append(\"..\")\n\nfrom dataloader.dataloader import AudioMotionDataset, collate_fn\nfrom torch.utils.dat"
  },
  {
    "path": "scripts/test.py",
    "chars": 1338,
    "preview": "import os\nimport sys\nsys.path.append(\"..\")\n\nfrom dataloader.dataloader import AudioMotionDataset, collate_fn\nfrom torch."
  },
  {
    "path": "scripts/train.py",
    "chars": 2084,
    "preview": "from dataloader1 import AudioMotionDataset, collate_fn\nfrom torch.utils.data import DataLoader\nfrom torch.utils.data imp"
  },
  {
    "path": "tool/get_sentence_simi.py",
    "chars": 7652,
    "preview": "from typing import List, Union\n\nimport numpy as np\nimport torch\nimport torch.nn.functional\nfrom transformers import Bert"
  }
]

About this extraction

This page contains the full source code of the thuhcsi/SECap GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 18 files (360.5 KB), approximately 166.2k tokens, and a symbol index with 199 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo