Full Code of thuhcsi/SpeechCraft for AI

master 3ef02110bd7b cached
10 files
136.3 KB
43.4k tokens
50 symbols
1 requests
Download .txt
Repository: thuhcsi/SpeechCraft
Branch: master
Commit: 3ef02110bd7b
Files: 10
Total size: 136.3 KB

Directory structure:
gitextract_tgmekqoo/

├── AutomaticPipeline/
│   ├── AgePreTrainModel.py
│   ├── AutoPipeline.py
│   ├── Clustering.py
│   ├── PitchEnergy.py
│   ├── models/
│   │   └── SECap/
│   │       └── model2.py
│   └── outputs/
│       ├── labels_LJspeech_0.json
│       └── labels_LJspeech_0.scp
├── README.md
├── llama-ft/
│   └── llama_infer.py
└── requirements.yaml

================================================
FILE CONTENTS
================================================

================================================
FILE: AutomaticPipeline/AgePreTrainModel.py
================================================
import numpy as np
import torch
import torch.nn as nn
from transformers.models.wav2vec2.modeling_wav2vec2 import (
    Wav2Vec2Model,
    Wav2Vec2PreTrainedModel,
)
from torch.nn import functional as F

class ModelHead(nn.Module):
    r"""Projection head applied to pooled encoder features.

    The input goes through dropout, a square linear layer, ``tanh``, dropout
    again, and a final linear layer that emits ``num_labels`` values.
    """

    def __init__(self, config, num_labels):
        super().__init__()

        width = config.hidden_size
        # Attribute names double as state-dict keys, so they stay unchanged.
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(config.final_dropout)
        self.out_proj = nn.Linear(width, num_labels)

    def forward(self, features, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        hidden = self.dense(self.dropout(features))
        hidden = self.dropout(torch.tanh(hidden))
        return self.out_proj(hidden)

class AgeGenderModel(Wav2Vec2PreTrainedModel):
    r"""Wav2Vec2 encoder followed by a single-output ``age`` head.

    Despite the class name, only an age head is defined here; ``forward``
    returns one value per input row.
    """

    def __init__(self, config):
        super().__init__(config)

        self.config = config
        self.wav2vec2 = Wav2Vec2Model(config)
        self.age = ModelHead(config, 1)
        self.init_weights()

    def forward(
            self,
            input_values,
    ):
        # Element 0 of the encoder output holds the hidden states; they are
        # averaged over dim 1 before being fed to the age head.
        encoder_states = self.wav2vec2(input_values)[0]
        pooled = encoder_states.mean(dim=1)
        return self.age(pooled)


================================================
FILE: AutomaticPipeline/AutoPipeline.py
================================================
# coding=utf-8
import os
import argparse
import numpy as np
import torch
import librosa
from typing import List, Optional, Union, Dict
from tqdm import tqdm
import torchaudio
from torch.utils.data import DataLoader
from torch.nn import functional as F
from transformers import (
    AutoModelForAudioClassification,
    Wav2Vec2Processor,
    LlamaTokenizer,
    AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
)
from funasr import AutoModel
import sys
sys.path.append('../SECap') # path to the directory of SECap
from AgePreTrainModel import AgeGenderModel
from model2 import MotionAudio
from PitchEnergy import process_audio
from g2p_en import G2p
torch.multiprocessing.set_start_method('spawn', force=True)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float32


def to_device(tensors, device):
    """Return a new list in which every ``torch.Tensor`` item is moved to ``device``.

    Items that are not ``torch.Tensor`` instances are passed through unchanged.
    """
    return [
        item.to(device) if isinstance(item, torch.Tensor) else item
        for item in tensors
    ]

class CustomDataset(torch.utils.data.Dataset):
    """One shard of the ``.wav`` files found recursively under ``basedir``.

    The sorted file list is split into ``num_devices`` contiguous shards and
    this instance keeps shard ``number``. Every file belongs to exactly one
    shard, including when the file count is not a multiple of ``num_devices``.

    Args:
        basedir: Root directory that is walked for ``.wav`` files.
        sampling_rate: Sampling rate (Hz) the audio is loaded at.
        max_audio_len: Maximum clip length in seconds; longer audio is truncated.
        number: Index of the shard kept by this instance.
        num_devices: Total number of shards.
    """

    def __init__(
        self,
        basedir: Optional[str] = None,
        sampling_rate: int = 16000,
        max_audio_len: int = 5,
        number: int = 0,
        num_devices: int = 4
    ):
        self.num_devices = num_devices
        self.number = number
        self.basedir = basedir
        self.sampling_rate = sampling_rate
        self.max_audio_len = max_audio_len
        self.dataset = []
        self.category = []
        self.text_tn = []
        self.__preprocess__()

    def __preprocess__(self):
        """Collect the ``.wav`` paths and keep the slice belonging to this shard."""
        paths = []
        for dirname, _subdirs, files in os.walk(self.basedir):
            for filename in files:
                # Skip hidden files and anything that is not a .wav file.
                if filename.startswith('.') or not filename.endswith('.wav'):
                    continue
                paths.append(os.path.join(dirname, filename))

        # Every worker walks the tree on its own; sorting makes all of them
        # agree on the order, so the shards neither overlap nor leave gaps.
        paths.sort()

        # Balanced contiguous split. A plain ``len // num_devices`` slice size
        # would silently drop the trailing ``len % num_devices`` files.
        total = len(paths)
        start = self.number * total // self.num_devices
        stop = (self.number + 1) * total // self.num_devices
        self.dataset = paths[start:stop]

    def __len__(self):
        """
        Return the length of the dataset
        """
        return len(self.dataset)

    def _cutorpad(self, audio: np.ndarray) -> np.ndarray:
        """
        Cut or zero-pad a 1-D audio array to ``sampling_rate * max_audio_len`` samples
        """
        audio = np.asarray(audio)
        effective_length = self.sampling_rate * self.max_audio_len
        len_audio = len(audio)

        if len_audio > effective_length:
            return audio[:effective_length]
        if len_audio < effective_length:
            # Right-pad with zeros, keeping the input dtype.
            padded = np.zeros(effective_length, dtype=audio.dtype)
            padded[:len_audio] = audio
            return padded
        return audio

    def __getitem__(self, index) -> Optional[tuple]:
        """
        Return ``(audio, filepath)`` for one file, or ``None`` when the file holds no samples.

        ``audio`` is a 1-D mono array at ``self.sampling_rate`` truncated to
        ``max_audio_len`` seconds. Callers must be prepared to drop ``None``
        items.
        """
        filepath = self.dataset[index]
        # Load straight at the target rate (mono) instead of loading at
        # librosa's default rate and resampling a second time.
        speech_array, sr = librosa.load(filepath, sr=self.sampling_rate, mono=True)
        if len(speech_array) == 0:
            return None

        speech_array = speech_array[:self.max_audio_len * sr]
        return speech_array, filepath

class CollateFunc:
    """Collate ``(audio, filepath)`` items into a padded batch.

    Each audio array is run through ``w2v_processor`` and the results are
    padded together with ``w2v_processor.pad``.

    Args:
        w2v_processor: Processor used for feature extraction and padding.
        max_length: Forwarded to ``w2v_processor.pad``.
        padding: Forwarded to ``w2v_processor.pad``.
        pad_to_multiple_of: Forwarded to ``w2v_processor.pad``.
        sampling_rate: Sampling rate (Hz) of the incoming audio; also used to
            turn sample counts into durations in seconds.
    """

    def __init__(
        self,
        w2v_processor: Wav2Vec2Processor,
        max_length: Optional[int] = None,
        padding: Union[bool, str] = True,
        pad_to_multiple_of: Optional[int] = None,
        sampling_rate: int = 16000,
    ):
        self.padding = padding
        self.w2v_processor = w2v_processor
        self.max_length = max_length
        self.sampling_rate = sampling_rate
        self.pad_to_multiple_of = pad_to_multiple_of

    def __call__(self, batch: List):
        """Return ``(padded_batch, audiopaths, durations)`` for the usable items.

        Raises:
            ValueError: If no item in ``batch`` carries audio.
        """
        input_features = []
        audiopaths = []
        durations = []

        for item in batch:
            # The dataset returns None for files without samples; unpacking
            # such an item would raise, so drop it here.
            if item is None:
                continue
            audio, audiopath = item
            audiopaths.append(audiopath)

            input_tensor = self.w2v_processor(audio, sampling_rate=self.sampling_rate).input_values
            input_tensor = np.squeeze(input_tensor)
            input_features.append({"input_values": input_tensor})
            durations.append(len(audio) / self.sampling_rate)

        if not input_features:
            raise ValueError("CollateFunc received a batch without any usable audio item")

        batch = self.w2v_processor.pad(
            input_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        return batch, audiopaths, durations

def age_predict(batch, model, device):
    r"""Predict age from raw audio signal.

    Args:
        batch: Tensor of model inputs; moved to ``device`` before the call.
        model: Callable returning one value per input row.
        device: Device the inputs are moved to.

    Returns:
        A list of Python ints: each model output multiplied by 100 and
        truncated (presumably the model emits age / 100 -- confirm against
        the checkpoint's documentation).
    """
    audios = batch.to(device)
    preds = model(audios)
    # Flatten (batch, 1) to (batch,) so each element is a true scalar;
    # int() on a 1-element array is deprecated in recent NumPy releases.
    preds = preds.detach().cpu().numpy().reshape(-1)
    return [int(pred * 100) for pred in preds]

def gender_predict(batch, model, device):
    r"""Predict gender from raw audio signal.

    ``batch`` must provide ``'input_values'`` and ``'attention_mask'`` tensors.
    The class with the highest softmax score is mapped to ``'female'``
    (index 0) or ``'male'`` (index 1); one label is returned per row.
    """
    labels = ('female', 'male')
    input_values = batch['input_values'].to(device)
    attention_mask = batch['attention_mask'].to(device)

    logits = model(input_values, attention_mask=attention_mask).logits
    probabilities = F.softmax(logits, dim=-1)
    winners = torch.argmax(probabilities, dim=1).detach().cpu().numpy()
    return [labels[class_index] for class_index in winners]

# def emotion_predict(batch, model, device, feature_extractor):
def emotion_predict(audiopaths, model):
    """Return one emotion label per audio path.

    ``model.generate`` is called at utterance granularity; for every result the
    index of the highest entry in ``result['scores']`` is looked up in a fixed
    nine-entry label list. Indices 4, 5 and 8 all map to ``'neutral'``.
    """
    emotionlabels = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'neutral', 'sad', 'surprised', 'neutral']
    results = model.generate(audiopaths, granularity="utterance", extract_embedding=False)

    emotions = []
    for result in results:
        score = result['scores']
        best_index = score.index(max(score))
        emotions.append(emotionlabels[best_index])
    return emotions

def pitch_energy_calculate(input_values):
    r"""Compute mean pitch and mean energy for every row of ``input_values``.

    Each row is passed to ``process_audio`` with a fixed ``sr`` of 16000.
    Returns two parallel lists: the pitch values and the energy values.
    """
    # NOTE(review): rows are used exactly as received; if they were padded or
    # normalised upstream, the statistics include that -- confirm with the caller.
    waveforms = input_values.detach().cpu().numpy()
    stats = [process_audio(waveform, sr=16000) for waveform in waveforms]
    pitchs = [pitch for pitch, _ in stats]
    energys = [energy for _, energy in stats]
    return pitchs, energys

def _seconds_per_unit(duration, n_units):
    """Return ``duration / n_units``, or NaN when there are no units to divide by."""
    if n_units == 0:
        return float('nan')
    return duration / n_units


def _tsv_safe(text):
    """Return ``text`` as a string with tabs and line breaks replaced by spaces."""
    return str(text).replace('\t', ' ').replace('\r', ' ').replace('\n', ' ')


def inference_on_device(device, i, num_devices, language, basedir, scp_path):
    """Label shard ``i`` of the audio under ``basedir`` and append rows to a per-shard ``.scp`` file.

    For every batch the function collects a transcript, an age, a gender, the
    mean pitch, the mean energy, a speed value and an emotion label/caption,
    then appends one tab-separated line per utterance:
    ``parent_dir, file_stem, age, gender, pitch, energy, speed, emotion, transcript``.

    The speed value is the clip duration divided by the number of g2p tokens
    when ``language == 'english'`` and by the transcript length otherwise; it
    is NaN when that count is zero.

    Args:
        device: Torch device the models are moved to.
        i: Shard index; selects the dataset slice and is inserted into the
            output file name.
        num_devices: Total number of shards.
        language: ``'english'`` selects the emotion2vec + g2p path, any other
            value selects the SECap (``MotionAudio``) path. The value is also
            forwarded to the ASR pipeline.
        basedir: Directory searched for ``.wav`` files.
        scp_path: Output path template; rows go to ``<stem>_<i>.scp``.
    """
    sampling_rate = 16000
    batch_size = 4
    gender_model_path = "alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
    age_model_path = "audeering/wav2vec2-large-robust-24-ft-age-gender"
    asr_path = "openai/whisper-medium"
    # asr_path = "openai/whisper-large-v3"

    # Strip a trailing ".scp" only when it is really there (blind [:-4]
    # slicing would eat four arbitrary characters otherwise).
    scp_stem = scp_path[:-len('.scp')] if scp_path.endswith('.scp') else scp_path
    scp_path = f"{scp_stem}_{i}.scp"
    out_dir = os.path.dirname(scp_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Gender Predict
    gender_model = AutoModelForAudioClassification.from_pretrained(
        pretrained_model_name_or_path = gender_model_path,
        num_labels = 2,
        label2id = { "female": 0, "male": 1 },
        id2label = { 0: "female", 1: "male" },
    )
    gender_model.to(device)
    gender_model.eval()

    # Age Predict
    w2v_processor = Wav2Vec2Processor.from_pretrained(age_model_path)
    age_model = AgeGenderModel.from_pretrained(age_model_path, use_auth_token=False)
    age_model.to(device)
    # Explicit eval mode, matching the gender model, so the dropout layers
    # in the age head are inactive during inference.
    age_model.eval()

    # Emotion Predict
    if language == 'english':
        emotion_model = AutoModel(model="./models/emotion2vec_base_finetuned", model_revision="v2.0.4")
    else:
        emotion_model= MotionAudio()
        llama_ckpt = "../SECap/weights/models--minlik--chinese-llama-7b-merged/snapshots/1ca4d87576f1fef4d44a949fb65bbe6b96675872"
        llama_tokenizer = LlamaTokenizer.from_pretrained(llama_ckpt)
        ckpt_path="../SECap/model.ckpt"
        torch.cuda.empty_cache()
        state_dict = torch.load(ckpt_path, map_location=torch.device('cpu'))
        emotion_model.load_state_dict(state_dict, strict=False)
        emotion_model.to(device)

    g2p_model = G2p()

    # ASR
    asr_processor = AutoProcessor.from_pretrained(asr_path)
    asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(asr_path)
    asr_model.to(device)
    asr_pipe = pipeline(
        "automatic-speech-recognition",
        model=asr_model,
        tokenizer=asr_processor.tokenizer,
        feature_extractor=asr_processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=batch_size,
        return_timestamps=False,
        torch_dtype=torch_dtype,
        device=device,
    )

    inferset = CustomDataset(basedir, sampling_rate = sampling_rate, number = i, num_devices= num_devices)
    data_collator = CollateFunc(
        w2v_processor=w2v_processor,
        padding=True,
        sampling_rate=sampling_rate,
    )
    test_dataloader = DataLoader(
        dataset=inferset,
        batch_size=batch_size,
        collate_fn=data_collator,
        shuffle=False,
        num_workers=0
    )

    with torch.no_grad():
        for load_data in tqdm(test_dataloader):
            audios, audiopaths, durations = to_device(load_data, device)
            audio_features = audios['input_values']
            # NOTE(review): the ASR pipeline receives the processor's
            # input_values rather than the untouched waveform -- confirm
            # this is intended.
            waveforms = audio_features.cpu().numpy()
            transcripts = [
                asr_pipe(waveform, return_timestamps=False, generate_kwargs={"language": language})["text"]
                for waveform in waveforms
            ]

            ages = age_predict(audio_features, model=age_model, device=device)
            genders = gender_predict(audios, model=gender_model, device=device)
            pitchs, energys = pitch_energy_calculate(audio_features)

            if language == 'english':
                # Phonemise each transcript on its own; passing the whole
                # batch list would give every utterance the same count.
                phonemes = [g2p_model(transcript) for transcript in transcripts]
                speeds = [
                    _seconds_per_unit(duration, len(phoneme_seq))
                    for duration, phoneme_seq in zip(durations, phonemes)
                ]
                emotions = emotion_predict(audiopaths, model=emotion_model)
            else:
                speeds = [
                    _seconds_per_unit(duration, len(transcript))
                    for duration, transcript in zip(durations, transcripts)
                ]
                prompts = emotion_model.inference(audio_features.to(device))
                emotions = llama_tokenizer.batch_decode(prompts,skip_special_tokens=True)

            with open(scp_path, 'a', encoding='utf-8') as file:
                # A separate loop name keeps the shard index ``i`` intact.
                for row, audiopath in enumerate(audiopaths):
                    parent_dir = os.path.basename(os.path.dirname(audiopath))
                    file_stem = os.path.splitext(os.path.basename(audiopath))[0]
                    # Free-text fields are flattened so they cannot break the
                    # one-line-per-utterance, tab-separated layout.
                    emotion = _tsv_safe(emotions[row])
                    transcript = _tsv_safe(transcripts[row])
                    file.write(f"{parent_dir}\t{file_stem}\t{ages[row]}\t{genders[row]}\t{pitchs[row]}\t{energys[row]}\t{speeds[row]}\t{emotion}\t{transcript}\n")

def main(args):
    """Start one ``inference_on_device`` process per listed CUDA device and wait for all of them.

    ``args.devices`` is a comma-separated list of device indices; the position
    of each index in that list is used as the shard number.
    """
    device_ids = [int(token) for token in args.devices.split(',')]
    shard_count = len(device_ids)

    processes = []
    for shard_index, device_id in enumerate(device_ids):
        worker = torch.multiprocessing.Process(
            target=inference_on_device,
            args=(
                torch.device(f'cuda:{device_id}'),
                shard_index,
                shard_count,
                args.language,
                args.basedir,
                args.scp_path,
            ),
        )
        worker.start()
        processes.append(worker)

    for worker in processes:
        worker.join()



if __name__ == "__main__":
    # Command-line entry point.
    #   --language  'english' selects the English branch; other values take the
    #               SECap branch (see inference_on_device).
    #   --devices   comma-separated CUDA device indices, one worker per index.
    #   --basedir   directory searched recursively for .wav files.
    #   --scp_path  output path template; each worker adds its shard index.
    parser = argparse.ArgumentParser()
    parser.add_argument('--language', type=str, default = 'chinese')
    parser.add_argument('--devices', type=str, default = '0')
    parser.add_argument('--basedir', type=str, default = '../FastSpeech2/raw_data/AISHELL3/SSB0005')
    parser.add_argument('--scp_path', type=str, default = './outputs/labels_AISHELL.scp')

    args = parser.parse_args()
    
    main(args)


================================================
FILE: AutomaticPipeline/Clustering.py
================================================
import pandas as pd
import csv
import numpy as np
import argparse
import json
def assign_pitch_group(row, language, male_percentiles, female_percentiles):
    """Bucket ``row['pitch']`` into low / normal / high using gender-specific bounds.

    Rows whose ``gender`` is ``'male'`` use ``male_percentiles``; every other
    row uses ``female_percentiles``. Each bound pair is ``(low_max, normal_max)``
    with inclusive upper limits. English labels are returned when
    ``language == 'en'``, Chinese labels otherwise.
    """
    bounds = male_percentiles if row['gender'] == 'male' else female_percentiles
    pitch = row['pitch']
    english = language == 'en'

    if pitch <= bounds[0]:
        return 'low' if english else '低'
    if pitch <= bounds[1]:
        return 'normal' if english else '中'
    return 'high' if english else '高'

def replace_age_with_text(row, language):
    """Map the numeric ``row['age']`` to an age-group label.

    Bands use exclusive upper limits of 14, 26, 40 and 55; anything that falls
    in none of them is labelled elderly. English labels are returned when
    ``language == 'en'``, Chinese labels otherwise.
    """
    age = row['age']
    bands = (
        (14, "Child", '小孩'),
        (26, "Teenager", '少年'),
        (40, "Young Adult", '青年'),
        (55, "Middle-aged", '中年'),
    )
    for upper_limit, english_label, chinese_label in bands:
        if age < upper_limit:
            return english_label if language == 'en' else chinese_label
    return "Elderly" if language == 'en' else '老年'


def main(args):
    """Turn the numeric labels of an ``.scp`` file into categorical labels.

    Reads the tab-separated file at ``args.input_path`` (nine columns, no
    header), drops rows with missing values, and then:

    * replaces ``age`` with an age-group label,
    * buckets ``pitch`` per gender (male: 40th/90th percentile,
      female: 10th/60th percentile),
    * buckets ``energy`` and ``speed`` into terciles.

    Two files are written next to the input: ``<stem>_clusterd.scp``
    (tab-separated, no header) and ``<stem>.json`` mapping
    ``"<filename1>_<filename2>"`` to ``{"labels": "<tab-separated label string>"}``.
    Labels are English when ``args.language == 'en'`` and Chinese otherwise.
    """
    input_path = args.input_path
    language = args.language
    df = pd.read_csv(input_path, encoding = 'utf-8', sep='\t', header=None,  quoting=csv.QUOTE_NONE)
    df.columns = ['filename1', 'filename2', 'age', 'gender', 'pitch', 'energy', 'speed', 'emotion', 'transcript']
    df = df.dropna()

    def pitch_percentiles(gender, quantiles):
        # A single-gender corpus leaves the other selection empty; skip the
        # percentile call there. The NaN bounds are never compared against
        # because no row of that gender exists.
        pitches = df.loc[df['gender'] == gender, 'pitch']
        if pitches.empty:
            return np.full(len(quantiles), np.nan)
        return np.percentile(pitches, quantiles)

    male_percentiles = pitch_percentiles('male', [40, 90])
    female_percentiles = pitch_percentiles('female', [10, 60])

    df['age'] = df.apply(replace_age_with_text, axis=1, language = language)
    df['pitch_group'] = df.apply(assign_pitch_group, axis=1, language = language, male_percentiles = male_percentiles, female_percentiles = female_percentiles)
    df['energy_group'] = pd.qcut(df['energy'], 3, labels=["low", "normal", "high"] if language=='en' else ["低", "中", "高"])
    # Smaller speed values get the "fast" label.
    df['speed_group'] = pd.qcut(df['speed'], 3, labels=["fast", "normal", "slow"] if language=='en' else ["快", "中", "慢"])

    # Derive output names from the stem. A plain str.replace('.scp', ...)
    # returns the input path unchanged when the suffix is missing, which
    # would overwrite the input file.
    stem = input_path[:-len('.scp')] if input_path.endswith('.scp') else input_path
    clustered_path = stem + '_clusterd.scp'
    json_path = stem + '.json'

    df_to_save = df[['filename1', 'filename2',  'age', 'gender', 'pitch_group', 'energy_group', 'speed_group', 'emotion', 'transcript']]
    df_to_save.to_csv(clustered_path, sep='\t', header=False, index=False,  quoting=csv.QUOTE_NONE)

    result_dict = {}
    for _, row in df_to_save.iterrows():
        key = f"{row['filename1']}_{row['filename2']}"
        if language == 'en':
            value = (
                f"age:{row['age']}\t"
                f"gender:{row['gender']}\t"
                f"pitch:{row['pitch_group']}\t"
                f"volume:{row['energy_group']}\t"
                f"speed:{row['speed_group']}\t"
                f"emotion:{row['emotion']}\t"
                f"transcription:{row['transcript']}"
            )
        else:
            # Tab between every field and none at the end, matching the
            # English layout.
            value = (
                f"语气:{row['emotion']}\t"
                f"年龄:{row['age']}\t"
                f"性别:{row['gender']}\t"
                f"音高:{row['pitch_group']}\t"
                f"音量:{row['energy_group']}\t"
                f"语速:{row['speed_group']}\t"
                f"文本:{row['transcript']}"
            )
        result_dict[key] = {'labels': value}

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(result_dict, json_file, ensure_ascii=False, indent=4)

        
if __name__ == "__main__":
    # Command-line entry point.
    #   --language    'en' produces English labels; any other value produces
    #                 Chinese labels.
    #   --input_path  tab-separated .scp file to cluster; the outputs are
    #                 written next to it.
    parser = argparse.ArgumentParser()
    parser.add_argument('--language', type=str, default = 'en')
    parser.add_argument('--input_path', type=str, default = './outputs/labels_LJspeech_0.scp')
    args = parser.parse_args()
    main(args)

================================================
FILE: AutomaticPipeline/PitchEnergy.py
================================================
import librosa
import numpy as np

def extract_pitch(wav, sr):
    """Return the pitch array produced by ``librosa.core.piptrack`` for ``wav``.

    The magnitude array returned alongside it is discarded.
    """
    pitch_matrix, _ = librosa.core.piptrack(y=wav, sr=sr)
    return pitch_matrix

def calculate_mean_pitch(pitches):
    """Return the mean over columns of each column's mean positive value.

    For every column of ``pitches`` (a row of ``pitches.T``) the positive
    entries are averaged; columns without any positive entry are skipped.
    The result is the mean of those per-column means.

    Returns:
        The mean pitch, or NaN when no column has a positive entry.
    """
    column_means = [
        np.mean(column[column > 0])
        for column in pitches.T
        if np.any(column > 0)
    ]
    if not column_means:
        # Nothing to average: return NaN directly rather than letting
        # np.mean([]) emit a "Mean of empty slice" RuntimeWarning.
        return np.nan
    return np.mean(column_means)

def process_audio(audio, sr):
    """Return ``(mean_pitch, mean_energy)`` for one audio array.

    Pitch comes from ``extract_pitch`` followed by ``calculate_mean_pitch``;
    energy is the mean of the non-NaN values of ``librosa.feature.rms``.
    """
    mean_pitch = calculate_mean_pitch(extract_pitch(audio, sr = sr))

    rms = librosa.feature.rms(y=audio)
    valid_rms = rms[~np.isnan(rms)]
    mean_energy = np.mean(valid_rms)

    return mean_pitch, mean_energy

================================================
FILE: AutomaticPipeline/models/SECap/model2.py
================================================
import torch
import torch.nn as nn
import lightning.pytorch as pl
from module.Qformer import BertConfig, BertLMHeadModel
from transformers import (
    Wav2Vec2FeatureExtractor,
    HubertModel,
    BertTokenizer, 
    BertModel,
    LlamaTokenizer
)
from module.modeling_llama import LlamaForCausalLM
from CLUB_modules.mi_estimators import *
from tool.get_sentence_simi import SimiCal
import torch.nn.functional as F
from transformers import StoppingCriteria, StoppingCriteriaList
import numpy as np
import os

class KeywordsStoppingCriteria(StoppingCriteria):
    """Stopping criterion that fires once the newest token of the first sequence is a keyword id."""

    def __init__(self, keywords_ids:list):
        self.keywords = keywords_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Only row 0 of ``input_ids`` is inspected; ``scores`` is unused.
        newest_token = input_ids[0][-1]
        return newest_token in self.keywords

class MotionAudio(pl.LightningModule):
    """Audio-to-caption model: HuBERT features -> Q-Former -> LLaMA.

    Audio is encoded with a HuBERT model, projected to the Q-Former width,
    summarised by 32 learned query tokens, projected to the LLaMA embedding
    width and placed in front of a text prompt. ``forward`` returns the LLaMA
    language-modelling loss; ``inference`` returns generated token ids.
    """

    def __init__(
        self,
        hubert_ckpt="weights/models--TencentGameMate--chinese-hubert-large/snapshots/90cb660492214f687e60f5ca509b20edae6e75bd",
        text2vec_ckpt="weights/models--shibing624--text2vec-base-chinese/snapshots/26420fdf61ddfd92fafbaf3bc21a7c06b1812248",
        llama_ckpt="weights/models--minlik--chinese-llama-7b-merged/snapshots/1ca4d87576f1fef4d44a949fb65bbe6b96675872"):
        """Load the pretrained sub-models and create the trainable bridge layers.

        All checkpoint paths are resolved relative to the directory of this file.
        """
        super(MotionAudio,self).__init__()
        
        #path
        current_directory = os.path.dirname(os.path.abspath(__file__))
        hubert_ckpt = os.path.join(current_directory, hubert_ckpt)
        text2vec_ckpt = os.path.join(current_directory, text2vec_ckpt)
        llama_ckpt = os.path.join(current_directory, llama_ckpt)

        #hubert
        self.hubert_model=HubertModel.from_pretrained(hubert_ckpt)
        self.hubert_feature_extractor=Wav2Vec2FeatureExtractor.from_pretrained(hubert_ckpt)
        #text2vec
        self.text2vec_model=BertModel.from_pretrained(text2vec_ckpt)
        self.text2vec_tokenizer=BertTokenizer.from_pretrained(text2vec_ckpt)


        #llama
        self.llama_model=LlamaForCausalLM.from_pretrained(llama_ckpt, torch_dtype="auto")
        #self.llama_model = self.llama_model.to(torch.float32)
        self.llama_tokenizer=LlamaTokenizer.from_pretrained(llama_ckpt)
        if self.llama_tokenizer.pad_token_id is None:
            self.llama_tokenizer.pad_token = self.llama_tokenizer.unk_token
        #self.llama_model.model.resize_token_embeddings(len(self.llama_tokenizer))

        # Freeze everything registered so far (HuBERT, text2vec, LLaMA).
        # Modules created after this loop keep requires_grad=True, so the
        # position of this loop matters.
        for p in self.parameters():
            p.requires_grad = False
        #Qformer
        self.audio_Qformer,self.audio_query_tokens=self.init_Qformer(num_query_token=32,vision_width=768)
        # Drop the Q-Former parts that are not used: LM head, word/position
        # embeddings and the per-layer feed-forward blocks.
        self.audio_Qformer.cls = None
        self.audio_Qformer.bert.embeddings.word_embeddings = None
        self.audio_Qformer.bert.embeddings.position_embeddings = None
        for layer in self.audio_Qformer.bert.encoder.layer:
            layer.output = None
            layer.intermediate = None
        
        # HuBERT hidden size (1024) -> Q-Former encoder width (768)
        self.audio_project=nn.Linear(1024,768)

        # Q-Former hidden size (768) -> LLaMA embedding size (4096)
        self.audio_llama_project=nn.Linear(768,4096)

        
        
    def init_Qformer(self,num_query_token, vision_width, cross_attention_freq=2):
        """Build the Q-Former and its learned query tokens.

        The configuration and weights come from the bundled bert-base-chinese
        snapshot; weights are loaded non-strictly because cross-attention is
        added to the configuration.

        Returns:
            ``(Qformer, query_tokens)`` where ``query_tokens`` is an
            ``nn.Parameter`` of shape ``(1, num_query_token, hidden_size)``.
        """
        path=os.path.dirname(os.path.abspath(__file__))
        config_path=os.path.join(path,"weights/models--bert-base-chinese/snapshots/8d2a91f91cc38c96bb8b4556ba70c392f8d5ee55")
        encoder_config = BertConfig.from_pretrained(config_path)
        encoder_config.encoder_width = vision_width
        # insert cross-attention layer every other block
        encoder_config.add_cross_attention = True
        encoder_config.cross_attention_freq = cross_attention_freq
        encoder_config.query_length = num_query_token
        Qformer = BertLMHeadModel(config=encoder_config)
        ckpt=os.path.join(path,"weights/models--bert-base-chinese/snapshots/8d2a91f91cc38c96bb8b4556ba70c392f8d5ee55/pytorch_model.bin")
        Qformer.load_state_dict(torch.load(ckpt),strict=False)

        query_tokens = nn.Parameter(
            torch.zeros(1, num_query_token, encoder_config.hidden_size)
        )
        query_tokens.data.normal_(mean=0.0, std=encoder_config.initializer_range)
        return Qformer, query_tokens
    def mean_pooling(self,model_output, attention_mask):
        """Average the token embeddings in ``model_output[0]`` over dim 1, weighted by ``attention_mask``."""
        token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        # The clamp keeps the division safe when a row's mask is all zeros.
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    

    
    
    def forward(self, audio, describtion):
        """Return the LLaMA language-modelling loss for a batch.

        Args:
            audio: Raw audio passed to the HuBERT feature extractor with a
                sampling rate of 16000.
            describtion: List of target caption strings, one per audio item.
        """
        #hubert
        with torch.no_grad():
            audio_feature=self.hubert_feature_extractor(audio, padding=True,return_tensors="pt",sampling_rate=16000).input_values.to(self.device)
            # NOTE(review): the cast assumes the HuBERT weights are half precision -- confirm.
            audio_feature = audio_feature.half()
            audio_feature=self.hubert_model(audio_feature).last_hidden_state
        audio_feature=self.audio_project(audio_feature)

        #text2vec
        with torch.no_grad():
            #describtion
            describtion=[s+"</s>" for s in describtion]
            describtion_input=self.text2vec_tokenizer(describtion, padding=True, truncation=True, return_tensors='pt').to(self.device)
            describtion_feature=self.text2vec_model(**describtion_input)
            # NOTE(review): describtion_feature is not used later in this method.
            describtion_feature=self.mean_pooling(describtion_feature,describtion_input['attention_mask']).unsqueeze(1)


        #Qformer
        audio_query_tokens=self.audio_query_tokens.expand(audio_feature.shape[0], -1, -1)
        frame_atts = torch.ones(audio_feature.size()[:-1], dtype=torch.long).to(audio_feature.device)
        #print(audio_query_tokens.shape,audio_feature.shape,frame_atts.shape)
        audio_query_output=self.audio_Qformer.bert(
            query_embeds=audio_query_tokens, #[32,768]
            encoder_hidden_states=audio_feature,
            encoder_attention_mask=frame_atts,
            return_dict=True,
            )
        audio_hidden=audio_query_output.last_hidden_state

        text_tokens=self.llama_tokenizer(describtion, padding="longest", truncation=True, return_tensors='pt',add_special_tokens=False).to(self.device)

        #print(audio_hidden.shape)
        audio_input=self.audio_llama_project(audio_hidden)
        batchsize=audio_input.shape[0]
        bos=torch.ones([batchsize, 1],dtype=text_tokens.input_ids.dtype).to(self.device) * self.llama_tokenizer.bos_token_id
        bos_embeds=self.llama_model.model.embed_tokens(bos.to(self.device))
        #in training, we use different prompts for each audio
        prompts=[ "请用一句话用中文表述音频中说话人的情感状态:", "请用一句中文概括音频中讲话者的情感:", "请用一句中文简述音频里说话者的情感表现:", "请用一句中文概述所给音频中说话人的情感:", "请用一句话用中文描述音频中说话人的情感:", "请用一句中文描绘音频中说话者的情感:", "请用一句中文描述所给音频中说话人的情感:", "请用一句中文简要表述音频中说话人的情感:", "请用一句中文概括所给音频中说话者的情感:", "请用一句话用中文描述所给音频中说话人的情感:", "请用一句中文简述所给音频里说话者的情感:", "请用一句中文描述音频中讲话者的情感:", "请用一句中文概述音频中说话人的情感:", "请用一句话用中文表达音频中说话者的情感:", "请用一句中文简要描述音频中说话人的情感:", "请用一句中文概括音频中说话人的情感:", "请用一句中文描述所给音频中讲话者的情感:", "请用一句中文简述音频中说话者的情感:", "请用一句中文概述所给音频中讲话者的情感:", "请用一句话用中文描述音频中讲话者的情感:", "请用一句中文描述音频中说话人的情感状态:", "请用一句中文概括所给音频里说话者的情感:", "请用一句中文简述所给音频中说话人的情感表现:", "请用一句中文概述音频里说话者的情感:", "请用一句话用中文描述音频中说话人的情感表现:", "请用一句中文描绘所给音频中说话者的情感:", "请用一句中文描述音频里讲话者的情感:", "请用一句中文简要表述所给音频中说话人的情感:", "请用一句中文概括音频里说话者的情感:", "请用一句话用中文描述所给音频中讲话者的情感:" ]
        import random
        # One prompt is drawn per call and shared by the whole batch.
        prompt=prompts[random.randint(0,len(prompts)-1)]
        prompts_id=self.llama_tokenizer(prompt,return_tensors='pt').input_ids.to(self.device)
        prompts_id=prompts_id.expand(batchsize,-1)
        prompts_embeds=self.llama_model.model.embed_tokens(prompts_id)

        
        # Padding positions are set to -100 in the targets.
        targets=text_tokens.input_ids.masked_fill(
            text_tokens.input_ids==self.llama_tokenizer.pad_token_id,-100
        )
        text_embeds=self.llama_model.model.embed_tokens(text_tokens.input_ids.to(self.device))
        # Sequence layout: [BOS] [audio queries] [prompt] [target text]
        input_embeds=torch.cat([bos_embeds,audio_input,prompts_embeds,text_embeds],dim=1)
        atts_audio=torch.ones(audio_input.size()[:-1], dtype=torch.long).to(audio_input.device)

        #atts_audio=atts_audio.to(self.device)
        attns_text=text_tokens.attention_mask
        attns_bos=atts_audio[:,:1]
        attns_prompt=torch.ones(prompts_embeds.size()[:-1], dtype=torch.long).to(prompts_embeds.device)
        attns=torch.cat([attns_bos,atts_audio,attns_prompt,attns_text],dim=1)
        # NOTE(review): targets only span the text tokens while input_embeds
        # also holds the BOS/audio/prompt prefix; presumably the project's
        # LlamaForCausalLM aligns them -- confirm.
        print(input_embeds.shape,attns.shape,targets.shape)
        outputs=self.llama_model(
            inputs_embeds=input_embeds,
            attention_mask=attns,
            labels=targets,
            return_dict=True,
        )
        loss=outputs.loss
        #print(loss)

        return loss
    def training_step(self, batch, batch_idx):
        """Compute and log ``train_loss`` for one batch of ``(audio, describtion, _)``."""
        audio, describtion,_=batch
        loss=self.forward(audio, describtion)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True,batch_size=len(audio),sync_dist=True)
        return loss
    def validation_step(self, batch, batch_idx):
        """Compute and log ``val_loss`` for one batch of ``(audio, describtion, _)``."""
        audio, describtion,_=batch
        loss=self.forward(audio, describtion)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True,batch_size=len(audio),sync_dist=True)
        return loss
    def configure_optimizers(self):
        """Return an AdamW optimizer over the parameters that still require gradients."""
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, self.parameters()), lr=0.000013, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-6)
        return optimizer
    def inference(self, audio_feature):
        """Generate caption token ids for a batch of audio inputs.

        Args:
            audio_feature: Tensor fed directly to the HuBERT model (no feature
                extractor is applied here).

        Returns:
            The token-id output of ``llama_model.generate``; decode it with the
            LLaMA tokenizer to obtain text.
        """
        with torch.no_grad():
            audio_feature=self.hubert_model(audio_feature).last_hidden_state
            audio_feature=self.audio_project(audio_feature)



        #Qformer
        audio_query_tokens=self.audio_query_tokens.expand(audio_feature.shape[0], -1, -1)
        frame_atts = torch.ones(audio_feature.size()[:-1], dtype=torch.long).to(audio_feature.device)
        audio_query_output=self.audio_Qformer.bert(
            query_embeds=audio_query_tokens, #[32,768]
            encoder_hidden_states=audio_feature,
            encoder_attention_mask=frame_atts,
            return_dict=True,
            )
        audio_hidden=audio_query_output.last_hidden_state

        #print(audio_hidden.shape)
        audio_input=self.audio_llama_project(audio_hidden)

        batchsize=audio_input.shape[0]
        #in inference, we use the same prompt for all audio
        prompt="请用一句中文简述音频里说话者的情感表现:"
        
        prompts_id=self.llama_tokenizer(prompt,return_tensors='pt').input_ids.to(self.device)
        prompts_id=prompts_id.expand(batchsize,-1)
        prompts_embeds=self.llama_model.model.embed_tokens(prompts_id)

        bos=torch.ones([batchsize, 1],dtype=torch.int64).to(self.device) * self.llama_tokenizer.bos_token_id
        bos_embeds=self.llama_model.model.embed_tokens(bos.to(self.device))
        embeds=torch.cat([bos_embeds,audio_input,prompts_embeds],dim=1)
        #print(embeds.dtype)
        # NOTE(review): the cast assumes the LLaMA weights are half precision -- confirm.
        embeds=embeds.half()

        with torch.no_grad():
            outputs=self.llama_model.generate(
                inputs_embeds=embeds,
                max_new_tokens=50,
                min_new_tokens=3,
                do_sample=True,
                top_k=10,
                top_p=0.95,
                num_beams=5,
                repetition_penalty=10.0,
                pad_token_id=self.llama_tokenizer.pad_token_id,
                eos_token_id=self.llama_tokenizer.eos_token_id,
                #stopping_criteria=stopping_criteria,
                early_stopping=True,
                num_return_sequences=1,
                no_repeat_ngram_size=2,

            )
        # print(output_tokens)
        return outputs
    
    def post_processing(self, sentences,device):
        """Drop the three sentences with the lowest average similarity to the rest.

        Pairwise similarity is computed with ``SimiCal`` for every ordered pair
        (self-pairs included); the remaining sentences keep their order.
        """
        similarities = np.zeros((len(sentences), len(sentences)))
        simi_cal=SimiCal(device=device)
        for i in range(len(sentences)):
            for j in range(len(sentences)):
                similarities[i, j] = simi_cal(sentences[i], sentences[j])
        avg_similarities = np.mean(similarities, axis=1)
        least_related_indices=avg_similarities.argsort()[:3]
        remaining_sentences = [sentences[i] for i in range(len(sentences)) if i not in least_related_indices]

        
        return remaining_sentences
    def test_step(self, batch, batch_idx):
        """Generate captions for one test batch and append them to ``result/result_1.txt``.

        The file path and the reference description of the first batch item
        are written, followed by every decoded generation
        (``result``, ``result2``, ``result3``, ...).
        """
        audio,_,describtion,fpath=batch
        # inference() returns token ids only (a single value, not a tuple);
        # decode them to text before writing.
        output_tokens=self.inference(audio)
        sentences=self.llama_tokenizer.batch_decode(output_tokens,skip_special_tokens=True)
        path=os.path.dirname(os.path.abspath(__file__))
        test_file="result/result_1.txt"
        test_file=os.path.join(path,test_file)
        os.makedirs(os.path.dirname(test_file),exist_ok=True)
        with open(test_file,"a",encoding="utf-8") as f:
            f.write("file: "+fpath[0]+"\n")
            #f.write("prompt: "+prompt+"\n")
            f.write("origin: "+describtion[0]+"\n")
            # Write as many results as were generated rather than assuming five.
            for k,sentence in enumerate(sentences):
                label="result" if k==0 else "result"+str(k+1)
                f.write(label+": "+sentence+"\n")
            f.write("\n")
        

def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters.

    Only parameters whose ``requires_grad`` flag is truthy are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

if __name__ == "__main__":
    # When run as a script, build the model with its default arguments and
    # print how many trainable parameter elements it has.
    print(count_parameters(MotionAudio()))



================================================
FILE: AutomaticPipeline/outputs/labels_LJspeech_0.json
================================================
{
    "LJSpeech_LJ010-0227": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:fast\temotion:sad\ttranscription: He went from Newgate first to Bethlehem, from which he was removed to Broadmoor."
    },
    "LJSpeech_LJ050-0106": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: to devise a practical system which has any reasonable possibility of revealing"
    },
    "LJSpeech_LJ043-0140": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: He also studied Dallas bus schedules to prepare for his later use of"
    },
    "LJSpeech_LJ018-0032": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:sad\ttranscription: There was no mystery about his departure. He had gone to Canada by the Victoria"
    },
    "LJSpeech_LJ012-0117": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: By and by the occupant of the room noticed something glittering in the center of the fire."
    },
    "LJSpeech_LJ038-0057": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: Deputy Sheriff Walther's brought a shotgun into the theater, but laid it on some"
    },
    "LJSpeech_LJ017-0158": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:happy\ttranscription: She had a little fortune of her own, some 1,700 pounds"
    },
    "LJSpeech_LJ026-0094": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: This however is probably not a source of vital energy, but only contributes to the"
    },
    "LJSpeech_LJ045-0152": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:happy\ttranscription: The events of that evening can best be appreciated through Marina Oswald's testimony."
    },
    "LJSpeech_LJ019-0264": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: and it was decidedly of opinion that in all short sentences the hard labor"
    },
    "LJSpeech_LJ040-0047": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: That associate did not think that Oswald was a communist."
    },
    "LJSpeech_LJ012-0015": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Weedon and LaCassar to 12 and 6 months respectively in cold baths."
    },
    "LJSpeech_LJ019-0097": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:sad\ttranscription: The want of uniformity in prison discipline became air long and acknowledged evil."
    },
    "LJSpeech_LJ039-0093": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: at a distance of 265.3 feet was, quote,"
    },
    "LJSpeech_LJ018-0028": {
        "labels": "age:Elderly\tgender:female\tpitch:high\tvolume:normal\tspeed:normal\temotion:sad\ttranscription: who had been a lodger of his. Muehler had given the cabman's little daughter"
    },
    "LJSpeech_LJ028-0287": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:angry\ttranscription: but found none by which he could hope to prevail unless he maimed himself."
    },
    "LJSpeech_LJ024-0117": {
        "labels": "age:Elderly\tgender:female\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: You will find that many of those who pretend to support you will sabotage your plans."
    },
    "LJSpeech_LJ029-0172": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: The two Dallas newspapers provided their readers with a steady stream of information and"
    },
    "LJSpeech_LJ024-0044": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:angry\ttranscription: that I will appoint justices who will act as justices and not as"
    },
    "LJSpeech_LJ045-0246": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:normal\temotion:sad\ttranscription: He sought for himself a place in history, a role as the great"
    },
    "LJSpeech_LJ042-0191": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: While, quote, resourcefulness and patient working towards the aforesaid goal"
    },
    "LJSpeech_LJ050-0099": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: It is apparent that a good deal of further consideration and experimentation"
    },
    "LJSpeech_LJ017-0101": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: That day, Palmer had bought more strychnia and had called in a fresh doctor."
    },
    "LJSpeech_LJ016-0388": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:happy\ttranscription: A few go further and are almost gluttonous."
    },
    "LJSpeech_LJ007-0129": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:normal\temotion:angry\ttranscription: the sane and the insane, the young and the old, the trivial offender and the"
    },
    "LJSpeech_LJ014-0195": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: Two of them supported Cope, who was still alive, although insensible, and Marley"
    },
    "LJSpeech_LJ027-0056": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:happy\ttranscription: The relation of skeleton and muscle in arthropods is exactly the reverse."
    },
    "LJSpeech_LJ030-0227": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Special Agent George W. Hickey Jr. in the rear seat of the Presidential"
    },
    "LJSpeech_LJ047-0210": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: I think all of those, if we had them all together..."
    },
    "LJSpeech_LJ048-0228": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: and others who were present say that no agent was inebriated or acted improperly."
    },
    "LJSpeech_LJ009-0104": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: The women set up a yell, which is mixed with a rustling noise, occasioned by the"
    },
    "LJSpeech_LJ017-0085": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: Palmer's plan was to administer poison in quantities insufficient to"
    },
    "LJSpeech_LJ018-0275": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: On Tarpey's defense, it was stated that the idea of the theft had been suggested"
    },
    "LJSpeech_LJ027-0064": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: All vertebrates and none other have two cavities."
    },
    "LJSpeech_LJ036-0084": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: Craig also claimed that when Fritz pointed out to Oswald that Craig had identified"
    },
    "LJSpeech_LJ040-0211": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: This would make him approximately ten, well almost eleven years old."
    },
    "LJSpeech_LJ032-0241": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:happy\ttranscription: Ten days prior to the Walker attempt, Oswald had undoubtedly received the rifle."
    },
    "LJSpeech_LJ002-0158": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: Other cases are recorded elsewhere, as at the Gilsburg Street Comptor, where"
    },
    "LJSpeech_LJ050-0077": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: and in taking preventive steps."
    },
    "LJSpeech_LJ032-0236": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: By checking the actual mailing dates of these issues and the time it usually takes"
    },
    "LJSpeech_LJ026-0102": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:happy\ttranscription: but root pressure due to osmosis, capillary action and evaporation."
    },
    "LJSpeech_LJ025-0057": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: such as the charai, are in constant and regular motion, was made out"
    },
    "LJSpeech_LJ018-0270": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: The assistant called with the jewels on approbation at a house specially hired for"
    },
    "LJSpeech_LJ013-0021": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:sad\ttranscription: The larboard pump was suffered to remain choked up and the longboat was"
    },
    "LJSpeech_LJ042-0050": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:sad\ttranscription: I have been a pro-communist for years and yet I have never met a communist."
    },
    "LJSpeech_LJ006-0075": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:happy\ttranscription: He saw certain rooms fill up, and yet took no steps to open others that were locked"
    },
    "LJSpeech_LJ048-0187": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: In addition, Secret Service agents riding in the motorcade were trained to"
    },
    "LJSpeech_LJ022-0186": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:happy\ttranscription: Twenty years of experience with this system have justified the efforts made to"
    },
    "LJSpeech_LJ013-0078": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: Barbara was subsequently pardoned, but was not replaced on the roles as an attorney."
    },
    "LJSpeech_LJ006-0015": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: one which required discretion, judgment and knowledge of law, with sufficient"
    },
    "LJSpeech_LJ048-0061": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: Under proper procedures, knowledge of the pending Presidential visit might have prompted"
    },
    "LJSpeech_LJ015-0190": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:sad\ttranscription: that of dishonest rogues who assume piety and philanthropy as a cloak for the world."
    },
    "LJSpeech_LJ003-0115": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: The judge sat in proper form. He was punctiliously styled, my"
    },
    "LJSpeech_LJ045-0244": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:angry\ttranscription: Long before the assassination, he expressed his hatred for American society."
    },
    "LJSpeech_LJ004-0051": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:happy\ttranscription: When in Belgium he had examined with great satisfaction the admirable manner"
    },
    "LJSpeech_LJ011-0265": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:sad\ttranscription: This Mr. Canning had left his widow a life interest in two thousand pounds."
    },
    "LJSpeech_LJ028-0087": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: Such was the appearance of the builder of the walls of Babylon."
    },
    "LJSpeech_LJ028-0178": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:happy\ttranscription: The walls of Babylon were so long and wide and high that all who"
    },
    "LJSpeech_LJ011-0113": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:sad\ttranscription: He met his death with unshaken firmness, only in treating that a certain blue"
    },
    "LJSpeech_LJ039-0162": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:sad\ttranscription: In an effort to test the rifle under conditions which simulated those which prevailed during"
    },
    "LJSpeech_LJ045-0066": {
        "labels": "age:Middle-aged\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:sad\ttranscription: Oswald struck his wife on occasion."
    },
    "LJSpeech_LJ028-0277": {
        "labels": "age:Middle-aged\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:surprised\ttranscription: One of his Sumter mules gave birth to a foal."
    },
    "LJSpeech_LJ048-0197": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: I then told the officers that their primary duty was traffic and crowd control and that"
    },
    "LJSpeech_LJ019-0170": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: More officers were appointed, as the time of so many of those already on the staff"
    },
    "LJSpeech_LJ041-0092": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: and quote, individual that you would brainwash and quite easy, but I think"
    },
    "LJSpeech_LJ039-0027": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:happy\ttranscription: and switch with Azure."
    },
    "LJSpeech_LJ030-0077": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: Each agent carried a .38 caliber pistol and a shotgun and"
    },
    "LJSpeech_LJ038-0112": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: As previously indicated, Marina Oswald testified that she took the"
    },
    "LJSpeech_LJ017-0119": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:low\tspeed:fast\temotion:happy\ttranscription: All the circumstances were so suspicious that he could not escape the criminal charge."
    },
    "LJSpeech_LJ007-0242": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: No complete and permanent improvement was indeed possible while Newgate remained unchanged."
    },
    "LJSpeech_LJ020-0046": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:happy\ttranscription: Close the dough over it, dust your hands and kneading board with flour,"
    },
    "LJSpeech_LJ018-0001": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: The Chronicles of Newgate, Volume 2, by Arthur Griffiths. Section 21."
    },
    "LJSpeech_LJ026-0098": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: The circulatory system distributes these foods in animals,"
    },
    "LJSpeech_LJ021-0152": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: and to experiment for a reasonable time with measures suitable to"
    },
    "LJSpeech_LJ047-0223": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: I don't recall the exact date, it was about a week prior.\""
    },
    "LJSpeech_LJ007-0040": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:happy\ttranscription: In a second room were fourteen more who had every hope of a reprieve."
    },
    "LJSpeech_LJ050-0103": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:sad\ttranscription: were all men who acted alone in their criminal acts against our leaders."
    },
    "LJSpeech_LJ037-0016": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Scoggins hurriedly left his seat and hid behind the cab, as the man came back to"
    },
    "LJSpeech_LJ028-0336": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:sad\ttranscription: Darius now, still keeping to the plan agreed upon."
    },
    "LJSpeech_LJ004-0233": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: Under the new rule, visitors were not allowed to pass into the interior of the prison,"
    },
    "LJSpeech_LJ024-0127": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:angry\ttranscription: You who know me can have no fear that I would tolerate the destruction"
    },
    "LJSpeech_LJ004-0189": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:happy\ttranscription: The great principles of classification, cleanliness and employment were closely observed."
    },
    "LJSpeech_LJ005-0045": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: Nor did it confine itself to mere verbal recommendations."
    },
    "LJSpeech_LJ005-0158": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:happy\ttranscription: Bedding and clothing was still denied, but only in a few jails."
    },
    "LJSpeech_LJ011-0018": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: The crime, long carried on without detection, was first discovered in 1820."
    },
    "LJSpeech_LJ017-0022": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: was that of Eliza Fenning, who was convicted of an attempt to poison a whole family."
    },
    "LJSpeech_LJ030-0136": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:normal\temotion:sad\ttranscription: The President replied, that is very obvious."
    },
    "LJSpeech_LJ039-0078": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: The effect of a four-power telescopic sight on the difficulty of these shots"
    },
    "LJSpeech_LJ048-0254": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: advised in the course of the Secret Service investigation of these events that each agent"
    },
    "LJSpeech_LJ049-0083": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:angry\ttranscription: It has long been a federal crime to conspire to injure any federal officer on account of"
    },
    "LJSpeech_LJ006-0043": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: The disgraceful overcrowding had been partially ended, but the same evils of"
    },
    "LJSpeech_LJ025-0109": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: has been as completely invalidated as the third and second."
    },
    "LJSpeech_LJ044-0163": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: Marina Oswald testified that her husband engaged in fair play for Cuba."
    },
    "LJSpeech_LJ003-0037": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: A site was purchased between Red Lion and White Cross streets and a new"
    },
    "LJSpeech_LJ013-0117": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:happy\ttranscription: and with another carry off the plate-chest in broad daylight and as a matter of business."
    },
    "LJSpeech_LJ027-0096": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: as indeed she must be according to the derivation theory."
    },
    "LJSpeech_LJ021-0035": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:happy\ttranscription: saved debtors and creditors alike in many other fields of enterprise."
    },
    "LJSpeech_LJ049-0159": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: the Secret Service, then the only federal investigative agency, assumed"
    },
    "LJSpeech_LJ039-0100": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:happy\ttranscription: During the first week of an intensive eight-week training period, he received and"
    },
    "LJSpeech_LJ019-0302": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: In 1862, there were in all 193 jails."
    },
    "LJSpeech_LJ030-0229": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: At this point, the cars were speeding through the underpass and had left the scene of the"
    },
    "LJSpeech_LJ005-0151": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: In others, women were very properly exempted from it, and also from all severe"
    },
    "LJSpeech_LJ015-0115": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:fast\temotion:sad\ttranscription: Little more remains to be said about Robson. He appears to have accepted his position."
    },
    "LJSpeech_LJ010-0279": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: I shall mention briefly one more case, in which, however, there was no murderous"
    },
    "LJSpeech_LJ044-0119": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: into which Oswald, in his own words, had quote, thrown himself. He sought it"
    },
    "LJSpeech_LJ049-0115": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: of the person who is actually in the exercise of the executive power or"
    },
    "LJSpeech_LJ004-0139": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:happy\ttranscription: In the morning, the stench and heat were so oppressive that he and everyone else"
    },
    "LJSpeech_LJ007-0223": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: The prison officials appear to be on the side of the inspectors, to the great dissatisfaction"
    },
    "LJSpeech_LJ032-0070": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: Ordinarily, Inspector Holmes testified, identification is not requested because"
    },
    "LJSpeech_LJ015-0294": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: He forgot to add that it was to be placed to Ralph's credit, and when he called"
    },
    "LJSpeech_LJ014-0083": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: which, having possessed herself of the murdered man's keys, she rifled from"
    },
    "LJSpeech_LJ040-0195": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: It appears that he did not want to do any of the things which the authorities suggest"
    },
    "LJSpeech_LJ030-0028": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: and the Vice President and Mrs. Johnson were in the receiving line to greet President"
    },
    "LJSpeech_LJ009-0145": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: and he seemed to suffer great inward agitation when the ordinary, particularly"
    },
    "LJSpeech_LJ031-0117": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: assisted by doctors William Osborne and John Parker."
    },
    "LJSpeech_LJ004-0115": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:happy\ttranscription: Infirmaries separating the sexes were also to be provided. A chapel too."
    },
    "LJSpeech_LJ039-0200": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: And one of these agents, Robert A. Frazier,"
    },
    "LJSpeech_LJ028-0481": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:happy\ttranscription: Nabo-Polisar, the father, my begetter, built Imgur-Bel."
    },
    "LJSpeech_LJ003-0245": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:disgusted\ttranscription: preparatory to their appearance in the Old Bailey. Irons were seldom removed."
    },
    "LJSpeech_LJ017-0261": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: Seven were found guilty of murder on the high seas and one, Carlos, a criminal."
    },
    "LJSpeech_LJ007-0137": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: More attention to ventilation, which was altogether neglected and inadequate, would"
    },
    "LJSpeech_LJ041-0017": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: Wobel also recalled that Oswald once outlined a plan to cut the glass in the"
    },
    "LJSpeech_LJ043-0061": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: At the time of his defection, when he evidenced no interest in his father and"
    },
    "LJSpeech_LJ033-0097": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: Frazier parked the car in the company parking lot about two blocks north of the depository"
    },
    "LJSpeech_LJ019-0234": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:sad\ttranscription: The old buildings were entirely disused, and the whole of the inmates of Newgate were"
    },
    "LJSpeech_LJ016-0296": {
        "labels": "age:Middle-aged\tgender:male\tpitch:high\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: The actual execution made some impression."
    },
    "LJSpeech_LJ023-0072": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: Congress passed a statute which, in 1803, the courts"
    },
    "LJSpeech_LJ050-0100": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: The Commission recognizes that no set of meaningful criteria will yield"
    },
    "LJSpeech_LJ028-0113": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: With mortar and bricks, he built two moat walls about the city."
    },
    "LJSpeech_LJ013-0121": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:normal\temotion:sad\ttranscription: Howe and his accomplice were arrested. The former was found guilty and sentenced"
    },
    "LJSpeech_LJ014-0273": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: The detection of these frauds came while he was still prominently before the world as"
    },
    "LJSpeech_LJ027-0136": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Illustrations quoted from the works of Romains and Locante will make this principle"
    },
    "LJSpeech_LJ003-0067": {
        "labels": "age:Middle-aged\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: in the then existing state of the law,"
    },
    "LJSpeech_LJ039-0220": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: and that one would not have to be an expert marksman to have accomplished the assassination."
    },
    "LJSpeech_LJ018-0029": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: A photograph of Mueller shown the jeweler was identified as the likeness of the jewel."
    },
    "LJSpeech_LJ001-0080": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: He seems to have taken the letter of the Elseviers of the 17th century for his mom."
    },
    "LJSpeech_LJ028-0454": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: But both sections originally reached the river."
    },
    "LJSpeech_LJ037-0244": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: Westbrook identified Commission Exhibit 162 as the light-colored"
    },
    "LJSpeech_LJ008-0174": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:sad\ttranscription: One cartload of spectators having broken down, some of its occupants fell"
    },
    "LJSpeech_LJ009-0302": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:sad\ttranscription: Another man was hired, himself a convict, whose fees for self and wife were"
    },
    "LJSpeech_LJ002-0182": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: the fleet and the Marshall Sea prisons especially devoted to them."
    },
    "LJSpeech_LJ048-0023": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: and he had told us during one of the interviews that he would probably take his wife back to"
    },
    "LJSpeech_LJ019-0257": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:low\tspeed:slow\temotion:sad\ttranscription: Here, the tread wheel was in use. There, cellular cranks."
    },
    "LJSpeech_LJ040-0046": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: which one associate described as, quote, irrevocable, end quote."
    },
    "LJSpeech_LJ006-0295": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: The governor was also personally responsible for gross contravention of this rule of"
    },
    "LJSpeech_LJ028-0252": {
        "labels": "age:Middle-aged\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: Only the king's son, Belshazzar, was killed."
    },
    "LJSpeech_LJ035-0002": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: Chapter 4 The Assassin Part 4 Oswald's Actions"
    },
    "LJSpeech_LJ009-0189": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: Persons were still living in 1855 who had witnessed dissections at Hicks Hall."
    },
    "LJSpeech_LJ011-0032": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: declared that they had hitherto formed a high opinion of his honor, integrity, and"
    },
    "LJSpeech_LJ042-0155": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: It appears to be the work of a fairly well-organized person."
    },
    "LJSpeech_LJ004-0039": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:sad\ttranscription: They were hopeless of any general reform by the action of the executive alone."
    },
    "LJSpeech_LJ025-0157": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: under these circumstances, unnatural as they are, with proper management"
    },
    "LJSpeech_LJ030-0121": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:happy\ttranscription: Several times, Special Agent John D. Reddy came forward from the right front"
    },
    "LJSpeech_LJ010-0135": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:low\tspeed:fast\temotion:happy\ttranscription: yelled out three cheers to the populace whom he faced."
    },
    "LJSpeech_LJ050-0118": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: Since these agencies are already obliged constantly to evaluate the activities"
    },
    "LJSpeech_LJ040-0175": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: through which they could not reach him, but that he preferred the veil to remain intact."
    },
    "LJSpeech_LJ013-0042": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: the foundations of which had been laid by buying old ships on purpose to cast the more"
    },
    "LJSpeech_LJ014-0076": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:happy\ttranscription: He was seen afterwards smoking and talking with his hosts in their back parlor."
    },
    "LJSpeech_LJ008-0097": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:sad\ttranscription: No sooner was the job finished than half a dozen competitors appeared."
    },
    "LJSpeech_LJ043-0166": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Possibly he might have wanted to be caught and wanted his involvement made clear"
    },
    "LJSpeech_LJ045-0127": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: He absolved the Soviet embassy in Mexico City of any blame for his difficulties."
    },
    "LJSpeech_LJ027-0117": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:happy\ttranscription: Now, here again the former theory appears to be triumphant over the latter."
    },
    "LJSpeech_LJ035-0076": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:happy\ttranscription: Special Agent John Howlett of the Secret Service carried a rifle from the south"
    },
    "LJSpeech_LJ005-0101": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:sad\ttranscription: Quince it deduced the practice and condition of every prison that replied."
    },
    "LJSpeech_LJ028-0302": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:happy\ttranscription: I will desert to the enemy as I am, and when I get into their city,"
    },
    "LJSpeech_LJ036-0196": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: Tippett patrolled District 78 in the Oak Cliff area of Dixie."
    },
    "LJSpeech_LJ028-0191": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:happy\ttranscription: The old enemies of Babylon rejoiced."
    },
    "LJSpeech_LJ038-0214": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: and the other paragraphs instructed her on the disposal of Oswald's personal effects"
    },
    "LJSpeech_LJ003-0304": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: with the penalty of forfeiting the day's allowance of food, an increase of which the committee"
    },
    "LJSpeech_LJ019-0009": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: or to insist upon the construction of prisons on the most approved plan."
    },
    "LJSpeech_LJ022-0192": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: They contemplate the enrichment of our national life."
    },
    "LJSpeech_LJ007-0026": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:sad\ttranscription: And with all this, the most dreadful oaths, the worst language, too bad to be repeated."
    },
    "LJSpeech_LJ035-0158": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: A blue jacket, later identified by Marina Oswald as her husband's, was"
    },
    "LJSpeech_LJ033-0014": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:sad\ttranscription: Lee Harvey Oswald lived in a rooming house in Dallas, while his wife and children lived"
    },
    "LJSpeech_LJ018-0369": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: the mysterious Bravo case, that of Dr. Lamson, and that of"
    },
    "LJSpeech_LJ017-0028": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: that she had had a quarrel with her mistress, and that the latter, with all others,"
    },
    "LJSpeech_LJ004-0109": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: according to their categories or crimes."
    },
    "LJSpeech_LJ011-0239": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Where robbery with violence was intended, the perpetrators had now to adopt various"
    },
    "LJSpeech_LJ049-0034": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:normal\tspeed:normal\temotion:surprised\ttranscription: could have reached the President in time to protect him from the second and fatal shot"
    },
    "LJSpeech_LJ016-0389": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: Giovanni Lanni, the Italian boy who murdered a French woman in the hay market."
    },
    "LJSpeech_LJ037-0018": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:sad\ttranscription: Skaggins saw him and heard him mutter either, Poor damn cop or..."
    },
    "LJSpeech_LJ041-0163": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: Out of a combination of Oswald's known Marxist sympathies and George Orwell's"
    },
    "LJSpeech_LJ017-0178": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:surprised\ttranscription: It appeared that several persons with whom she was intimate had succumbed suddenly."
    },
    "LJSpeech_LJ048-0006": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: or to the Vice President.\""
    },
    "LJSpeech_LJ002-0184": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: the latter two being also a prison for felons and vagrants arrested within certain"
    },
    "LJSpeech_LJ012-0143": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: Money Moses had received the stolen gold dust from Moss's father-in-law, Davis."
    },
    "LJSpeech_LJ023-0012": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: But when almost two years later it came before the Supreme"
    },
    "LJSpeech_LJ036-0068": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:sad\ttranscription: Both buses stopped within one block of the depository building."
    },
    "LJSpeech_LJ046-0158": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: At the time of the assassination, the active PRS general files contained"
    },
    "LJSpeech_LJ011-0287": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: but at length managed to wriggle out of the chain which confined his body and"
    },
    "LJSpeech_LJ016-0399": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: Wainwright was allowed a cigar the night before execution, which he smoked in the prison yard."
    },
    "LJSpeech_LJ006-0012": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: but he would not arm them with any authority lest their cooperation might be offensive."
    },
    "LJSpeech_LJ005-0102": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: Upon these and the private visitations made by various members, the Society"
    },
    "LJSpeech_LJ001-0169": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:fast\temotion:neutral\ttranscription: The paper used for printing the small, highly ornamented French service books."
    },
    "LJSpeech_LJ011-0271": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: Mr. G went in the coach sent for him and alighted at 27 York Street,"
    },
    "LJSpeech_LJ014-0057": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:fast\temotion:neutral\ttranscription: London did not escape the contagion and, prominent among the detestable crimes of"
    },
    "LJSpeech_LJ008-0191": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:slow\temotion:disgusted\ttranscription: At Courvoisier's execution in 1840, it was the same, or worse."
    },
    "LJSpeech_LJ007-0120": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:sad\ttranscription: some to the infirmary, many more to the governor's house."
    },
    "LJSpeech_LJ012-0268": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: Suspicion grew almost to certainty as the evidence was unfolded."
    },
    "LJSpeech_LJ015-0298": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:happy\ttranscription: But while Hardwick was in communication with Sawward, the bank was in communication with"
    },
    "LJSpeech_LJ030-0213": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:neutral\ttranscription: Hill heard a second shot, approximately five seconds after the first, which removed"
    },
    "LJSpeech_LJ009-0121": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:happy\ttranscription: for all thy goodness and loving kindness to us and to all men."
    },
    "LJSpeech_LJ037-0084": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:neutral\ttranscription: Barbara Jeanette Davis testified that no one had shown her a picture of Oswald before."
    },
    "LJSpeech_LJ009-0219": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:happy\ttranscription: A clause was inserted to the effect that"
    },
    "LJSpeech_LJ040-0143": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: Contrary to reports that appeared after the assassination, the psychiatric examiner"
    },
    "LJSpeech_LJ006-0048": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: To these were still added an average of about 50, expecting the last penalty."
    },
    "LJSpeech_LJ033-0006": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: In this connection, the Commission considered one"
    },
    "LJSpeech_LJ044-0207": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:high\tspeed:normal\temotion:neutral\ttranscription: the economic embargo against that country, and the general policy of the United States."
    },
    "LJSpeech_LJ032-0260": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:normal\tspeed:normal\temotion:neutral\ttranscription: He thought it contained tent poles or possibly other camping equipment, such as"
    },
    "LJSpeech_LJ017-0030": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:normal\temotion:happy\ttranscription: When the spread of scientific knowledge places nefarious means at the disposal of"
    },
    "LJSpeech_LJ005-0084": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:normal\temotion:neutral\ttranscription: so as to prevent them from seeing, conversing, or holding any interaction."
    },
    "LJSpeech_LJ008-0236": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: Coiled up on the floor of the scaffold like a serpent, the hangman's rope."
    },
    "LJSpeech_LJ027-0020": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:happy\ttranscription: The unity in life, then, is not less a fact than is life's great diversity."
    },
    "LJSpeech_LJ017-0238": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:low\tspeed:slow\temotion:fearful\ttranscription: Tefer the second mate agreed, but constantly went in fear of his life."
    },
    "LJSpeech_LJ045-0233": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:normal\tspeed:slow\temotion:angry\ttranscription: He consistently refused to admit involvement in the assassination"
    },
    "LJSpeech_LJ038-0099": {
        "labels": "age:Middle-aged\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: From the outset, Oswald denied owning a rifle."
    },
    "LJSpeech_LJ028-0231": {
        "labels": "age:Elderly\tgender:male\tpitch:low\tvolume:low\tspeed:fast\temotion:neutral\ttranscription: whereupon they withdrew within their defenses."
    },
    "LJSpeech_LJ044-0239": {
        "labels": "age:Elderly\tgender:male\tpitch:high\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: and raises serious questions as to whether or not he ever expected to"
    },
    "LJSpeech_LJ050-0086": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: Under these criteria, whether the case should be referred to the Secret Service depends"
    },
    "LJSpeech_LJ047-0021": {
        "labels": "age:Elderly\tgender:male\tpitch:normal\tvolume:high\tspeed:slow\temotion:neutral\ttranscription: information regarding his relations with the U.S. Embassy in Moscow and background"
    }
}

================================================
FILE: AutomaticPipeline/outputs/labels_LJspeech_0.scp
================================================
LJSpeech	LJ010-0227	68	male	968.5380249023438	0.8162265419960022	0.022741336633663366	sad	 He went from Newgate first to Bethlehem, from which he was removed to Broadmoor.
LJSpeech	LJ050-0106	59	male	906.479736328125	0.8606864213943481	0.022741336633663366	neutral	 to devise a practical system which has any reasonable possibility of revealing
LJSpeech	LJ043-0140	57	male	1299.147705078125	0.8262879848480225	0.022741336633663366	neutral	 He also studied Dallas bus schedules to prepare for his later use of
LJSpeech	LJ018-0032	66	male	1098.609619140625	0.763823390007019	0.022741336633663366	sad	 There was no mystery about his departure. He had gone to Canada by the Victoria
LJSpeech	LJ012-0117	62	male	988.9229736328125	0.8686652183532715	0.022741336633663366	neutral	 By and by the occupant of the room noticed something glittering in the center of the fire.
LJSpeech	LJ038-0057	65	male	1146.2008056640625	0.838049590587616	0.022741336633663366	neutral	 Deputy Sheriff Walther's brought a shotgun into the theater, but laid it on some
LJSpeech	LJ017-0158	62	male	951.9786376953125	0.8371432423591614	0.022741336633663366	happy	 She had a little fortune of her own, some 1,700 pounds
LJSpeech	LJ026-0094	53	male	1013.83544921875	0.8242807984352112	0.022741336633663366	neutral	 This however is probably not a source of vital energy, but only contributes to the
LJSpeech	LJ045-0152	65	male	928.56396484375	0.8731129169464111	0.024118640350877192	happy	 The events of that evening can best be appreciated through Marina Oswald's testimony.
LJSpeech	LJ019-0264	67	male	1011.5900268554688	0.874998152256012	0.024177631578947367	neutral	 and it was decidedly of opinion that in all short sentences the hard labor
LJSpeech	LJ040-0047	63	male	1101.93212890625	0.5375705361366272	0.016539692982456142	neutral	 That associate did not think that Oswald was a communist.
LJSpeech	LJ012-0015	63	male	1143.9190673828125	0.8230547904968262	0.024177631578947367	neutral	 Weedon and LaCassar to 12 and 6 months respectively in cold baths.
LJSpeech	LJ019-0097	63	male	976.0210571289062	0.868423581123352	0.024009146341463415	sad	 The want of uniformity in prison discipline became air long and acknowledged evil.
LJSpeech	LJ039-0093	65	male	1167.13720703125	0.7914684414863586	0.024009146341463415	neutral	 at a distance of 265.3 feet was, quote,
LJSpeech	LJ018-0028	63	female	1036.7177734375	0.8202313780784607	0.024009146341463415	sad	 who had been a lodger of his. Muehler had given the cabman's little daughter
LJSpeech	LJ028-0287	66	male	1013.0613403320312	0.7936339974403381	0.024009146341463415	angry	 but found none by which he could hope to prevail unless he maimed himself.
LJSpeech	LJ024-0117	65	female	746.5477294921875	0.8057168126106262	0.024093094405594404	neutral	 You will find that many of those who pretend to support you will sabotage your plans.
LJSpeech	LJ029-0172	64	male	1144.1251220703125	0.8575349450111389	0.024093094405594404	neutral	 The two Dallas newspapers provided their readers with a steady stream of information and
LJSpeech	LJ024-0044	59	male	1082.681884765625	0.8042803406715393	0.024093094405594404	angry	 that I will appoint justices who will act as justices and not as
LJSpeech	LJ045-0246	60	male	916.866943359375	0.77986079454422	0.024093094405594404	sad	 He sought for himself a place in history, a role as the great
LJSpeech	LJ042-0191	59	male	1080.6220703125	0.7938746213912964	0.026502403846153846	neutral	 While, quote, resourcefulness and patient working towards the aforesaid goal
LJSpeech	LJ050-0099	66	male	1119.674072265625	0.8032099604606628	0.026502403846153846	neutral	 It is apparent that a good deal of further consideration and experimentation
LJSpeech	LJ017-0101	63	male	889.54541015625	0.7770697474479675	0.026502403846153846	neutral	 That day, Palmer had bought more strychnia and had called in a fresh doctor.
LJSpeech	LJ016-0388	56	male	941.869384765625	0.5061151385307312	0.01628389423076923	happy	 A few go further and are almost gluttonous.
LJSpeech	LJ007-0129	66	male	657.7396850585938	0.7841930985450745	0.022816639072847682	angry	 the sane and the insane, the young and the old, the trivial offender and the
LJSpeech	LJ014-0195	62	male	899.5761108398438	0.8505150675773621	0.022816639072847682	neutral	 Two of them supported Cope, who was still alive, although insensible, and Marley
LJSpeech	LJ027-0056	60	male	1063.888671875	0.8467499017715454	0.02244308774834437	happy	 The relation of skeleton and muscle in arthropods is exactly the reverse.
LJSpeech	LJ030-0227	61	male	1070.8468017578125	0.8109928965568542	0.022816639072847682	neutral	 Special Agent George W. Hickey Jr. in the rear seat of the Presidential
LJSpeech	LJ047-0210	53	male	734.2883911132812	0.4516647458076477	0.013350046641791045	neutral	 I think all of those, if we had them all together...
LJSpeech	LJ048-0228	69	male	914.8754272460938	0.8221648335456848	0.025711287313432835	neutral	 and others who were present say that no agent was inebriated or acted improperly.
LJSpeech	LJ009-0104	63	male	910.867919921875	0.8145198225975037	0.025711287313432835	neutral	 The women set up a yell, which is mixed with a rustling noise, occasioned by the
LJSpeech	LJ017-0085	61	male	1220.1307373046875	0.800680935382843	0.025711287313432835	neutral	 Palmer's plan was to administer poison in quantities insufficient to
LJSpeech	LJ018-0275	66	male	1315.436279296875	0.8228096961975098	0.025520833333333333	neutral	 On Tarpey's defense, it was stated that the idea of the theft had been suggested
LJSpeech	LJ027-0064	54	male	993.9934692382812	0.5912888646125793	0.019532638888888888	neutral	 All vertebrates and none other have two cavities.
LJSpeech	LJ036-0084	59	male	1092.259521484375	0.8476306796073914	0.025520833333333333	neutral	 Craig also claimed that when Fritz pointed out to Oswald that Craig had identified
LJSpeech	LJ040-0211	64	male	897.1492919921875	0.7998844981193542	0.025520833333333333	neutral	 This would make him approximately ten, well almost eleven years old.
LJSpeech	LJ032-0241	59	male	979.5408935546875	0.8054627180099487	0.02630009541984733	happy	 Ten days prior to the Walker attempt, Oswald had undoubtedly received the rifle.
LJSpeech	LJ002-0158	61	male	1082.5684814453125	0.8261975646018982	0.02630009541984733	neutral	 Other cases are recorded elsewhere, as at the Gilsburg Street Comptor, where
LJSpeech	LJ050-0077	58	male	1372.689453125	0.3803936243057251	0.011823711832061068	neutral	 and in taking preventive steps.
LJSpeech	LJ032-0236	64	male	1258.6331787109375	0.8708815574645996	0.02630009541984733	neutral	 By checking the actual mailing dates of these issues and the time it usually takes
LJSpeech	LJ026-0102	58	male	1112.703857421875	0.7878386974334717	0.02505681818181818	happy	 but root pressure due to osmosis, capillary action and evaporation.
LJSpeech	LJ025-0057	63	male	1100.768798828125	0.8493995666503906	0.02505681818181818	neutral	 such as the charai, are in constant and regular motion, was made out
LJSpeech	LJ018-0270	61	male	1052.5274658203125	0.8861708641052246	0.02505681818181818	neutral	 The assistant called with the jewels on approbation at a house specially hired for
LJSpeech	LJ013-0021	65	male	896.8339233398438	0.7397869229316711	0.02505681818181818	sad	 The larboard pump was suffered to remain choked up and the longboat was
LJSpeech	LJ042-0050	62	male	895.3709716796875	0.8398865461349487	0.023122902684563757	sad	 I have been a pro-communist for years and yet I have never met a communist.
LJSpeech	LJ006-0075	65	male	1124.275634765625	0.8591945767402649	0.023122902684563757	happy	 He saw certain rooms fill up, and yet took no steps to open others that were locked
LJSpeech	LJ048-0187	62	male	1194.6612548828125	0.8206942677497864	0.023122902684563757	neutral	 In addition, Secret Service agents riding in the motorcade were trained to
LJSpeech	LJ022-0186	59	male	1200.8306884765625	0.8014236688613892	0.023122902684563757	happy	 Twenty years of experience with this system have justified the efforts made to
LJSpeech	LJ013-0078	67	male	1060.476318359375	0.8209429979324341	0.02173698738170347	neutral	 Barbara was subsequently pardoned, but was not replaced on the roles as an attorney.
LJSpeech	LJ006-0015	62	male	1111.1729736328125	0.8129136562347412	0.02173698738170347	neutral	 one which required discretion, judgment and knowledge of law, with sufficient
LJSpeech	LJ048-0061	64	male	1116.922119140625	0.8459006547927856	0.02173698738170347	neutral	 Under proper procedures, knowledge of the pending Presidential visit might have prompted
LJSpeech	LJ015-0190	63	male	1128.6842041015625	0.7676864862442017	0.02173698738170347	sad	 that of dishonest rogues who assume piety and philanthropy as a cloak for the world.
LJSpeech	LJ003-0115	62	male	1147.26904296875	0.832510232925415	0.023842993079584776	neutral	 The judge sat in proper form. He was punctiliously styled, my
LJSpeech	LJ045-0244	67	male	1213.853271484375	0.797639787197113	0.023842993079584776	angry	 Long before the assassination, he expressed his hatred for American society.
LJSpeech	LJ004-0051	66	male	850.4898681640625	0.8350055813789368	0.023842993079584776	happy	 When in Belgium he had examined with great satisfaction the admirable manner
LJSpeech	LJ011-0265	62	male	1222.8548583984375	0.8727220892906189	0.023842993079584776	sad	 This Mr. Canning had left his widow a life interest in two thousand pounds.
LJSpeech	LJ028-0087	53	male	983.9290161132812	0.5809707641601562	0.01661304151624549	neutral	 Such was the appearance of the builder of the walls of Babylon.
LJSpeech	LJ028-0178	65	male	945.8756713867188	0.8222951292991638	0.024875902527075812	happy	 The walls of Babylon were so long and wide and high that all who
LJSpeech	LJ011-0113	64	male	1133.138427734375	0.8332394957542419	0.024875902527075812	sad	 He met his death with unshaken firmness, only in treating that a certain blue
LJSpeech	LJ039-0162	61	male	1100.3876953125	0.8697624802589417	0.024875902527075812	sad	 In an effort to test the rifle under conditions which simulated those which prevailed during
LJSpeech	LJ045-0066	53	male	1083.7061767578125	0.3979305326938629	0.014269723360655738	sad	 Oswald struck his wife on occasion.
LJSpeech	LJ028-0277	50	male	1167.56591796875	0.5111944675445557	0.017220543032786886	surprised	 One of his Sumter mules gave birth to a foal.
LJSpeech	LJ048-0197	64	male	992.088134765625	0.8570470213890076	0.028240266393442622	neutral	 I then told the officers that their primary duty was traffic and crowd control and that
LJSpeech	LJ019-0170	66	male	1029.4417724609375	0.8437999486923218	0.028240266393442622	neutral	 More officers were appointed, as the time of so many of those already on the staff
LJSpeech	LJ041-0092	59	male	839.0108642578125	0.7990822792053223	0.030625	neutral	 and quote, individual that you would brainwash and quite easy, but I think
LJSpeech	LJ039-0027	52	male	838.1315307617188	0.2564748227596283	0.00900361111111111	happy	 and switch with Azure.
LJSpeech	LJ030-0077	64	male	1158.3037109375	0.809492826461792	0.030625	neutral	 Each agent carried a .38 caliber pistol and a shotgun and
LJSpeech	LJ038-0112	64	male	1158.54931640625	0.7811505198478699	0.030625	neutral	 As previously indicated, Marina Oswald testified that she took the
LJSpeech	LJ017-0119	62	male	1471.981689453125	0.7784394025802612	0.022372159090909092	happy	 All the circumstances were so suspicious that he could not escape the criminal charge.
LJSpeech	LJ007-0242	63	male	898.47412109375	0.860797107219696	0.022161728896103895	neutral	 No complete and permanent improvement was indeed possible while Newgate remained unchanged.
LJSpeech	LJ020-0046	66	male	936.8101806640625	0.7930992841720581	0.022372159090909092	happy	 Close the dough over it, dust your hands and kneading board with flour,
LJSpeech	LJ018-0001	68	male	1067.754638671875	0.8335044980049133	0.022372159090909092	neutral	 The Chronicles of Newgate, Volume 2, by Arthur Griffiths. Section 21.
LJSpeech	LJ026-0098	57	male	1459.13232421875	0.8028250336647034	0.02745268924302789	neutral	 The circulatory system distributes these foods in animals,
LJSpeech	LJ021-0152	61	male	1135.7069091796875	0.7745066285133362	0.02745268924302789	neutral	 and to experiment for a reasonable time with measures suitable to
LJSpeech	LJ047-0223	62	male	809.3446655273438	0.6974994540214539	0.02534586653386454	neutral	 I don't recall the exact date, it was about a week prior."
LJSpeech	LJ007-0040	61	male	817.2888793945312	0.6683530211448669	0.020692480079681275	happy	 In a second room were fourteen more who had every hope of a reprieve.
LJSpeech	LJ050-0103	63	male	908.8638305664062	0.795621395111084	0.02478642086330935	sad	 were all men who acted alone in their criminal acts against our leaders.
LJSpeech	LJ037-0016	62	male	1227.5238037109375	0.8029683232307434	0.02478642086330935	neutral	 Scoggins hurriedly left his seat and hid behind the cab, as the man came back to
LJSpeech	LJ028-0336	59	male	1117.00927734375	0.6433566808700562	0.018222347122302158	sad	 Darius now, still keeping to the plan agreed upon.
LJSpeech	LJ004-0233	63	male	873.744384765625	0.8720524311065674	0.02478642086330935	neutral	 Under the new rule, visitors were not allowed to pass into the interior of the prison,
LJSpeech	LJ024-0127	66	male	790.1196899414062	0.8227817416191101	0.025333180147058824	angry	 You who know me can have no fear that I would tolerate the destruction
LJSpeech	LJ004-0189	67	male	882.4647216796875	0.8654625415802002	0.025333180147058824	happy	 The great principles of classification, cleanliness and employment were closely observed.
LJSpeech	LJ005-0045	60	male	1204.24072265625	0.5985238552093506	0.017624310661764703	neutral	 Nor did it confine itself to mere verbal recommendations.
LJSpeech	LJ005-0158	63	male	1125.4774169921875	0.658680260181427	0.021271369485294116	happy	 Bedding and clothing was still denied, but only in a few jails.
LJSpeech	LJ011-0018	65	male	1102.3902587890625	0.8398444652557373	0.024434840425531915	neutral	 The crime, long carried on without detection, was first discovered in 1820.
LJSpeech	LJ017-0022	63	male	950.7742919921875	0.7860767841339111	0.024434840425531915	neutral	 was that of Eliza Fenning, who was convicted of an attempt to poison a whole family.
LJSpeech	LJ030-0136	58	male	961.9500122070312	0.7000767588615417	0.024434840425531915	sad	 The President replied, that is very obvious.
LJSpeech	LJ039-0078	59	male	994.359375	0.811722457408905	0.024434840425531915	neutral	 The effect of a four-power telescopic sight on the difficulty of these shots
LJSpeech	LJ048-0254	63	male	1418.6072998046875	0.8071127533912659	0.023045568561872908	neutral	 advised in the course of the Secret Service investigation of these events that each agent
LJSpeech	LJ049-0083	63	male	1159.44287109375	0.8841239213943481	0.023045568561872908	angry	 It has long been a federal crime to conspire to injure any federal officer on account of
LJSpeech	LJ006-0043	61	male	963.5223999023438	0.8555874228477478	0.023045568561872908	neutral	 The disgraceful overcrowding had been partially ended, but the same evils of
LJSpeech	LJ025-0109	62	male	1129.241943359375	0.8006194829940796	0.023045568561872908	neutral	 has been as completely invalidated as the third and second.
LJSpeech	LJ044-0163	62	male	957.9028930664062	0.8367945551872253	0.02478642086330935	neutral	 Marina Oswald testified that her husband engaged in fair play for Cuba.
LJSpeech	LJ003-0037	65	male	1216.96533203125	0.8291783332824707	0.02478642086330935	neutral	 A site was purchased between Red Lion and White Cross streets and a new
LJSpeech	LJ013-0117	61	male	1048.94189453125	0.8147414922714233	0.02478642086330935	happy	 and with another carry off the plate-chest in broad daylight and as a matter of business.
LJSpeech	LJ027-0096	52	male	987.2114868164062	0.5842734575271606	0.01701371402877698	neutral	 as indeed she must be according to the derivation theory.
LJSpeech	LJ021-0035	61	male	1195.374755859375	0.8064736127853394	0.024875902527075812	happy	 saved debtors and creditors alike in many other fields of enterprise.
LJSpeech	LJ049-0159	67	male	1158.23046875	0.8147270679473877	0.024875902527075812	neutral	 the Secret Service, then the only federal investigative agency, assumed
LJSpeech	LJ039-0100	60	male	1073.9599609375	0.7953296303749084	0.024875902527075812	happy	 During the first week of an intensive eight-week training period, he received and
LJSpeech	LJ019-0302	66	male	927.6077270507812	0.7891265153884888	0.024875902527075812	neutral	 In 1862, there were in all 193 jails.
LJSpeech	LJ030-0229	59	male	1128.42724609375	0.8108355402946472	0.0216686320754717	neutral	 At this point, the cars were speeding through the underpass and had left the scene of the
LJSpeech	LJ005-0151	55	male	1023.0142211914062	0.8571876287460327	0.0216686320754717	neutral	 In others, women were very properly exempted from it, and also from all severe
LJSpeech	LJ015-0115	69	male	1087.5670166015625	0.8020297884941101	0.0216686320754717	sad	 Little more remains to be said about Robson. He appears to have accepted his position.
LJSpeech	LJ010-0279	66	male	988.0106201171875	0.8697272539138794	0.0216686320754717	neutral	 I shall mention briefly one more case, in which, however, there was no murderous
LJSpeech	LJ044-0119	62	male	947.2424926757812	0.8160279393196106	0.022372159090909092	neutral	 into which Oswald, in his own words, had quote, thrown himself. He sought it
LJSpeech	LJ049-0115	65	male	1047.099853515625	0.8133270740509033	0.022372159090909092	neutral	 of the person who is actually in the exercise of the executive power or
LJSpeech	LJ004-0139	64	male	965.0425415039062	0.8386383652687073	0.022372159090909092	happy	 In the morning, the stench and heat were so oppressive that he and everyone else
LJSpeech	LJ007-0223	61	male	1323.4381103515625	0.8278164267539978	0.022372159090909092	neutral	 The prison officials appear to be on the side of the inspectors, to the great dissatisfaction
LJSpeech	LJ032-0070	67	male	1133.4202880859375	0.764536440372467	0.023122902684563757	neutral	 Ordinarily, Inspector Holmes testified, identification is not requested because
LJSpeech	LJ015-0294	64	male	1171.9451904296875	0.816940426826477	0.023122902684563757	neutral	 He forgot to add that it was to be placed to Ralph's credit, and when he called
LJSpeech	LJ014-0083	60	male	1259.9434814453125	0.8244383335113525	0.023122902684563757	neutral	 which, having possessed herself of the murdered man's keys, she rifled from
LJSpeech	LJ040-0195	67	male	1063.7657470703125	0.8319800496101379	0.023122902684563757	neutral	 It appears that he did not want to do any of the things which the authorities suggest
LJSpeech	LJ030-0028	57	male	1052.3692626953125	0.8686795830726624	0.024093094405594404	neutral	 and the Vice President and Mrs. Johnson were in the receiving line to greet President
LJSpeech	LJ009-0145	63	male	1096.4228515625	0.8129713535308838	0.024093094405594404	neutral	 and he seemed to suffer great inward agitation when the ordinary, particularly
LJSpeech	LJ031-0117	50	male	923.3846435546875	0.5800350308418274	0.01670563811188811	neutral	 assisted by doctors William Osborne and John Parker.
LJSpeech	LJ004-0115	64	male	937.3268432617188	0.8438901305198669	0.024093094405594404	happy	 Infirmaries separating the sexes were also to be provided. A chapel too.
LJSpeech	LJ039-0200	50	male	1002.3143310546875	0.47214874625205994	0.014967765748031497	neutral	 And one of these agents, Robert A. Frazier,
LJSpeech	LJ028-0481	59	male	761.4593505859375	0.7362769842147827	0.027128444881889764	happy	 Nabo-Polisar, the father, my begetter, built Imgur-Bel.
LJSpeech	LJ003-0245	68	male	855.0972290039062	0.8284653425216675	0.027128444881889764	disgusted	 preparatory to their appearance in the Old Bailey. Irons were seldom removed.
LJSpeech	LJ017-0261	55	male	1154.7520751953125	0.7629727721214294	0.027128444881889764	neutral	 Seven were found guilty of murder on the high seas and one, Carlos, a criminal.
LJSpeech	LJ007-0137	62	male	1012.0001220703125	0.8231872320175171	0.021805775316455698	neutral	 More attention to ventilation, which was altogether neglected and inadequate, would
LJSpeech	LJ041-0017	57	male	845.4912719726562	0.8081146478652954	0.021805775316455698	neutral	 Wobel also recalled that Oswald once outlined a plan to cut the glass in the
LJSpeech	LJ043-0061	65	male	1211.890625	0.7867423295974731	0.021805775316455698	neutral	 At the time of his defection, when he evidenced no interest in his father and
LJSpeech	LJ033-0097	60	male	961.7119750976562	0.8147653937339783	0.021805775316455698	neutral	 Frazier parked the car in the company parking lot about two blocks north of the depository
LJSpeech	LJ019-0234	60	male	982.4412841796875	0.8467894792556763	0.0275625	sad	 The old buildings were entirely disused, and the whole of the inmates of Newgate were
LJSpeech	LJ016-0296	54	male	1260.2855224609375	0.4604700207710266	0.01482325	neutral	 The actual execution made some impression.
LJSpeech	LJ023-0072	69	male	1206.0687255859375	0.7865973711013794	0.0275625	neutral	 Congress passed a statute which, in 1803, the courts
LJSpeech	LJ050-0100	67	male	934.5872192382812	0.8466907143592834	0.0275625	neutral	 The Commission recognizes that no set of meaningful criteria will yield
LJSpeech	LJ028-0113	63	male	1025.2725830078125	0.6758575439453125	0.018996001683501684	neutral	 With mortar and bricks, he built two moat walls about the city.
LJSpeech	LJ013-0121	65	male	1238.6756591796875	0.7939990162849426	0.023200757575757576	sad	 Howe and his accomplice were arrested. The former was found guilty and sentenced
LJSpeech	LJ014-0273	63	male	1042.2625732421875	0.8731350302696228	0.023200757575757576	neutral	 The detection of these frauds came while he was still prominently before the world as
LJSpeech	LJ027-0136	63	male	1052.698974609375	0.8104369044303894	0.023200757575757576	neutral	 Illustrations quoted from the works of Romains and Locante will make this principle
LJSpeech	LJ003-0067	52	male	1101.8662109375	0.4360215961933136	0.011485659246575342	neutral	 in the then existing state of the law,
LJSpeech	LJ039-0220	65	male	1079.593994140625	0.8265351057052612	0.023598030821917807	neutral	 and that one would not have to be an expert marksman to have accomplished the assassination.
LJSpeech	LJ018-0029	63	male	892.7648315429688	0.8415856957435608	0.023598030821917807	neutral	 A photograph of Mueller shown the jeweler was identified as the likeness of the jewel.
LJSpeech	LJ001-0080	70	male	945.280517578125	0.8912354707717896	0.023598030821917807	neutral	 He seems to have taken the letter of the Elseviers of the 17th century for his mom.
LJSpeech	LJ028-0454	59	male	1161.063720703125	0.4748651683330536	0.01538449074074074	neutral	 But both sections originally reached the river.
LJSpeech	LJ037-0244	61	male	1057.122802734375	0.8424121737480164	0.025520833333333333	neutral	 Westbrook identified Commission Exhibit 162 as the light-colored
LJSpeech	LJ008-0174	64	male	970.2979125976562	0.8039829730987549	0.025520833333333333	sad	 One cartload of spectators having broken down, some of its occupants fell
LJSpeech	LJ009-0302	62	male	1106.8897705078125	0.7679134011268616	0.025520833333333333	sad	 Another man was hired, himself a convict, whose fees for self and wife were
LJSpeech	LJ002-0182	60	male	1128.78076171875	0.7411836981773376	0.021869359205776172	neutral	 the fleet and the Marshall Sea prisons especially devoted to them.
LJSpeech	LJ048-0023	61	male	983.9628295898438	0.8298522233963013	0.024875902527075812	neutral	 and he had told us during one of the interviews that he would probably take his wife back to
LJSpeech	LJ019-0257	65	male	1225.7010498046875	0.7593720555305481	0.024875902527075812	sad	 Here, the tread wheel was in use. There, cellular cranks.
LJSpeech	LJ040-0046	67	male	991.2825927734375	0.7420750856399536	0.024875902527075812	neutral	 which one associate described as, quote, irrevocable, end quote.
LJSpeech	LJ006-0295	60	male	1199.44775390625	0.8367014527320862	0.02745268924302789	neutral	 The governor was also personally responsible for gross contravention of this rule of
LJSpeech	LJ028-0252	51	male	1160.4306640625	0.45107951760292053	0.014317978087649402	neutral	 Only the king's son, Belshazzar, was killed.
LJSpeech	LJ035-0002	64	male	921.927734375	0.7000311613082886	0.02745268924302789	neutral	 Chapter 4 The Assassin Part 4 Oswald's Actions
LJSpeech	LJ009-0189	65	male	1282.5374755859375	0.8760916590690613	0.02745268924302789	neutral	 Persons were still living in 1855 who had witnessed dissections at Hicks Hall.
LJSpeech	LJ011-0032	61	male	919.7144165039062	0.8366391062736511	0.02452179715302491	neutral	 declared that they had hitherto formed a high opinion of his honor, integrity, and
LJSpeech	LJ042-0155	61	male	1077.17431640625	0.6541056036949158	0.018654137010676156	neutral	 It appears to be the work of a fairly well-organized person.
LJSpeech	LJ004-0039	55	male	796.300537109375	0.7321549654006958	0.02127335409252669	sad	 They were hopeless of any general reform by the action of the executive alone.
LJSpeech	LJ025-0157	61	male	1216.162353515625	0.8438274264335632	0.02452179715302491	neutral	 under these circumstances, unnatural as they are, with proper management
LJSpeech	LJ030-0121	56	male	970.6068725585938	0.7965176701545715	0.02469758064516129	happy	 Several times, Special Agent John D. Reddy came forward from the right front
LJSpeech	LJ010-0135	60	male	1289.2467041015625	0.5666248798370361	0.019533378136200718	happy	 yelled out three cheers to the populace whom he faced.
LJSpeech	LJ050-0118	67	male	922.9232788085938	0.8592643141746521	0.02469758064516129	neutral	 Since these agencies are already obliged constantly to evaluate the activities
LJSpeech	LJ040-0175	60	male	1017.7406005859375	0.7815579175949097	0.02331832437275986	neutral	 through which they could not reach him, but that he preferred the veil to remain intact.
LJSpeech	LJ013-0042	62	male	1047.09814453125	0.8341838717460632	0.023279138513513514	neutral	 the foundations of which had been laid by buying old ships on purpose to cast the more
LJSpeech	LJ014-0076	61	male	1151.91357421875	0.835938036441803	0.023279138513513514	happy	 He was seen afterwards smoking and talking with his hosts in their back parlor.
LJSpeech	LJ008-0097	64	male	1172.0582275390625	0.7829859852790833	0.023279138513513514	sad	 No sooner was the job finished than half a dozen competitors appeared.
LJSpeech	LJ043-0166	64	male	824.7908325195312	0.7988844513893127	0.023279138513513514	neutral	 Possibly he might have wanted to be caught and wanted his involvement made clear
LJSpeech	LJ045-0127	64	male	1247.29638671875	0.8624638915061951	0.023122902684563757	neutral	 He absolved the Soviet embassy in Mexico City of any blame for his difficulties.
LJSpeech	LJ027-0117	65	male	964.4253540039062	0.79942786693573	0.023122902684563757	happy	 Now, here again the former theory appears to be triumphant over the latter.
LJSpeech	LJ035-0076	63	male	1200.859130859375	0.8349494338035583	0.023122902684563757	happy	 Special Agent John Howlett of the Secret Service carried a rifle from the south
LJSpeech	LJ005-0101	58	male	1129.3095703125	0.7242733836174011	0.0196302432885906	sad	 Quince it deduced the practice and condition of every prison that replied.
LJSpeech	LJ028-0302	64	male	1078.13818359375	0.7867964506149292	0.027235671936758892	happy	 I will desert to the enemy as I am, and when I get into their city,
LJSpeech	LJ036-0196	65	male	1095.0950927734375	0.803264319896698	0.027235671936758892	neutral	 Tippett patrolled District 78 in the Oak Cliff area of Dixie.
LJSpeech	LJ028-0191	60	male	1061.2279052734375	0.42850354313850403	0.013888586956521738	happy	 The old enemies of Babylon rejoiced.
LJSpeech	LJ038-0214	57	male	1284.11328125	0.8420044779777527	0.027235671936758892	neutral	 and the other paragraphs instructed her on the disposal of Oswald's personal effects
LJSpeech	LJ003-0304	65	male	832.8008422851562	0.8691624999046326	0.023760775862068966	neutral	 with the penalty of forfeiting the day's allowance of food, an increase of which the committee
LJSpeech	LJ019-0009	56	male	1075.1240234375	0.7964742183685303	0.02298556034482759	neutral	 or to insist upon the construction of prisons on the most approved plan.
LJSpeech	LJ022-0192	57	male	1077.5343017578125	0.5187281370162964	0.014764870689655173	neutral	 They contemplate the enrichment of our national life.
LJSpeech	LJ007-0026	58	male	1158.8209228515625	0.793979287147522	0.023760775862068966	sad	 And with all this, the most dreadful oaths, the worst language, too bad to be repeated.
LJSpeech	LJ035-0158	66	male	997.3316040039062	0.8228869438171387	0.023679123711340205	neutral	 A blue jacket, later identified by Marina Oswald as her husband's, was
LJSpeech	LJ033-0014	54	male	959.2794799804688	0.8163527250289917	0.023679123711340205	sad	 Lee Harvey Oswald lived in a rooming house in Dallas, while his wife and children lived
LJSpeech	LJ018-0369	66	male	1060.0634765625	0.7843106389045715	0.023679123711340205	neutral	 the mysterious Bravo case, that of Dr. Lamson, and that of
LJSpeech	LJ017-0028	61	male	1008.4209594726562	0.8143091797828674	0.023679123711340205	neutral	 that she had had a quarrel with her mistress, and that the latter, with all others,
LJSpeech	LJ004-0109	57	male	1024.0537109375	0.4420858919620514	0.013042491007194245	neutral	 according to their categories or crimes.
LJSpeech	LJ011-0239	63	male	950.5305786132812	0.8259799480438232	0.02478642086330935	neutral	 Where robbery with violence was intended, the perpetrators had now to adopt various
LJSpeech	LJ049-0034	62	male	1265.04345703125	0.8034409284591675	0.02478642086330935	surprised	 could have reached the President in time to protect him from the second and fatal shot
LJSpeech	LJ016-0389	65	male	846.389892578125	0.8337498307228088	0.024438174460431655	neutral	 Giovanni Lanni, the Italian boy who murdered a French woman in the hay market.
LJSpeech	LJ037-0018	58	male	978.4235229492188	0.7971556186676025	0.029197563559322032	sad	 Skaggins saw him and heard him mutter either, Poor damn cop or...
LJSpeech	LJ041-0163	65	male	961.3748779296875	0.867605984210968	0.029197563559322032	neutral	 Out of a combination of Oswald's known Marxist sympathies and George Orwell's
LJSpeech	LJ017-0178	61	male	1041.3836669921875	0.7767281532287598	0.029197563559322032	surprised	 It appeared that several persons with whom she was intimate had succumbed suddenly.
LJSpeech	LJ048-0006	54	male	986.5155639648438	0.377620667219162	0.014482256355932204	neutral	 or to the Vice President."
LJSpeech	LJ002-0184	64	male	914.2891845703125	0.8724705576896667	0.025333180147058824	neutral	 the latter two being also a prison for felons and vagrants arrested within certain
LJSpeech	LJ012-0143	65	male	1187.48974609375	0.8944374322891235	0.025333180147058824	neutral	 Money Moses had received the stolen gold dust from Moss's father-in-law, Davis.
LJSpeech	LJ023-0012	59	male	1082.41943359375	0.7502818703651428	0.025333180147058824	neutral	 But when almost two years later it came before the Supreme
LJSpeech	LJ036-0068	63	male	865.4218139648438	0.6849870085716248	0.02191842830882353	sad	 Both buses stopped within one block of the depository building.
LJSpeech	LJ046-0158	66	male	1267.1650390625	0.8534243106842041	0.02222782258064516	neutral	 At the time of the assassination, the active PRS general files contained
LJSpeech	LJ011-0287	60	male	1136.369384765625	0.8295741677284241	0.02222782258064516	neutral	 but at length managed to wriggle out of the chain which confined his body and
LJSpeech	LJ016-0399	65	male	1150.46533203125	0.8492998480796814	0.02222782258064516	neutral	 Wainwright was allowed a cigar the night before execution, which he smoked in the prison yard.
LJSpeech	LJ006-0012	61	male	944.3914794921875	0.8449038863182068	0.02222782258064516	neutral	 but he would not arm them with any authority lest their cooperation might be offensive.
LJSpeech	LJ005-0102	62	male	1208.780029296875	0.8509426712989807	0.02194466560509554	neutral	 Upon these and the private visitations made by various members, the Society
LJSpeech	LJ001-0169	66	male	1138.5301513671875	0.825796365737915	0.02194466560509554	neutral	 The paper used for printing the small, highly ornamented French service books.
LJSpeech	LJ011-0271	66	male	1177.6123046875	0.8682052493095398	0.02194466560509554	neutral	 Mr. G went in the coach sent for him and alighted at 27 York Street,
LJSpeech	LJ014-0057	65	male	1001.4357299804688	0.8337165117263794	0.02194466560509554	neutral	 London did not escape the contagion and, prominent among the detestable crimes of
LJSpeech	LJ008-0191	65	male	960.4435424804688	0.7966058254241943	0.025904605263157895	disgusted	 At Courvoisier's execution in 1840, it was the same, or worse.
LJSpeech	LJ007-0120	60	male	1031.863525390625	0.5902575850486755	0.018803806390977444	sad	 some to the infirmary, many more to the governor's house.
LJSpeech	LJ012-0268	56	male	1237.5946044921875	0.7429749965667725	0.023254934210526317	neutral	 Suspicion grew almost to certainty as the evidence was unfolded.
LJSpeech	LJ015-0298	70	male	1055.9366455078125	0.7635851502418518	0.025904605263157895	happy	 But while Hardwick was in communication with Sawward, the bank was in communication with
LJSpeech	LJ030-0213	62	male	1213.4908447265625	0.7537742257118225	0.02620009505703422	neutral	 Hill heard a second shot, approximately five seconds after the first, which removed
LJSpeech	LJ009-0121	61	male	1158.9774169921875	0.7147454023361206	0.02333769011406844	happy	 for all thy goodness and loving kindness to us and to all men.
LJSpeech	LJ037-0084	63	male	1030.7886962890625	0.796912431716919	0.02620009505703422	neutral	 Barbara Jeanette Davis testified that no one had shown her a picture of Oswald before.
LJSpeech	LJ009-0219	56	male	1173.5	0.4054138660430908	0.01342134030418251	happy	 A clause was inserted to the effect that
LJSpeech	LJ040-0143	64	male	1050.6236572265625	0.8078874945640564	0.023598030821917807	neutral	 Contrary to reports that appeared after the assassination, the psychiatric examiner
LJSpeech	LJ006-0048	62	male	1121.3856201171875	0.8324679732322693	0.023598030821917807	neutral	 To these were still added an average of about 50, expecting the last penalty.
LJSpeech	LJ033-0006	58	male	1046.85986328125	0.5671167969703674	0.01674593321917808	neutral	 In this connection, the Commission considered one
LJSpeech	LJ044-0207	64	male	804.4202270507812	0.8754435181617737	0.023598030821917807	neutral	 the economic embargo against that country, and the general policy of the United States.
LJSpeech	LJ032-0260	62	male	1013.1453247070312	0.8036773204803467	0.023598030821917807	neutral	 He thought it contained tent poles or possibly other camping equipment, such as
LJSpeech	LJ017-0030	63	male	1237.06982421875	0.842721164226532	0.023598030821917807	happy	 When the spread of scientific knowledge places nefarious means at the disposal of
LJSpeech	LJ005-0084	62	male	1217.72900390625	0.7495614290237427	0.023598030821917807	neutral	 so as to prevent them from seeing, conversing, or holding any interaction.
LJSpeech	LJ008-0236	52	male	1014.2316284179688	0.7807179093360901	0.02271853595890411	neutral	 Coiled up on the floor of the scaffold like a serpent, the hangman's rope.
LJSpeech	LJ027-0020	60	male	1027.0858154296875	0.7961769104003906	0.026502403846153846	happy	 The unity in life, then, is not less a fact than is life's great diversity.
LJSpeech	LJ017-0238	66	male	1097.978515625	0.7882980704307556	0.026502403846153846	fearful	 Tefer the second mate agreed, but constantly went in fear of his life.
LJSpeech	LJ045-0233	66	male	1085.60400390625	0.800809919834137	0.026502403846153846	angry	 He consistently refused to admit involvement in the assassination
LJSpeech	LJ038-0099	48	male	939.3925170898438	0.5590772032737732	0.019237740384615384	neutral	 From the outset, Oswald denied owning a rifle.
LJSpeech	LJ028-0231	59	male	916.1470947265625	0.5505949854850769	0.01667814781021898	neutral	 whereupon they withdrew within their defenses.
LJSpeech	LJ044-0239	66	male	1262.919677734375	0.8477627635002136	0.025148266423357664	neutral	 and raises serious questions as to whether or not he ever expected to
LJSpeech	LJ050-0086	65	male	1170.1837158203125	0.8474505543708801	0.025148266423357664	neutral	 Under these criteria, whether the case should be referred to the Secret Service depends
LJSpeech	LJ047-0021	66	male	1138.4693603515625	0.8763906955718994	0.025148266423357664	neutral	 information regarding his relations with the U.S. Embassy in Moscow and background

================================================
FILE: README.md
================================================
# SpeechCraft

This is the official repository of the ACM Multimedia 2024 paper *"SpeechCraft: A Fine-Grained Expressive Speech Dataset with Natural Language Description"*.

For details of the pipeline and dataset, please refer to our [Paper](http://arxiv.org/abs/2408.13608) and [Demo Page](https://speechcraft2024.github.io/speechcraft2024/)

<!-- Dataset and pipeline are coming soon. -->


## News
[2024-09-26]: **Structured metadata** (pitch, energy, speed, age, gender, emotion tone, emphasis, topic/category, and transcript) has been made available to facilitate further enhancements and augmentations of the dataset.

[2024-12-20]: Code and checkpoint of the **Annotation pipeline** are released.

## SpeechCraft Dataset
### 1. Download Speech Corpus

|Language|Speech Corpus|#Duration|#Clips|
|:--------:|:--------:|--------:|--------:|
|ZH|[Zhvoice](https://github.com/fighting41love/zhvoice)|799.68h|1,020,427|
|ZH|[AISHELL-3](https://www.openslr.org/93/)|63.70h|63,011|
|EN|[GigaSpeech-M](https://huggingface.co/datasets/speechcolab/gigaspeech/tree/main/data/audio/m_files_additional)|739.91h|670,070|
|EN|[LibriTTS-R](https://www.openslr.org/141/)|548.88h|352,265|

<!-- ## Metadata Walkthrough -->
### 2. Download Speech Annotation
||Description|Instruction|Labels|
|:--------:|:--------:|:--------:|:--------:|
|ZH|[download](https://cloud.tsinghua.edu.cn/f/e66664542f534f399802/?dl=1)|[download](https://cloud.tsinghua.edu.cn/f/d6f00e027f504751b4c0/?dl=1)|[download](https://cloud.tsinghua.edu.cn/f/02a69d7c862e4422850e/?dl=1)|
|EN|[download](https://cloud.tsinghua.edu.cn/f/517428835bd5486e87e8/?dl=1)|[download](https://cloud.tsinghua.edu.cn/f/cce83dd884ed4104b1a1/?dl=1)|[download](https://cloud.tsinghua.edu.cn/f/6f05dcbcfb384ea1870b/?dl=1)|

### 3. Labels and Prompts
####  EN Version
- `--gender`: Male, Female 
- `--age`: Child, Teenager, Youth adult, Middle-aged, Elderly
- `--pitch`: low, normal, high
- `--speed`: slow, normal, fast
- `--volume`: low, normal, high
- `--emotion (English)`: Fearful, Happy, Disgusted, Sad, Surprised, Angry, Neutral
- `--emphasis`: Non-label words
- `--transcript`: Non-label sentence
- `--LLM Prompt`: 
```
Given the pitch, volume, age, gender, tone, and transcript, use sentiment analysis techniques to describe in natural language what age, what gender of a person, with what kind of emotion and tone, using what kind of pitch and volume, spoke the words in the transcript.
Note: You must vividly describe the sentence’s intonation, pitch, tone, and emotion. All outputs must strictly avoid identical wording and sentence structure. There is no need to describe body language or psychological state and do not repeat the input content.
Refer to the format of the following four cases:

*Example Input - Example Output*

Now try to process the following sentences, directly output the converted sentences according to the examples without missing any labels.
```

#### ZH Version
- `--年龄`:儿童,少年,青年,中年,老年
- `--性别`:男,女
- `--语速`:快,中,慢
- `--音高`:高,中,低
- `--音量`:高,中,低
- `--重读`:无标签,字词
- `--语气`:无标签,自然语句
- `--文本`:无标签,自然语句
- `--LLM Prompt`: 
```
请参照以下转换案例,使用中文自然语言描述一个人按照给定风格属性,如音高、音量、年龄、性别、语调,来说文本中的话。注意,仅描述说话风格,不需要描述肢体动作或心理状态,不要重复输入的内容。

*示例输入-示例输出*

现在尝试处理以下句子,根据示例直接输出转换后的句子,不要遗漏任何标签。
```

### 4. Request Access to Emphasis Speech Dataset

We do not own the copyright of the original audio files. Researchers and educators who wish to use the audio files for non-commercial research and/or educational purposes can be given access to our regenerated version under certain terms and conditions. To apply for the AISHELL-3 and LibriTTS-R versions with fine-grained keyword emphasis, please fill out the EULA form (`Emphasis-SpeechCraft-EULA.pdf`) and send the scanned form to jinzeyu23@mails.tsinghua.edu.cn. Once approved, you will be supplied with a download link. **([2024-09-26]: With metadata updated!)**

Please first refer to some emphasis examples provided [here](https://speechcraft2024.github.io/speechcraft2024/#13-examples-of-the-regenerated-emphasis-data-from-aishell-3-and-libritts-r). We are actively working on improving methods for large-scale fine-grained data construction that align with human perception.

|Language|Speech Corpus|#Duration|#Clips|
|:--------:|:--------:|--------:|--------:|
|ZH|AISHELL-3-stress|50.59h|63,258|
|EN|LibriTTS-R-stress|148.78h|75,654|


## Annotation Pipeline

### Step 0 : Installation

1. Download models for speech style recognition.

    Models from 🤗:

    ```
    llama_base_model = "baichuan-inc/Baichuan2-13B-Base"
    gender_model_path = "alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
    age_model_path = "audeering/wav2vec2-large-robust-24-ft-age-gender"
    asr_path = "openai/whisper-medium" / "openai/whisper-large-v3"
    ```

    Model from funasr (for English emotion classification):

    ```
    emotion_model = "iic/emotion2vec_base_finetuned"
    ```

    Prepare SECap from [here](https://github.com/thuhcsi/SECap) (for Chinese emotion captioning).

2. Create conda environment
    ```
    conda env create -f ./requirements.yaml
    mv ./AutomaticPipeline/models/SECap/model2.py $your_SECap_dir
    ```

3. Download the LoRA checkpoint from [here](https://cloud.tsinghua.edu.cn/d/548948399d7c4816b677/) and place it at `./llama-ft/finetuned-llama/`; it is used for description rewriting.

    Remember to set `base_model_name_or_path` in `./llama-ft/finetuned-llama/adapter_config.json` to the local path of the base LLM checkpoint.


### Step 1 : Labeling with the Automatic Annotation Pipeline

1. Get the scp file with raw scores for the audio corpus.

    ```
    cd ./AutomaticPipeline
    python AutoPipeline.py
    ```

2. Get the JSON file with the classified results, prepared for description rewriting.
    ```
    python Clustering.py
    ```

### Step 2 : Rewriting with the Finetuned Llama
```
cd ../llama-ft
python llama_infer.py
```


## Citation
Please cite our paper if you find this work useful:
```
@inproceedings{jin2024speechcraft,
  title={Speechcraft: A fine-grained expressive speech dataset with natural language description},
  author={Jin, Zeyu and Jia, Jia and Wang, Qixin and Li, Kehan and Zhou, Shuoyi and Zhou, Songtao and Qin, Xiaoyu and Wu, Zhiyong},
  booktitle={Proceedings of the 32nd ACM International Conference on Multimedia},
  pages={1255--1264},
  year={2024}
}
```


================================================
FILE: llama-ft/llama_infer.py
================================================
import os
import torch
import argparse
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import AutoPeftModelForCausalLM
import json
from tqdm import tqdm
import random
import torch.nn as nn
import torch.distributed as dist
from torch.utils.data import DataLoader, Subset
torch.multiprocessing.set_start_method('spawn', force=True)

# Prompt templates keyed by name. Dataset.__getitem__ looks a template up with
# the configured language (PROMPT_DICT[self.language]) and fills it with
# str.format_map, so the "chinese" and "english" entries only need a "labels"
# field, while the two generic entries expect "instruction" (and "input").
# NOTE(review): the wording below is presumably the exact text the LoRA
# checkpoint was fine-tuned on -- confirm before editing any of these strings.
PROMPT_DICT = {
    # Generic instruction template with an extra input/context section.
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
    ),
    # Generic instruction template without an input section.
    "prompt_no_input": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response:"
    ),
    # Chinese description-rewriting prompt. The instruction reads roughly:
    # "Given pitch, volume, age, gender, tone and the text, use sentiment
    # analysis techniques to describe it in natural Chinese. The description
    # must be vivid and diverse; do not describe body movements or
    # psychological states, and do not repeat the input content."
    # NOTE(review): "###Input:" has no space after "###", unlike
    # "### Instruction:" / "### Response:" -- verify against the training data.
    "chinese": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n给定音高、音量、年龄、性别、语气等信息以及文本,运用情感分析的技巧,用中文自然语言描述。"
        "注意必须生动且多样化地描述,不需要描述肢体动作或心理状态,不要重复input内容。\n\n"
        "###Input:\n{labels}\n\n### Response:"
    ),
    # English description-rewriting prompt; same structure as "chinese".
    # NOTE(review): the two instruction sentences are concatenated without a
    # separating space ("...analysis.Make sure...") and "###Input:" lacks a
    # space -- verify against the training data before normalizing.
    "english": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\nGiven information such as pitch, volume, age, gender, tone, category and text, describe using English natural language with the techniques of emotional analysis."
        "Make sure the description is vivid and diverse, without the need to describe physical actions or psychological states, and avoid repeating the input content.\n\n"
        "###Input:\n{labels}\n\n### Response:"

    )
}

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that turns label records into tokenized LLM prompts.

    The JSON file at ``args.json_path`` is loaded as a mapping from an
    utterance key to a record that contains at least a ``'labels'`` string
    of tab-separated tags. Each item is rendered with the prompt template
    selected by ``args.language`` and tokenized with the tokenizer loaded
    from ``args.ckpt_path``.
    """

    def __init__(self, args):
        """Load the label file and the tokenizer.

        Args:
            args: Namespace providing ``language`` (a key of ``PROMPT_DICT``),
                ``ckpt_path`` (tokenizer/checkpoint location) and
                ``json_path`` (label file to read).
        """
        self.language = args.language
        self.ckpt_path = args.ckpt_path
        # Use a context manager so the handle is closed right away, and read
        # as UTF-8 explicitly so non-ASCII (e.g. Chinese) labels do not depend
        # on the platform's default encoding.
        with open(args.json_path, encoding='utf-8') as json_file:
            self.testdata = json.load(json_file)
        self.keyindex = list(self.testdata.keys())
        self.tokenizer = AutoTokenizer.from_pretrained(args.ckpt_path, use_fast=False, trust_remote_code=True)

    def __len__(self):
        """Return the number of records in the label file."""
        return len(self.testdata)

    def __getitem__(self, index):
        """Build the prompt for the record at ``index``.

        The tab-separated tags are shuffled on every access so the model
        sees the labels in a random order.

        Returns:
            A tuple ``(inputs, key, prompt, labels)``: the tokenizer output
            for the prompt, the record key, the prompt string, and the
            shuffled tab-joined labels that were put into the prompt.
        """
        key = self.keyindex[index]
        value = self.testdata[key]
        tags = value['labels'].strip().split('\t')

        # Shuffle a per-access copy; the stored record is left untouched so
        # repeated reads never rewrite the loaded data.
        random.shuffle(tags)
        shuffled_labels = '\t'.join(tags)
        fields = {**value, 'labels': shuffled_labels}

        # NOTE(review): '<reserved_106>' / '<reserved_107>' are presumably the
        # base model's user/assistant turn markers -- confirm against the
        # tokenizer's special tokens.
        prompt = '<reserved_106>' + PROMPT_DICT[self.language].format_map(fields) + '<reserved_107>'
        inputs = self.tokenizer(prompt, return_tensors='pt')

        return inputs, key, prompt, shuffled_labels


def extract(input_text, language):
    """Return the quoted span inside ``input_text``, or ``None`` if there is none.

    Args:
        input_text: Generated text expected to contain a quotation.
        language: ``'chinese'`` selects full-width quotes (``“`` ... ``”``);
            any other value selects ASCII double quotes.

    Returns:
        For ``'chinese'``: the text from the first ``“`` (inclusive) up to
        the next ``”`` (exclusive). Otherwise: the text strictly between the
        first and the last ``"``. ``None`` when a complete quote pair is not
        present.

    NOTE(review): the Chinese branch keeps the opening quote in the result
    while the English branch strips both quotes; this asymmetry is preserved
    here because callers compare lengths against the result -- confirm
    whether it is intended.
    """
    if language == 'chinese':
        start_index = input_text.find("“")
        if start_index == -1:
            return None
        end_index = input_text.find("”", start_index + 1)
        if end_index == -1:
            # No closing quote: str.find returned -1, which used as a slice
            # end would silently drop the last character. Treat it like the
            # English branch treats an incomplete pair.
            return None
        return input_text[start_index:end_index]

    if input_text.count("\"") < 2:
        return None
    start_index = input_text.find("\"")
    end_index = input_text.rfind("\"")
    return input_text[start_index + 1:end_index]

def _append_failed_key(error_path, key):
    """Append ``key`` on its own line to the failure list at ``error_path``."""
    with open(error_path, 'a', encoding='utf-8') as file:
        file.write(key + '\n')


def _dump_results(results, output_path):
    """Overwrite ``output_path`` with ``results`` serialised as indented UTF-8 JSON."""
    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(results, file, indent=4, ensure_ascii=False)


def _is_usable_generation(generation, language):
    """Tell whether ``generation`` has a quoted passage plus at least 3 other characters."""
    quoted = extract(generation, language)
    return quoted is not None and len(generation) - len(quoted) >= 3


def inference_on_device(args, tokenizer, device, dataloader):
    """Generate one description per item of ``dataloader`` on ``device``.

    Results are collected as ``{key: {'labels': ..., 'llama-instruction': ...}}``
    and written as JSON to a per-device file whose name is derived from
    ``args.output_path``. Keys for which no usable generation was obtained, or
    for which generation raised, are appended to ``args.error_path``.

    Args:
        args: namespace providing ``language``, ``output_path``, ``error_path``
            and ``ckpt_path``.
        tokenizer: tokenizer used to decode the generated token ids.
        device: ``torch.device`` the model and the inputs are moved to.
        dataloader: yields ``(inputs, keys, prompts, labels)`` batches; only
            the first element of each batch is used, so it is expected to be
            built with ``batch_size=1``.
    """
    import logging

    logger = logging.getLogger(__name__)

    language = args.language
    error_path = args.error_path

    model = AutoPeftModelForCausalLM.from_pretrained(
        args.ckpt_path,
        revision="v2.0",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",
    )
    model.to(device)

    sampling_kwargs = dict(
        max_new_tokens=128,
        repetition_penalty=1.1,
        do_sample=True,
        use_cache=True,
        temperature=0.7,
        top_p=0.3,
    )
    max_attempts = 6        # one initial sample plus up to five retries
    checkpoint_every = 1000

    # Per-device file name: strip the trailing 'json' (the dot is kept), then
    # append the device string and a fresh '.json' suffix.
    output_path = args.output_path[:-4] + str(device) + '.json'

    test_result = {}
    count = 0
    with torch.no_grad():
        for inputs, keys, prompts, labels in tqdm(dataloader):
            # Unwrap the batch: take the first element of every field.
            inputs = {name: tensor[0].to(device, dtype=torch.long) for name, tensor in inputs.items()}
            key = keys[0]
            prompt = prompts[0]
            label = labels[0]

            try:
                generation = None
                for _ in range(max_attempts):
                    # NOTE(review): `sample` is called with generate-style keyword
                    # arguments; confirm the loaded model exposes a `sample` method
                    # accepting them, otherwise every item lands in the except branch.
                    pred = model.sample(**inputs, **sampling_kwargs)
                    # The decoded text starts with the prompt; keep only what follows it.
                    candidate = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)[len(prompt):]
                    if _is_usable_generation(candidate, language):
                        # Accept on validity rather than on the retry counter, so a
                        # good result from the very last attempt is not thrown away.
                        generation = candidate
                        break
            except Exception:
                # Best effort: record the failure, leave a trace, and move on.
                logger.exception("generation failed for %s", key)
                _append_failed_key(error_path, key)
                continue

            if generation is None:
                _append_failed_key(error_path, key)
                continue

            test_result[key] = {'labels': label, 'llama-instruction': generation}
            count += 1

            # Checkpoint only when a new result was stored (early at 50 items,
            # then every `checkpoint_every`), not after failed items as well.
            if count % checkpoint_every == 0 or count == 50:
                _dump_results(test_result, output_path)

    _dump_results(test_result, output_path)
        

def main(args):
    """Shard the inference set across the requested GPUs and run one worker per GPU.

    Args:
        args: namespace providing ``devices`` (comma-separated CUDA indices,
            e.g. ``"0,1"``) plus everything ``Dataset`` and
            ``inference_on_device`` read from it.
    """
    devices = [int(device_id) for device_id in args.devices.split(',')]
    num_devices = len(devices)

    tokenizer = AutoTokenizer.from_pretrained(args.ckpt_path, revision="v2.0", use_fast=False, trust_remote_code=True)
    inferset = Dataset(args)
    total = len(inferset)

    # Contiguous shard boundaries that cover every sample: when `total` is not
    # a multiple of `num_devices` the remainder is spread over the shards
    # instead of being dropped by floor division.
    bounds = [total * i // num_devices for i in range(num_devices + 1)]

    processes = []
    for device_num, start, stop in zip(devices, bounds, bounds[1:]):
        loader = DataLoader(Subset(inferset, range(start, stop)), batch_size=1)
        device = torch.device(f'cuda:{device_num}')
        p = torch.multiprocessing.Process(target=inference_on_device, args=(args, tokenizer, device, loader))
        p.start()
        processes.append(p)

    # Wait for every worker so the script only exits once all shards are written.
    for p in processes:
        p.join()


if __name__ == "__main__":
    # Command-line options; every one is a plain string with a default value.
    cli_defaults = (
        ('--language', 'english'),
        ('--devices', '0'),
        ('--json_path', './example_libritts.json'),
        ('--output_path', './inference_libritts.json'),
        ('--error_path', './error.txt'),
        ('--ckpt_path', './finetuned-llama'),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value in cli_defaults:
        parser.add_argument(flag, type=str, default=default_value)

    args = parser.parse_args()
    main(args)


================================================
FILE: requirements.yaml
================================================
# Environment specification in conda's environment-file layout
# (name / channels / dependencies, with a nested pip section below).
# Presumably consumed with `conda env create -f requirements.yaml` —
# confirm against the README.
name: speechcraft
# Channels are listed in the order given here.
# NOTE(review): the three TUNA mirror URLs use plain http; consider switching
# them to https if the mirror supports it.
channels:
  - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
  - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
  - http://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
  - simpleitk
  - pytorch
  - menpo
  - msys2
  - nvidia
  - defaults
  - bioconda
  - conda-forge
dependencies:
  - _libgcc_mutex=0.1=conda_forge
  - _openmp_mutex=4.5=2_gnu
  - bzip2=1.0.8=h5eee18b_6
  - ca-certificates=2024.11.26=h06a4308_0
  - ld_impl_linux-64=2.40=h12ee557_0
  - libffi=3.4.4=h6a678d5_1
  - libgcc=14.2.0=h77fa898_1
  - libgcc-ng=14.2.0=h69a702a_1
  - libgomp=14.2.0=h77fa898_1
  - libnsl=2.0.1=hd590300_0
  - libsqlite=3.47.0=hadc24fc_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=2.38.1=h0b41bf4_0
  - libxcrypt=4.4.36=hd590300_1
  - libzlib=1.3.1=hb9d3cd8_2
  - ncurses=6.4=h6a678d5_0
  - openssl=3.4.0=hb9d3cd8_0
  - python=3.9.18=h0755675_1_cpython
  - readline=8.2=h5eee18b_0
  - setuptools=75.1.0=py39h06a4308_0
  - tk=8.6.13=noxft_h4845f30_101
  - wheel=0.44.0=py39h06a4308_0
  - xz=5.4.6=h5eee18b_1
  - pip:
    - absl-py==2.1.0
    - accelerate==0.28.0
    - aiohappyeyeballs==2.4.4
    - aiohttp==3.11.10
    - aiosignal==1.3.1
    - aliyun-python-sdk-core==2.16.0
    - aliyun-python-sdk-kms==2.16.5
    - annotated-types==0.7.0
    - antlr4-python3-runtime==4.9.3
    - anyio==4.7.0
    - argparse==1.4.0
    - asgiref==3.8.1
    - async-timeout==4.0.3
    - asyncio==3.4.3
    - attrs==24.2.0
    - audioread==3.0.1
    - backoff==2.2.1
    - bcrypt==4.2.1
    - beartype==0.17.2
    - beautifulsoup4==4.12.3
    - bitarray==3.0.0
    - bitsandbytes==0.43.0
    - black==24.10.0
    - build==1.2.2.post1
    - cachetools==5.5.0
    - certifi==2024.8.30
    - cffi==1.17.1
    - charset-normalizer==3.4.0
    - chroma-hnswlib==0.7.6
    - chromadb==0.5.23
    - click==8.1.7
    - cohere==5.1.8
    - colorama==0.4.6
    - coloredlogs==15.0.1
    - colt5-attention==0.10.20
    - contourpy==1.3.0
    - crcmod==1.7
    - cryptography==44.0.0
    - cycler==0.12.1
    - cython==3.0.11
    - dalle3==0.1.0
    - dataclasses-json==0.6.7
    - datasets==2.19.2
    - decorator==5.1.1
    - deprecated==1.2.15
    - diffusers==0.31.0
    - dill==0.3.8
    - distance==0.1.3
    - docstring-parser==0.16
    - duckduckgo-search==6.4.1
    - durationpy==0.9
    - editdistance==0.8.1
    - einops==0.7.0
    - einops-exts==0.0.4
    - einx==0.3.0
    - encodec==0.1.1
    - exceptiongroup==1.2.2
    - fairscale==0.4.13
    - faiss-cpu==1.9.0.post1
    - fastapi==0.115.6
    - fastavro==1.9.7
    - filelock==3.13.1
    - flake8==7.1.1
    - flash-attn==2.3.6
    - flatbuffers==24.3.25
    - fonttools==4.55.2
    - frozendict==2.4.6
    - frozenlist==1.5.0
    - fsspec==2024.2.0
    - ftfy==6.3.1
    - funasr==1.1.16
    - g2p-en==2.1.0
    - ggl==1.1.0
    - google-ai-generativelanguage==0.6.10
    - google-api-core==2.24.0
    - google-api-python-client==2.155.0
    - google-auth==2.37.0
    - google-auth-httplib2==0.2.0
    - google-generativeai==0.8.3
    - googleapis-common-protos==1.66.0
    - greenlet==3.1.1
    - grpcio==1.68.1
    - grpcio-status==1.68.1
    - h11==0.14.0
    - httpcore==1.0.7
    - httplib2==0.22.0
    - httptools==0.6.4
    - httpx==0.28.1
    - huggingface-hub==0.26.3
    - humanfriendly==10.0
    - hydra-core==1.3.2
    - idna==3.10
    - importlib-metadata==8.5.0
    - importlib-resources==6.4.5
    - inflect==7.4.0
    - iniconfig==2.0.0
    - jaconv==0.4.0
    - jamo==0.4.1
    - jieba==0.42.1
    - jinja2==3.1.3
    - jmespath==0.10.0
    - joblib==1.4.2
    - jsonpatch==1.33
    - jsonpointer==3.0.0
    - kaldiio==2.18.0
    - kiwisolver==1.4.7
    - kubernetes==31.0.0
    - langchain==0.1.13
    - langchain-community==0.0.29
    - langchain-core==0.1.53
    - langchain-experimental==0.0.55
    - langchain-text-splitters==0.0.2
    - langsmith==0.1.147
    - lazy-loader==0.4
    - libcst==1.5.1
    - librosa==0.10.2.post1
    - lightning==2.4.0
    - lightning-utilities==0.11.9
    - lion-pytorch==0.2.3
    - llvmlite==0.43.0
    - local-attention==1.9.3
    - loguru==0.7.2
    - lxml==5.3.0
    - markdown==3.7
    - markdown-it-py==3.0.0
    - markupsafe==2.1.5
    - marshmallow==3.23.1
    - matplotlib==3.9.3
    - mccabe==0.7.0
    - mdurl==0.1.2
    - mmh3==5.0.1
    - modelscope==1.21.0
    - monotonic==1.6
    - more-itertools==10.5.0
    - mpmath==1.3.0
    - msgpack==1.1.0
    - multidict==6.1.0
    - multiprocess==0.70.16
    - mypy-extensions==1.0.0
    - nest-asyncio==1.6.0
    - networkx==3.2.1
    - ninja==1.11.1.2
    - nltk==3.9.1
    - numba==0.60.0
    - numpy==1.25.2
    - nvidia-cublas-cu12==12.1.3.1
    - nvidia-cuda-cupti-cu12==12.1.105
    - nvidia-cuda-nvrtc-cu12==12.1.105
    - nvidia-cuda-runtime-cu12==12.1.105
    - nvidia-cudnn-cu12==8.9.2.26
    - nvidia-cufft-cu12==11.0.2.54
    - nvidia-curand-cu12==10.3.2.106
    - nvidia-cusolver-cu12==11.4.5.107
    - nvidia-cusparse-cu12==12.1.0.106
    - nvidia-nccl-cu12==2.19.3
    - nvidia-nvjitlink-cu12==12.4.127
    - nvidia-nvtx-cu12==12.1.105
    - oauthlib==3.2.2
    - omegaconf==2.3.0
    - onnxruntime==1.19.2
    - open-clip-torch==2.29.0
    - openai==0.28.0
    - opencv-python==4.10.0.84
    - opencv-python-headless==4.10.0.84
    - opentelemetry-api==1.29.0
    - opentelemetry-exporter-otlp-proto-common==1.29.0
    - opentelemetry-exporter-otlp-proto-grpc==1.29.0
    - opentelemetry-instrumentation==0.50b0
    - opentelemetry-instrumentation-asgi==0.50b0
    - opentelemetry-instrumentation-fastapi==0.50b0
    - opentelemetry-proto==1.29.0
    - opentelemetry-sdk==1.29.0
    - opentelemetry-semantic-conventions==0.50b0
    - opentelemetry-util-http==0.50b0
    - orjson==3.10.12
    - oss2==2.19.1
    - outcome==1.3.0.post0
    - overrides==7.7.0
    - packaging==23.2
    - pandas==2.2.3
    - pathspec==0.12.1
    - peft==0.13.2
    - pillow==10.3.0
    - pip==24.3.1
    - platformdirs==4.3.6
    - playwright==1.49.1
    - pluggy==1.5.0
    - pooch==1.8.2
    - portalocker==3.0.0
    - posthog==3.7.4
    - primp==0.8.3
    - propcache==0.2.1
    - proto-plus==1.25.0
    - protobuf==5.29.1
    - psutil==6.1.0
    - pyarrow==18.1.0
    - pyarrow-hotfix==0.6
    - pyasn1==0.6.1
    - pyasn1-modules==0.4.1
    - pycodestyle==2.12.1
    - pycparser==2.22
    - pycryptodome==3.21.0
    - pydantic==2.7.1
    - pydantic-core==2.18.2
    - pydub==0.25.1
    - pyee==12.0.0
    - pyflakes==3.2.0
    - pygments==2.18.0
    - pynndescent==0.5.13
    - pyparsing==3.2.0
    - pypdf==4.1.0
    - pypdf2==3.0.1
    - pypika==0.48.9
    - pyproject-hooks==1.2.0
    - pysocks==1.7.1
    - pytest==8.1.1
    - python-dateutil==2.9.0.post0
    - python-dotenv==1.0.1
    - pytorch-lightning==2.4.0
    - pytorch-wpe==0.0.1
    - pytz==2024.2
    - pyyaml==6.0.2
    - qformer==0.0.5
    - ratelimit==2.2.1
    - regex==2024.11.6
    - requests==2.32.3
    - requests-oauthlib==2.0.0
    - requests-toolbelt==1.0.0
    - rich==13.7.1
    - rsa==4.9
    - sacrebleu==2.4.3
    - safetensors==0.4.5
    - scikit-learn==1.5.2
    - scipy==1.9.3
    - selenium==4.27.1
    - sentencepiece==0.2.0
    - sentry-sdk==2.19.2
    - shellingham==1.5.4
    - six==1.17.0
    - sniffio==1.3.1
    - sortedcontainers==2.4.0
    - soundfile==0.12.1
    - soupsieve==2.6
    - soxr==0.5.0.post1
    - sqlalchemy==2.0.36
    - starlette==0.41.3
    - swarms==2.4.0
    - sympy==1.13.1
    - tabulate==0.9.0
    - tenacity==8.2.3
    - tensorboard==2.18.0
    - tensorboard-data-server==0.7.2
    - tensorboardx==2.6.2.2
    - termcolor==2.5.0
    - threadpoolctl==3.5.0
    - tiktoken==0.8.0
    - timm==0.4.12
    - tokenizers==0.13.3
    - toml==0.10.2
    - tomli==2.2.1
    - torch==2.2.0
    - torch-complex==0.4.4
    - torchaudio==2.2.0
    - torchfix==0.7.0
    - torchmetrics==1.6.0
    - torchvision==0.17.0
    - tqdm==4.66.2
    - transformers==4.33.3
    - trio==0.27.0
    - trio-websocket==0.11.1
    - triton==2.2.0
    - typeguard==4.4.1
    - typer==0.15.1
    - types-requests==2.32.0.20241016
    - typing==3.7.4.3
    - typing-extensions==4.12.2
    - typing-inspect==0.9.0
    - tzdata==2024.2
    - umap-learn==0.5.7
    - undetected-chromedriver==3.5.5
    - uritemplate==4.1.1
    - urllib3==2.2.3
    - uvicorn==0.32.1
    - uvloop==0.21.0
    - vector-quantize-pytorch==1.14.5
    - vocos==0.1.0
    - watchfiles==1.0.3
    - wcwidth==0.2.13
    - websocket-client==1.8.0
    - websockets==14.1
    - werkzeug==3.1.3
    - wget==3.2
    - wrapt==1.17.0
    - wsproto==1.2.0
    - xxhash==3.5.0
    - yarl==1.18.3
    - zetascale==0.4.9
    - zipp==3.21.0
Download .txt
gitextract_tgmekqoo/

├── AutomaticPipeline/
│   ├── AgePreTrainModel.py
│   ├── AutoPipeline.py
│   ├── Clustering.py
│   ├── PitchEnergy.py
│   ├── models/
│   │   └── SECap/
│   │       └── model2.py
│   └── outputs/
│       ├── labels_LJspeech_0.json
│       └── labels_LJspeech_0.scp
├── README.md
├── llama-ft/
│   └── llama_infer.py
└── requirements.yaml
Download .txt
SYMBOL INDEX (50 symbols across 6 files)

FILE: AutomaticPipeline/AgePreTrainModel.py
  class ModelHead (line 10) | class ModelHead(nn.Module):
    method __init__ (line 13) | def __init__(self, config, num_labels):
    method forward (line 21) | def forward(self, features, **kwargs):
  class AgeGenderModel (line 32) | class AgeGenderModel(Wav2Vec2PreTrainedModel):
    method __init__ (line 35) | def __init__(self, config):
    method forward (line 44) | def forward(

FILE: AutomaticPipeline/AutoPipeline.py
  function to_device (line 30) | def to_device(tensors, device):
  class CustomDataset (line 39) | class CustomDataset(torch.utils.data.Dataset):
    method __init__ (line 40) | def __init__(
    method __preprocess__ (line 58) | def __preprocess__(self):
    method __len__ (line 70) | def __len__(self):
    method _cutorpad (line 76) | def _cutorpad(self, audio: np.ndarray) -> np.ndarray:
    method __getitem__ (line 94) | def __getitem__(self, index) -> torch.Tensor:
  class CollateFunc (line 117) | class CollateFunc:
    method __init__ (line 118) | def __init__(
    method __call__ (line 132) | def __call__(self, batch: List):
  function age_predict (line 157) | def age_predict(batch, model, device):
  function gender_predict (line 165) | def gender_predict(batch, model, device):
  function emotion_predict (line 176) | def emotion_predict(audiopaths, model):
  function pitch_energy_calculate (line 184) | def pitch_energy_calculate(input_values):
  function inference_on_device (line 195) | def inference_on_device(device, i, num_devices, language, basedir, scp_p...
  function main (line 289) | def main(args):

FILE: AutomaticPipeline/Clustering.py
  function assign_pitch_group (line 6) | def assign_pitch_group(row, language, male_percentiles, female_percentil...
  function replace_age_with_text (line 22) | def replace_age_with_text(row, language):
  function main (line 36) | def main(args):

FILE: AutomaticPipeline/PitchEnergy.py
  function extract_pitch (line 4) | def extract_pitch(wav, sr):
  function calculate_mean_pitch (line 8) | def calculate_mean_pitch(pitches):
  function process_audio (line 11) | def process_audio(audio, sr):

FILE: AutomaticPipeline/models/SECap/model2.py
  class KeywordsStoppingCriteria (line 20) | class KeywordsStoppingCriteria(StoppingCriteria):
    method __init__ (line 21) | def __init__(self, keywords_ids:list):
    method __call__ (line 24) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  class MotionAudio (line 29) | class MotionAudio(pl.LightningModule):
    method __init__ (line 30) | def __init__(
    method init_Qformer (line 76) | def init_Qformer(self,num_query_token, vision_width, cross_attention_f...
    method mean_pooling (line 94) | def mean_pooling(self,model_output, attention_mask):
    method forward (line 102) | def forward(self, audio, describtion):
    method training_step (line 170) | def training_step(self, batch, batch_idx):
    method validation_step (line 175) | def validation_step(self, batch, batch_idx):
    method configure_optimizers (line 180) | def configure_optimizers(self):
    method inference (line 183) | def inference(self, audio_feature):
    method post_processing (line 242) | def post_processing(self, sentences,device):
    method test_step (line 254) | def test_step(self, batch, batch_idx):
  function count_parameters (line 272) | def count_parameters(model):

FILE: llama-ft/llama_infer.py
  class Dataset (line 43) | class Dataset(torch.utils.data.Dataset):
    method __init__ (line 44) | def __init__(self, args):
    method __len__ (line 51) | def __len__(self):
    method __getitem__ (line 54) | def __getitem__(self, index):
  function extract (line 71) | def extract(input_text, language):
  function inference_on_device (line 87) | def inference_on_device(args, tokenizer, device, dataloader):
  function main (line 151) | def main(args):
Condensed preview — 10 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (151K chars).
[
  {
    "path": "AutomaticPipeline/AgePreTrainModel.py",
    "chars": 1327,
    "preview": "import numpy as np\nimport torch\nimport torch.nn as nn\nfrom transformers.models.wav2vec2.modeling_wav2vec2 import (\n    W"
  },
  {
    "path": "AutomaticPipeline/AutoPipeline.py",
    "chars": 11761,
    "preview": "# coding=utf-8\nimport os\nimport argparse\nimport numpy as np\nimport torch\nimport librosa\nfrom typing import List, Optiona"
  },
  {
    "path": "AutomaticPipeline/Clustering.py",
    "chars": 3846,
    "preview": "import pandas as pd\nimport csv\nimport numpy as np\nimport argparse\nimport json\ndef assign_pitch_group(row, language, male"
  },
  {
    "path": "AutomaticPipeline/PitchEnergy.py",
    "chars": 512,
    "preview": "import librosa\nimport numpy as np\n\ndef extract_pitch(wav, sr):\n    pitches, magnitudes = librosa.core.piptrack(y=wav, sr"
  },
  {
    "path": "AutomaticPipeline/models/SECap/model2.py",
    "chars": 13992,
    "preview": "import torch\nimport torch.nn as nn\nimport lightning.pytorch as pl\nfrom module.Qformer import BertConfig, BertLMHeadModel"
  },
  {
    "path": "AutomaticPipeline/outputs/labels_LJspeech_0.json",
    "chars": 49493,
    "preview": "{\n    \"LJSpeech_LJ010-0227\": {\n        \"labels\": \"age:Elderly\\tgender:male\\tpitch:low\\tvolume:normal\\tspeed:fast\\temotio"
  },
  {
    "path": "AutomaticPipeline/outputs/labels_LJspeech_0.scp",
    "chars": 36520,
    "preview": "LJSpeech\tLJ010-0227\t68\tmale\t968.5380249023438\t0.8162265419960022\t0.022741336633663366\tsad\t He went from Newgate first to"
  },
  {
    "path": "README.md",
    "chars": 6309,
    "preview": "# SpeechCraft\n\nThis is the official repository of the ACM Multimedia 2024 paper *\"SpeechCraft: A Fine-Grained Expressive"
  },
  {
    "path": "llama-ft/llama_infer.py",
    "chars": 7269,
    "preview": "import os\nimport torch\nimport argparse\nfrom accelerate import Accelerator\nfrom transformers import AutoModelForCausalLM,"
  },
  {
    "path": "requirements.yaml",
    "chars": 8578,
    "preview": "name: speechcraft\nchannels:\n  - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/\n  - http://mirrors.tuna.tsin"
  }
]

About this extraction

This page contains the full source code of the thuhcsi/SpeechCraft GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 10 files (136.3 KB), approximately 43.4k tokens, and a symbol index with 50 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!