Showing preview only (3,324K chars total). Download the full file or copy to clipboard to get everything.
Repository: saheedniyi02/yarngpt
Branch: main
Commit: 8bb0eb27d307
Files: 57
Total size: 3.2 MB
Directory structure:
gitextract_5ttmyi3a/
├── README.md
├── __init__.py
├── audiotokenizer.py
├── default_speakers/
│ ├── azeez.json
│ ├── chinenye.json
│ ├── emma.json
│ ├── idera.json
│ ├── joke.json
│ ├── jude.json
│ ├── onye.json
│ ├── osagie.json
│ ├── regina.json
│ ├── remi.json
│ ├── saheed.json
│ ├── tayo.json
│ ├── umar.json
│ └── zainab.json
├── default_speakers_local/
│ ├── hausa_female1.json
│ ├── hausa_female2.json
│ ├── hausa_male1.json
│ ├── hausa_male2.json
│ ├── igbo_female1.json
│ ├── igbo_female2.json
│ ├── igbo_male2.json
│ ├── yoruba_female1.json
│ ├── yoruba_female2.json
│ ├── yoruba_male1.json
│ ├── yoruba_male2.json
│ └── yoruba_male3.json
├── notebooks/
│ ├── Merge_datasets.ipynb
│ ├── Merge_datasets_local (1).ipynb
│ ├── Yoruba_prepare_data_naij (2).ipynb
│ ├── train_YarnGPT.ipynb
│ └── train_YarnGPT_local.ipynb
├── python-wrapper/
│ ├── README.md
│ ├── audiotokenizer.py
│ ├── default_speakers/
│ │ ├── .ipynb_checkpoints/
│ │ │ ├── Yoruba_prepare_data_naij (2)-checkpoint.ipynb
│ │ │ ├── emma-checkpoint.json
│ │ │ ├── idera-checkpoint.json
│ │ │ └── onye-checkpoint.json
│ │ ├── Yoruba_prepare_data_naij (2).ipynb
│ │ ├── chinenye.json
│ │ ├── emma.json
│ │ ├── idera.json
│ │ ├── joke.json
│ │ ├── jude.json
│ │ ├── onye.json
│ │ ├── osagie.json
│ │ ├── regina.json
│ │ ├── remi.json
│ │ ├── tayo.json
│ │ └── umar.json
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── yarngpt/
│ ├── __init__.py
│ └── core.py
└── requirements.txt
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# YarnGPT 🎙️

A text-to-speech model generating natural Nigerian-accented English speech. Built on pure language modeling without external adapters.
Web URL: https://yarngpt.co/
## Quick Start
```python
!git clone https://github.com/saheedniyi02/yarngpt.git
!pip install outetts uroman
import os
import re
import json
import torch
import inflect
import random
import uroman as ur
import numpy as np
import torchaudio
import IPython
from transformers import AutoModelForCausalLM, AutoTokenizer
from outetts.wav_tokenizer.decoder import WavTokenizer
!wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
!gdown 1-ASeEkrn4HY49yZWHTASgfGFNXdVnLTt
from yarngpt.audiotokenizer import AudioTokenizerV2
tokenizer_path="saheedniyi/YarnGPT2"
wav_tokenizer_config_path="/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"
audio_tokenizer=AudioTokenizerV2(
tokenizer_path,wav_tokenizer_model_path,wav_tokenizer_config_path
)
model = AutoModelForCausalLM.from_pretrained(tokenizer_path,torch_dtype="auto").to(audio_tokenizer.device)
#change the text
text="The election was won by businessman and politician, Moshood Abiola, but Babangida annulled the results, citing concerns over national security."
# change the language and voice
prompt=audio_tokenizer.create_prompt(text,lang="english",speaker_name="idera")
input_ids=audio_tokenizer.tokenize_prompt(prompt)
output = model.generate(
input_ids=input_ids,
temperature=0.1,
repetition_penalty=1.1,
max_length=4000,
#num_beams=5,# using a beam size helps for the local languages but not english
)
codes=audio_tokenizer.get_codes(output)
audio=audio_tokenizer.get_audio(codes)
IPython.display.Audio(audio,rate=24000)
torchaudio.save(f"Sample.wav", audio, sample_rate=24000)
```
## Features
- 🗣️ 12 preset voices (6 male, 6 female)
- 🎯 Trained on 2000+ hours of Nigerian audio
- 🔊 24kHz high-quality audio output
- 🚀 Simple API for quick integration
- 📝 Support for long-form text
## Available Voices
- Female: zainab, idera, regina, chinenye, joke, remi
- Male: jude, tayo, umar, osagie, onye, emma
## Examples
Check out our [demo notebook](link-to-notebook) or listen to [sample outputs](https://huggingface.co/saheedniyi/YarnGPT/tree/main/audio).
## Model Details
- Base: [HuggingFaceTB/SmolLM2-360M](https://huggingface.co/HuggingFaceTB/SmolLM2-360M)
- Training: 5 epochs on A100 GPU
- Data: Nigerian movies, podcasts, and open-source audio
- Architecture: Pure language modeling approach
## Limitations
- English to Nigerian-accented English only
- May not capture all Nigerian accent variations
- Training data includes auto-generated content
## Citation
```bibtex
@misc{yarngpt2025,
author = {Saheed Azeez},
title = {YarnGPT: Nigerian-Accented English Text-to-Speech Model},
year = {2025},
publisher = {Hugging Face}
}
```
## License
MIT
## Acknowledgments
Built with [WavTokenizer](https://github.com/jishengpeng/WavTokenizer) and inspired by [OuteTTS](https://huggingface.co/OuteAI/OuteTTS-0.2-500M/).
================================================
FILE: __init__.py
================================================
================================================
FILE: audiotokenizer.py
================================================
import os
import re
import json
import torch
import inflect
import random
import uroman as ur
import numpy as np
import torchaudio
from transformers import AutoTokenizer
from outetts.wav_tokenizer.decoder import WavTokenizer
from outetts.wav_tokenizer.encoder.utils import convert_audio
class AudioTokenizer:
    """Prompt builder and audio-code decoder for YarnGPT text-to-speech.

    Wraps a Hugging Face text tokenizer and a WavTokenizer codec. It turns
    input text plus a stored reference speaker into a prompt string,
    tokenizes that prompt, and converts the integer audio codes found in
    generated output back into audio.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Load the text tokenizer and the WavTokenizer codec.

        Args:
            tokenizer_path: Name or path given to
                ``AutoTokenizer.from_pretrained``.
            wav_tokenizer_model_path: Path to the WavTokenizer checkpoint.
            wav_tokenizer_config_path: Path to the WavTokenizer config file.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{audio_start}\n"
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        self.bos = "<|im_start|>"
        self.eos = "<|im_end|>"
        # Length (in tokens) of the last tokenized prompt; see tokenize_prompt.
        self.input_length = 0
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>"
        }
        # inflect engine used to spell out numbers in process_text.
        self.lec = inflect.engine()
        self.wavtokenizer = WavTokenizer.from_pretrained0802(wav_tokenizer_config_path, wav_tokenizer_model_path)
        self.wavtokenizer = self.wavtokenizer.to(self.device)
        # Speaker JSON files are resolved relative to this module's directory.
        self.BASE_DIR = os.path.dirname(__file__)
        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers")
        self.speakers = ["idera", "emma", "onye", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye"]

    def get_speaker_path(self, speaker_name):
        """Return the path of ``<speaker_name>.json`` in the speakers directory."""
        return os.path.join(self.DEFAULT_SPEAKERS_DIR, f"{speaker_name}.json")

    def load_speaker(self, path: str):
        """Read and return the parsed JSON speaker profile stored at ``path``."""
        # Explicit UTF-8 so decoding does not depend on the platform's
        # default locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_default_speaker(self, name: str):
        """Load a bundled speaker by name (case and outer whitespace ignored)."""
        name = name.lower().strip()
        speaker_path = self.get_speaker_path(name)
        return self.load_speaker(speaker_path)

    def process_text(self, text: str):
        """Normalize ``text`` and return it as a list of words.

        The text is lower-cased, numbers are spelled out with inflect,
        common separators become spaces, every character other than
        ``a-z`` and whitespace is removed, and the result is split on
        whitespace.
        """
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def create_audio_prompt(self, words: list) -> str:
        """Serialize speaker word entries into the audio part of the prompt.

        Args:
            words: Dicts with ``"word"``, ``"duration"`` and ``"codes"`` keys.

        Returns:
            One line per word: the word, its duration token, then its audio
            code tokens wrapped in the code start/end markers.
        """
        prompt = []
        for i in words:
            word = i["word"]
            # float() because some speaker files store the duration as a string.
            duration = self.special_tokens["time"].format(float(i["duration"]))
            tokens = "".join([self.special_tokens["audio_code"].format(c) for c in i["codes"]])
            prompt.append(f'{word}{duration}{self.special_tokens["code_start"]}{tokens}{self.special_tokens["code_end"]}')
        return "\n".join(prompt)

    def create_prompt(self, text, speaker_name="idera"):
        """Build the full prompt for ``text`` using a bundled speaker.

        The speaker's own transcript is placed before ``text`` in the text
        section, and the speaker's audio entries are appended after the
        audio start marker.
        """
        speaker = self.load_default_speaker(speaker_name)
        input_words = self.process_text(speaker["text"]) + self.process_text(text)
        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])
        return prompt

    def tokenize_prompt(self, prompt):
        """Encode ``prompt`` to a tensor of input ids on ``self.device``.

        Also stores the prompt length in ``self.input_length`` so that
        ``get_codes`` can skip the prompt tokens in generated output.
        """
        input_ids = self.tokenizer.encode(
            prompt,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(self.device)
        self.input_length = input_ids.shape[1]
        return input_ids

    def get_audio(self, discrete_code):
        """Decode audio codes with WavTokenizer and return the audio on CPU.

        Args:
            discrete_code: Audio codes as returned by ``get_codes``; a flat
                list of N ints is wrapped into a tensor of shape (1, 1, N).
        """
        discrete_code = torch.tensor([[discrete_code]]).to(self.device)
        features = self.wavtokenizer.codes_to_features(discrete_code).to(self.device)
        bandwidth_id = torch.tensor([0]).to(self.device)
        audio_out = self.wavtokenizer.decode(features, bandwidth_id=bandwidth_id)
        return audio_out.to("cpu")

    def extract_integers(self, s):
        """Return every integer that appears as ``|<integer>|`` in ``s``."""
        # Match integers enclosed in vertical bars |integer|
        matches = re.findall(r'\|(-?\d+)\|', s)
        # Convert matches to integers
        return [int(match) for match in matches]

    def get_codes(self, output):
        """Extract the audio codes from generated token ids.

        Only the tokens after the last tokenized prompt
        (``self.input_length``) in ``output[0]`` are decoded and scanned.
        """
        new_output = self.tokenizer.decode(output[0][self.input_length:])
        codes = self.extract_integers(new_output)
        return codes
class AudioTokenizerForLocal(AudioTokenizer):
    """AudioTokenizer variant for Hausa, Igbo and Yoruba prompts.

    Adds a language token to the prompt template, romanizes text with
    uroman before normalization, and loads speakers from the
    ``default_speakers_local`` directory.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Initialize the base tokenizer, then apply the local-language setup.

        Args:
            tokenizer_path: Name or path of the text tokenizer.
            wav_tokenizer_model_path: Path to the WavTokenizer checkpoint.
            wav_tokenizer_config_path: Path to the WavTokenizer config file.
        """
        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
        # Same layout as the base prompt plus a language line.
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "word_start": "<|word_start|>",
            "word_end": "<|word_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>",
            "hausa": "<|hausa|>",
            "igbo": "<|igbo|>",
            "yoruba": "<|yoruba|>",
        }
        self.uroman = ur.Uroman()
        self.DEFAULT_SPEAKERS_DIR = os.path.join(self.BASE_DIR, "default_speakers_local")
        # Every entry needs its own trailing comma: adjacent string literals
        # are concatenated by Python, which previously merged "igbo_male2"
        # and "hausa_female1" into one bogus name.
        self.speakers = [
            "hausa_male1", "hausa_male2", "yoruba_male1", "yoruba_male2",
            "igbo_male2",  # "igbo_male1" is not offered
            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2",
            "yoruba_female1", "yoruba_female2",
        ]

    def process_text(self, text: str):
        """Romanize ``text`` with uroman, then normalize it into a word list.

        After romanization the text is lower-cased, numbers are spelled out
        with inflect, common separators become spaces, characters other
        than ``a-z`` and whitespace are removed, and the result is split on
        whitespace.
        """
        text = self.uroman.romanize_string(text)
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def create_prompt(self, text, lang, speaker_name=None):
        """Build the prompt for ``text`` in ``lang``.

        Args:
            text: Text to synthesize.
            lang: One of ``"hausa"``, ``"igbo"`` or ``"yoruba"``.
            speaker_name: Bundled speaker to imitate. When ``None``, a
                speaker for ``lang`` is picked at random.

        Raises:
            AssertionError: If ``lang`` is not a supported language.
        """
        assert lang in ["hausa","igbo","yoruba"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba']"
        # No speaker requested: pick a random one for the language.
        if speaker_name is None:
            if lang == "hausa":
                speaker_name = random.choice(["hausa_male1", "hausa_male2", "hausa_female1", "hausa_female2"])
            elif lang == "igbo":
                speaker_name = random.choice(["igbo_female1", "igbo_female2", "igbo_male2"])
            else:
                speaker_name = random.choice(["yoruba_male2", "yoruba_female1", "yoruba_female2"])
        speaker = self.load_default_speaker(speaker_name)
        # The speaker's transcript precedes the requested text.
        input_words = self.process_text(speaker["text"]) + self.process_text(text)
        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            lang=self.special_tokens[lang],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])
        return prompt
class AudioTokenizerV2(AudioTokenizer):
    """AudioTokenizer variant covering English, Hausa, Igbo and Yoruba.

    Adds a language token to the text-to-speech prompt, an ASR prompt
    built from the audio codes of a file, and helpers to encode audio
    with WavTokenizer and to read ASR output.
    """

    def __init__(self, tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path,):
        """Initialize the base tokenizer, then apply the V2 setup.

        Args:
            tokenizer_path: Name or path of the text tokenizer.
            wav_tokenizer_model_path: Path to the WavTokenizer checkpoint.
            wav_tokenizer_config_path: Path to the WavTokenizer config file.
        """
        super().__init__(tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path)
        self.text_prompt = "{bos}\n{text_start}{words}{text_end}\n{lang}\n{audio_start}\n"
        self.asr_prompt = "{bos}\n{code_start}{codes}{code_end}\n{asr}\n"
        self.special_tokens = {
            "audio_code": "<|{}|>",
            "text_start": "<|text_start|>",
            "text_end": "<|text_end|>",
            "audio_start": "<|audio_start|>",
            "audio_end": "<|audio_end|>",
            "word_start": "<|word_start|>",
            "word_end": "<|word_end|>",
            "time": "<|t_{:.2f}|>",
            "code_start": "<|code_start|>",
            "code_end": "<|code_end|>",
            "text_sep": "<|text_sep|>",
            "hausa": "<|hausa|>",
            "igbo": "<|igbo|>",
            "yoruba": "<|yoruba|>",
            "english": "<|english|>",
            "asr": "<|asr|>"
        }
        self.uroman = ur.Uroman()
        self.DEFAULT_SPEAKERS_DIR_LOCAL = os.path.join(self.BASE_DIR, "default_speakers_local")
        self.DEFAULT_SPEAKERS_ENG = os.path.join(self.BASE_DIR, "default_speakers")
        # Every entry needs its own trailing comma: adjacent string literals
        # are concatenated by Python, which previously merged "igbo_male2"
        # and "hausa_female1" into one bogus name.
        self.speakers_local = [
            "hausa_male1", "hausa_male2", "yoruba_male1", "yoruba_male2",
            "igbo_male2",  # "igbo_male1" is not offered
            "hausa_female1", "hausa_female2", "igbo_female1", "igbo_female2",
            "yoruba_female1", "yoruba_female2",
        ]
        self.speakers_eng = ["idera", "emma", "onye", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye", "saheed"]
        # (token, replacement) pairs applied in this order by replace_tokens.
        self.changed_tokens = [('<|1836|>', '<|453|><|453|>'),
                               ('<|1837|>', '<|1836|><|1836|>'),
                               ('<|1838|>', '<|1837|><|1837|>'),
                               ('<|1840|>', '<|244|><|167|>'),
                               ('<|1841|>', '<|235|><|219|>'),
                               ('<|1844|>', '<|453|><|244|>'),
                               ('<|1845|>', '<|1838|><|1838|>')]

    def process_text(self, text: str):
        """Romanize ``text`` with uroman, then normalize it into a word list.

        After romanization the text is lower-cased, numbers are spelled out
        with inflect, common separators become spaces, characters other
        than ``a-z`` and whitespace are removed, and the result is split on
        whitespace.
        """
        text = self.uroman.romanize_string(text)
        text = re.sub(r'\d+(\.\d+)?', lambda x: self.lec.number_to_words(x.group()), text.lower())
        text = re.sub(r'[-_/,\.\\]', ' ', text)
        text = re.sub(r'[^a-z\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text.split()

    def get_speaker_path(self, speaker_name, dir):
        """Return the path of ``<speaker_name>.json`` inside directory ``dir``."""
        return os.path.join(dir, f"{speaker_name}.json")

    def load_speaker(self, path: str):
        """Read and return the parsed JSON speaker profile stored at ``path``."""
        # Explicit UTF-8 so decoding does not depend on the platform's
        # default locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_default_speaker(self, name: str, dir: str):
        """Load speaker ``name`` (case and outer whitespace ignored) from ``dir``."""
        name = name.lower().strip()
        speaker_path = self.get_speaker_path(name, dir)
        return self.load_speaker(speaker_path)

    def create_prompt(self, text, lang, speaker_name=None):
        """Build the text-to-speech prompt for ``text`` in ``lang``.

        Args:
            text: Text to synthesize.
            lang: One of ``"hausa"``, ``"igbo"``, ``"yoruba"`` or
                ``"english"``.
            speaker_name: Bundled speaker to imitate. When ``None``, a
                speaker for ``lang`` is picked at random.

        Raises:
            AssertionError: If ``lang`` is not a supported language.
        """
        assert lang in ["hausa","igbo","yoruba","english"], f"Invalid language: {lang}, language must be one of ['hausa','igbo','yoruba','english']"
        # No speaker requested: pick a random one for the language.
        if speaker_name is None:
            if lang == "hausa":
                speaker_name = random.choice(["hausa_male1", "hausa_male2", "hausa_female1", "hausa_female2"])
            elif lang == "igbo":
                speaker_name = random.choice(["igbo_female1", "igbo_female2", "igbo_male2"])
            elif lang == "yoruba":
                speaker_name = random.choice(["yoruba_male2", "yoruba_female1", "yoruba_female2"])
            else:
                speaker_name = random.choice(self.speakers_eng)
        # English speakers live in their own directory; all other languages
        # share the local-speakers directory.
        if lang == "english":
            speaker_dir = self.DEFAULT_SPEAKERS_ENG
        else:
            speaker_dir = self.DEFAULT_SPEAKERS_DIR_LOCAL
        speaker = self.load_default_speaker(speaker_name, speaker_dir)
        # The speaker's transcript precedes the requested text.
        input_words = self.process_text(speaker["text"]) + self.process_text(text)
        inputs_words_strings = f"{self.special_tokens['text_sep']}".join([i.strip() for i in input_words])
        prompt = self.text_prompt.format(
            bos=self.bos,
            text_start=self.special_tokens['text_start'],
            words=inputs_words_strings,
            text_end=self.special_tokens['text_end'],
            lang=self.special_tokens[lang],
            audio_start=self.special_tokens['audio_start']
        )
        prompt += self.create_audio_prompt(speaker["words"])
        return prompt

    def replace_tokens(self, text):
        """Apply every ``self.changed_tokens`` replacement to ``text`` in order."""
        for old_token, new_tokens in self.changed_tokens:
            text = text.replace(old_token, new_tokens)
        return text

    def resample(self, audio: "torch.Tensor", sr: int, target_sr: int):
        """Convert ``audio`` to mono at ``target_sr`` and move it to the device.

        Args:
            audio: Audio tensor; it is cast to float32 and given a new
                leading dimension before conversion.
            sr: Sample rate of ``audio``.
            target_sr: Sample rate to convert to.
        """
        audio = audio.to(dtype=torch.float32)
        audio = audio.unsqueeze(0)
        # 1 as last arg corresponds to mono audio
        resampled = convert_audio(audio, sr, target_sr, 1)
        return resampled.to(self.device)

    def quantize_wavtokenizer(self, path):
        """Encode the audio file at ``path`` into a string of code tokens.

        The file is loaded with torchaudio, resampled to 24000 Hz, and
        encoded with WavTokenizer; each resulting code ``c`` is rendered
        as ``<|c|>`` and the tokens are concatenated.
        """
        audio_data, sample_rate = torchaudio.load(path)
        audio_data = audio_data.squeeze()
        audio = self.resample(audio_data, sample_rate, 24000)
        if audio.ndim == 3:
            audio = audio.squeeze(1)
        bandwidth_id = torch.tensor([0]).to(self.device)
        _, codes = self.wavtokenizer.encode_infer(audio, bandwidth_id=bandwidth_id)
        codes = codes.squeeze(1).to(self.device)
        return "".join(f"<|{code}|>" for code in codes[0].tolist())

    def create_asr_prompt(self, audio_path):
        """Build the speech-recognition prompt for the audio at ``audio_path``."""
        codes = self.quantize_wavtokenizer(audio_path)
        prompt = self.asr_prompt.format(
            bos=self.bos,
            code_start=self.special_tokens['code_start'],
            codes=codes,
            code_end=self.special_tokens['code_end'],
            asr=self.special_tokens["asr"],
        )
        return prompt

    def get_asr_results(self, output):
        """Extract the recognized text from generated token ids.

        Decodes ``output[0]``, keeps the span after the last
        ``<|text_start|>`` and before the following ``<|text_end|>``, and
        from each line of that span takes the part between
        ``<|word_start|>`` and ``<|word_end|>``. The pieces are joined
        with single spaces.
        """
        decoded = self.tokenizer.decode(output[0])
        text_section = decoded.split("<|text_start|>")[-1].split("<|text_end|>")[0]
        words = [
            line.split("<|word_start|>")[-1].split("<|word_end|>")[0]
            for line in text_section.split("\n")
        ]
        return " ".join(words).strip()
================================================
FILE: default_speakers/azeez.json
================================================
{
"text": "Hello! My name is Saheed azeez and I am testing the audio feature",
"words": [
{
"word": "hello",
"duration": 1.22,
"codes": [
219,
244,
244,
167,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
244,
219,
237,
864,
1041,
1048,
1372,
1780,
1554,
1024,
702,
1814,
1754,
1315,
1697,
1719,
1682,
307,
621,
901,
355,
783,
1726,
353,
1416,
729,
803,
1494,
353,
876,
1818,
932,
1068,
1813,
875,
1774,
766,
1453,
1466,
792,
1388,
1495,
1236,
1462,
431,
1025,
1429,
1128,
1236,
1483,
1305,
1352,
1681,
5,
1758,
1481,
1339
]
},
{
"word": "my",
"duration": 0.18,
"codes": [
1333,
1339,
1388,
1373,
974,
723,
1776,
1001,
1160,
1769,
1048,
1646,
1321,
912
]
},
{
"word": "name",
"duration": 0.2,
"codes": [
1596,
325,
876,
1303,
973,
1707,
1332,
1300,
145,
1136,
1266,
1353,
845,
913,
989
]
},
{
"word": "is",
"duration": 0.12,
"codes": [
1257,
1372,
1617,
1800,
1568,
1679,
1798,
1476,
1759
]
},
{
"word": "saheed",
"duration": 0.5,
"codes": [
1807,
1354,
1737,
1738,
1060,
1122,
1195,
1275,
1129,
1473,
688,
1675,
1724,
1392,
1146,
1605,
1784,
1476,
1454,
1743,
1824,
706,
1706,
669,
91,
1079,
1456,
1645,
1041,
1687,
1425,
1205,
830,
1525,
1007,
1291,
723
]
},
{
"word": "azeez",
"duration": 0.48,
"codes": [
829,
926,
1438,
1124,
1282,
1745,
1019,
1430,
1657,
1715,
1637,
1653,
1713,
1370,
1534,
1410,
1767,
814,
22,
1703,
1534,
1797,
1488,
1812,
1637,
1791,
1720,
1677,
1807,
1459,
1779,
1767,
1145,
1239,
1622,
1264
]
},
{
"word": "and",
"duration": 0.24,
"codes": [
1780,
1291,
1174,
1435,
1494,
1807,
662,
1760,
1694,
363,
1225,
1775,
1264,
1455,
1014,
1758,
1620,
1013
]
},
{
"word": "i",
"duration": 0.06,
"codes": [
1823,
1295,
1397,
1108,
1275
]
},
{
"word": "am",
"duration": 0.14,
"codes": [
1129,
1697,
835,
1589,
1719,
1534,
1495,
1025,
1405,
766
]
},
{
"word": "testing",
"duration": 0.42,
"codes": [
196,
1118,
761,
1314,
1770,
1138,
1429,
728,
1497,
1792,
1049,
1430,
1062,
1788,
1354,
1555,
1735,
1728,
954,
1754,
343,
1418,
636,
1501,
1301,
901,
763,
1620,
1687,
177,
1706,
325
]
},
{
"word": "the",
"duration": 0.14,
"codes": [
810,
1421,
1404,
1093,
781,
752,
1780,
1749,
850,
1435
]
},
{
"word": "audio",
"duration": 0.3,
"codes": [
1792,
1381,
1309,
1472,
1449,
1785,
114,
601,
866,
1764,
1212,
1453,
1152,
1777,
853,
1735,
1052,
355,
1421,
1605,
1761,
1664,
540
]
},
{
"word": "feature",
"duration": 0.4,
"codes": [
1682,
1442,
1819,
1818,
710,
1776,
1205,
646,
1688,
1572,
875,
1367,
476,
1285,
460,
342,
1784,
28,
1621,
1745,
1462,
988,
1780,
1697,
1249,
1348,
1120,
1590,
803,
1205
]
}
]
}
================================================
FILE: default_speakers/chinenye.json
================================================
{
"text": "and once I got that out of the way",
"words": [
{
"word": "and",
"duration": 1.18,
"codes": [
1073,
1804,
1510,
1562,
377,
1287,
1615,
175,
631,
1702,
1700,
1590,
1158,
1676,
758,
1727,
1548,
1464,
1605,
1469,
1291,
1755,
1656,
1323,
1372,
269,
1252,
1466,
1677,
1192,
1220,
1815,
1658,
1818,
1514,
1480,
1747,
1413,
1440,
1403,
28,
1806,
1536,
1269,
1673,
1616,
1619,
1745,
1532,
1659,
1682,
1777,
1764,
1766,
1796,
1827,
719,
1768,
1761,
1524,
1782,
1410,
1748,
1764,
1447,
1791,
1790,
1528,
1550,
1491,
1764,
1324,
790,
1307,
664,
719,
1224,
1571,
1740,
1062,
1775,
1494,
486,
1544,
1828,
961,
1115,
1308
]
},
{
"word": "once",
"duration": 0.46,
"codes": [
996,
1407,
892,
1326,
1223,
362,
36,
1103,
1734,
1755,
1798,
749,
1603,
1748,
519,
1643,
1744,
176,
1709,
749,
1615,
1801,
1438,
1719,
1491,
1802,
1575,
1750,
1180,
1077,
855,
1511,
961,
1739,
632
]
},
{
"word": "i",
"duration": 0.16,
"codes": [
398,
1055,
767,
57,
1777,
1706,
34,
1025,
1745,
1796,
1266,
1348
]
},
{
"word": "got",
"duration": 0.24,
"codes": [
1555,
639,
1708,
813,
1152,
753,
718,
1742,
756,
1109,
1796,
85,
1623,
1769,
1759,
1491,
1769,
1693
]
},
{
"word": "that",
"duration": 0.28,
"codes": [
1555,
1732,
1301,
755,
1224,
1192,
1241,
1192,
1102,
944,
1358,
855,
1342,
1603,
1693,
1783,
1689,
1803,
1126,
1089,
839
]
},
{
"word": "out",
"duration": 0.16,
"codes": [
887,
1726,
1411,
1758,
839,
9,
1686,
1642,
1695,
998,
828,
1755
]
},
{
"word": "of",
"duration": 0.08,
"codes": [
1825,
1734,
1281,
1794,
1518,
1696
]
},
{
"word": "the",
"duration": 0.14,
"codes": [
1565,
1608,
1541,
1258,
1798,
1499,
1685,
1554,
1776,
1602,
1381
]
},
{
"word": "way",
"duration": 0.16,
"codes": [
1822,
1773,
1663,
1710,
1554,
1493,
4,
1620,
1755,
416,
1384,
1688
]
}
]
}
================================================
FILE: default_speakers/emma.json
================================================
{
"text": "Scientists have discovered a new planet that may be capable of supporting life!",
"words": [
{
"word": "scientists",
"duration": 0.82,
"codes": [
1334,
1359,
619,
1057,
1528,
817,
1175,
884,
527,
1519,
323,
980,
608,
1104,
1271,
1265,
1237,
191,
1308,
203,
1126,
1226,
1265,
1073,
1661,
903,
502,
197,
127,
1712,
877,
1717,
1735,
1076,
1284,
1629,
784,
62,
175,
432,
767,
533,
990,
1258,
823,
1651,
1801,
701,
1382,
554,
527,
117,
323,
989,
884,
817,
495,
781,
1214,
1099,
1104
]
},
{
"word": "have",
"duration": 0.24,
"codes": [
930,
1393,
1303,
1001,
1438,
628,
1774,
973,
1758,
1501,
1761,
1428,
1725,
669,
1780,
487,
866,
1762
]
},
{
"word": "discovered",
"duration": 0.66,
"codes": [
820,
1592,
1737,
731,
1325,
1644,
884,
1300,
323,
596,
231,
296,
943,
990,
1214,
1039,
1039,
1430,
866,
19,
1675,
1824,
1030,
1630,
1758,
783,
1598,
1832,
1330,
1319,
1730,
1449,
1414,
1511,
695,
1526,
1410,
95,
1686,
1400,
961,
1809,
1303,
355,
544,
1671,
1493,
1290,
1732,
1808
]
},
{
"word": "a",
"duration": 0.14,
"codes": [
968,
1281,
895,
1827,
1819,
694,
1509,
1346,
928,
1449,
1512
]
},
{
"word": "new",
"duration": 0.24,
"codes": [
1433,
1689,
1685,
1598,
1547,
1369,
1228,
1708,
1285,
1722,
1257,
625,
1114,
1425,
465,
950,
651,
561
]
},
{
"word": "planet",
"duration": 0.48,
"codes": [
1707,
821,
1225,
1228,
1168,
1291,
1739,
813,
1738,
966,
1829,
1229,
1751,
1280,
1120,
1537,
1145,
1257,
1145,
1490,
1565,
41,
1677,
1796,
1258,
1228,
1389,
1145,
1433,
763,
1255,
355,
509,
869,
1144,
501
]
},
{
"word": "that",
"duration": 0.26,
"codes": [
1571,
1404,
1484,
1716,
1136,
1720,
1237,
1420,
1680,
892,
1458,
1697,
669,
1658,
859,
1128,
804,
1157,
1694
]
},
{
"word": "may",
"duration": 0.18,
"codes": [
1339,
761,
820,
1150,
823,
1706,
1815,
1354,
1417,
820,
744,
1413,
995,
733
]
},
{
"word": "be",
"duration": 0.18,
"codes": [
20,
1763,
1417,
821,
1384,
1784,
968,
1767,
501,
795,
378,
242,
447
]
},
{
"word": "capable",
"duration": 0.56,
"codes": [
666,
1170,
1637,
1746,
1042,
1331,
695,
1739,
1136,
1471,
1823,
1185,
1231,
459,
1071,
168,
418,
513,
431,
669,
840,
938,
1463,
1640,
1741,
86,
1273,
724,
1006,
544,
1408,
1352,
1721,
1490,
1321,
1674,
792,
1765,
1093,
1731,
1506,
1742,
1465
]
},
{
"word": "of",
"duration": 0.16,
"codes": [
1697,
1435,
42,
1593,
1573,
1146,
1600,
980,
878,
713,
796,
1364
]
},
{
"word": "supporting",
"duration": 0.62,
"codes": [
541,
833,
1546,
1230,
1232,
1417,
1473,
1486,
1759,
1327,
1806,
544,
918,
526,
418,
950,
669,
1749,
1499,
959,
1806,
203,
1771,
1651,
1433,
686,
967,
484,
649,
884,
176,
323,
1349,
722,
1230,
1218,
1430,
1663,
1648,
1808,
1629,
1822,
1813,
1663,
1418,
1742
]
},
{
"word": "life",
"duration": 0.22,
"codes": [
1622,
1648,
1141,
1682,
1353,
1351,
1822,
1229,
1621,
1435,
1766,
1428,
1727,
1343,
1769,
823,
1050
]
}
]
}
================================================
FILE: default_speakers/idera.json
================================================
{
"text": "Scientists have discovered a new planet that may be capable of supporting life!",
"words": [
{
"word": "scientists",
"duration": "1.00",
"codes": [
258,
551,
21,
401,
509,
235,
151,
94,
194,
496,
241,
420,
606,
256,
311,
464,
343,
765,
56,
23,
209,
72,
851,
360,
442,
257,
457,
75,
265,
227,
16,
167,
194,
391,
68,
786,
1642,
888,
884,
1688,
1021,
1270,
1250,
640,
1471,
1193,
1117,
95,
158,
587,
1484,
1054,
947,
521,
234,
502,
1172,
1379,
1332,
1267,
1659,
226,
325,
404,
634,
713,
333,
1210,
1028,
700,
1804,
1549,
1552,
1527,
701,
895
]
},
{
"word": "have",
"duration": "0.16",
"codes": [
652,
1487,
1045,
665,
384,
908,
1073,
903,
169,
91,
1242,
59,
1614
]
},
{
"word": "discovered",
"duration": "0.52",
"codes": [
1523,
519,
1311,
1166,
1049,
368,
176,
1546,
990,
546,
1091,
872,
975,
224,
419,
1714,
1247,
1769,
1141,
811,
1149,
320,
1161,
982,
732,
473,
1025,
470,
1253,
1345,
965,
916,
407,
844,
594,
1710,
193,
740,
761,
1740
]
},
{
"word": "a",
"duration": "0.08",
"codes": [
5,
414,
1608,
449,
1643,
1732,
1653
]
},
{
"word": "new",
"duration": "0.18",
"codes": [
396,
1599,
1733,
250,
1624,
485,
1645,
771,
1630,
736,
336,
476,
641,
345
]
},
{
"word": "planet",
"duration": "0.38",
"codes": [
21,
131,
1743,
1082,
1707,
86,
1075,
883,
944,
1103,
790,
978,
860,
1738,
1060,
749,
171,
679,
1144,
966,
1532,
1179,
714,
1123,
1308,
1524,
752,
1613,
1266
]
},
{
"word": "that",
"duration": "0.14",
"codes": [
64,
32,
1457,
1095,
931,
1774,
1017,
1661,
1713,
355,
1708
]
},
{
"word": "may",
"duration": "0.12",
"codes": [
1800,
1070,
1452,
1185,
1295,
26,
638,
240,
1480,
1461
]
},
{
"word": "be",
"duration": "0.12",
"codes": [
859,
729,
848,
1131,
1618,
928,
331,
504,
487,
417
]
},
{
"word": "capable",
"duration": "0.42",
"codes": [
686,
1040,
28,
1456,
1056,
1133,
901,
1127,
693,
1406,
20,
118,
141,
572,
845,
1280,
353,
1726,
338,
1413,
484,
272,
1569,
144,
1581,
437,
1502,
963,
1415,
655,
949,
1289
]
},
{
"word": "of",
"duration": "0.10",
"codes": [
1198,
1755,
1478,
1548,
802,
1513,
1290,
636
]
},
{
"word": "supporting",
"duration": "0.54",
"codes": [
541,
867,
750,
1505,
754,
1344,
1032,
734,
505,
559,
220,
288,
342,
591,
1459,
1721,
490,
825,
80,
1221,
1234,
639,
1052,
450,
1557,
1302,
784,
1547,
823,
527,
1667,
1437,
832,
1366,
674,
1607,
486,
893,
1748,
792,
1757
]
},
{
"word": "life",
"duration": "0.28",
"codes": [
1761,
149,
1501,
1342,
1063,
1124,
117,
1225,
1115,
1155,
1815,
1035,
936,
807,
930,
1514,
837,
1104,
1145,
1164,
1687,
1589
]
}
]
}
================================================
FILE: default_speakers/joke.json
================================================
{
"text": "i still said you and i was like mister so this is what you are doing with",
"words": [
{
"word": "i",
"duration": 0.34,
"codes": [
1737,
1555,
1439,
1679,
1634,
1661,
1764,
1698,
1715,
862,
1516,
1427,
1350,
1136,
1472,
1113,
1686,
1596,
1005,
1365,
1180,
1473,
1296,
1337,
1579
]
},
{
"word": "still",
"duration": 0.26,
"codes": [
848,
1653,
1756,
1711,
1693,
1722,
1580,
1552,
502,
1416,
1463,
1341,
1449,
1542,
1700,
1786,
428,
1728,
1624,
1624
]
},
{
"word": "said",
"duration": 0.24,
"codes": [
1657,
1744,
1657,
1634,
1615,
1534,
996,
1296,
1542,
577,
1047,
1506,
440,
1756,
1783,
1593,
906,
1810
]
},
{
"word": "you",
"duration": 0.62,
"codes": [
1610,
409,
1534,
1685,
1709,
1756,
363,
1441,
1789,
1594,
863,
1773,
1612,
1535,
1602,
1615,
1426,
48,
1690,
1740,
1650,
1824,
1613,
1807,
1041,
1778,
719,
1002,
1759,
1403,
1766,
1826,
1002,
1769,
1661,
1278,
1759,
1351,
1638,
1740,
1395,
1722,
1765,
1751,
1461,
1492
]
},
{
"word": "and",
"duration": 0.14,
"codes": [
1056,
1494,
1389,
1002,
1452,
1413,
1345,
1401,
1593,
1073,
775
]
},
{
"word": "i",
"duration": 0.08,
"codes": [
1812,
547,
1581,
1468,
949,
1740
]
},
{
"word": "was",
"duration": 0.16,
"codes": [
1662,
1542,
363,
1374,
1598,
1563,
1394,
473,
863,
1587,
1685,
1729
]
},
{
"word": "like",
"duration": 0.28,
"codes": [
1407,
1444,
1286,
1506,
1366,
1286,
1013,
502,
631,
1449,
1374,
1711,
1413,
1660,
1679,
1783,
1772,
1723,
1549,
1674,
1388
]
},
{
"word": "mister",
"duration": 0.84,
"codes": [
1591,
1765,
1653,
1549,
1449,
1341,
473,
1363,
1605,
1554,
1387,
1641,
1439,
362,
1606,
319,
1691,
1582,
1617,
1756,
1286,
1409,
1221,
1372,
1584,
794,
1636,
1488,
1280,
1366,
1753,
1636,
882,
1723,
1796,
1769,
1717,
1549,
1518,
1633,
175,
1678,
1679,
1549,
1732,
1710,
1662,
1744,
1641,
1696,
1565,
1769,
1789,
719,
1831,
1786,
1451,
1728,
1646,
1713,
1672,
1774,
1734
]
},
{
"word": "so",
"duration": 0.14,
"codes": [
1354,
1518,
1791,
1374,
277,
1542,
1366,
700,
1444,
1744,
1217
]
},
{
"word": "this",
"duration": 0.2,
"codes": [
1461,
1588,
1672,
1712,
1679,
175,
63,
426,
293,
1654,
57,
1616,
1394,
1789,
175
]
},
{
"word": "is",
"duration": 0.06,
"codes": [
1394,
1605,
1596,
1800,
269
]
},
{
"word": "what",
"duration": 0.16,
"codes": [
1706,
759,
1047,
1493,
637,
1723,
1772,
1748,
1634,
4,
1387,
1710
]
},
{
"word": "you",
"duration": 0.1,
"codes": [
890,
1374,
1019,
848,
1415,
1341,
1073
]
},
{
"word": "are",
"duration": 0.1,
"codes": [
1286,
127,
949,
870,
1734,
1593,
1761,
1717
]
},
{
"word": "doing",
"duration": 0.22,
"codes": [
1643,
1485,
1708,
1394,
1469,
348,
1676,
1685,
428,
1584,
1695,
1596,
1613,
1286,
1787,
1374
]
},
{
"word": "with",
"duration": 0.36,
"codes": [
1382,
615,
1127,
1742,
1591,
239,
1810,
1778,
719,
1616,
1549,
519,
1804,
1416,
1636,
1584,
1437,
1698,
1625,
1494,
1633,
1545,
1747,
1737,
1672,
1646,
1778
]
}
]
}
================================================
FILE: default_speakers/jude.json
================================================
{
"text": "know what I'm saying what I'm saying is that if you say",
"words": [
{
"word": "know",
"duration": 0.44,
"codes": [
1824,
1820,
1743,
1819,
1171,
1796,
1613,
1126,
1500,
1346,
1429,
1810,
1655,
1462,
1780,
1812,
1518,
1431,
741,
1206,
1325,
1392,
920,
409,
4,
1270,
416,
1759,
1141,
708,
1022,
1769,
1384
]
},
{
"word": "what",
"duration": 0.12,
"codes": [
607,
787,
48,
1350,
1340,
297,
364,
825,
1775
]
},
{
"word": "im",
"duration": 0.1,
"codes": [
1668,
1311,
1651,
1048,
176,
430,
333
]
},
{
"word": "saying",
"duration": 0.56,
"codes": [
822,
648,
1568,
1660,
1071,
1399,
890,
1396,
1381,
1818,
124,
1623,
361,
1588,
1688,
1280,
1805,
1659,
1605,
1412,
1672,
1752,
1741,
1514,
1817,
1796,
1763,
1790,
1595,
1788,
1823,
758,
1466,
1802,
1788,
1649,
1614,
1751,
1718,
1585,
1637,
1773
]
},
{
"word": "what",
"duration": 0.12,
"codes": [
1666,
1680,
1431,
411,
1687,
695,
1629,
1678,
664,
1087
]
},
{
"word": "im",
"duration": 0.16,
"codes": [
117,
408,
1813,
1729,
1336,
1710,
1833,
1615,
276,
362,
1364,
687
]
},
{
"word": "saying",
"duration": 0.26,
"codes": [
28,
440,
1376,
1196,
1147,
1636,
1272,
1449,
198,
1277,
1470,
1485,
1100,
1588,
1673,
1620,
1710,
1753,
806
]
},
{
"word": "is",
"duration": 0.06,
"codes": [
1621,
1636,
1833,
529,
1653
]
},
{
"word": "that",
"duration": 0.24,
"codes": [
1773,
1004,
1796,
907,
239,
1804,
565,
1432,
1534,
1718,
1643,
1432,
1447,
1273,
1824,
1657,
1776,
1651
]
},
{
"word": "if",
"duration": 0.12,
"codes": [
1649,
1620,
1342,
176,
1773,
178,
1710,
1710,
1521
]
},
{
"word": "you",
"duration": 0.16,
"codes": [
959,
1728,
1651,
361,
822,
1661,
1341,
780,
1518,
335,
452,
736
]
},
{
"word": "say",
"duration": 0.14,
"codes": [
372,
1217,
713,
848,
1140,
1420,
1549,
483,
125,
1353
]
}
]
}
================================================
FILE: default_speakers/onye.json
================================================
{
"text": "out to another level also going through in the shop chop scotch bonnet peppers",
"words": [
{
"word": "out",
"duration": 0.34,
"codes": [
546,
416,
1519,
1673,
1806,
1015,
693,
1447,
9,
1306,
1485,
1477,
1178,
1543,
1830,
1558,
1801,
1423,
1487,
1165,
1743,
1726,
1772,
368,
1555
]
},
{
"word": "to",
"duration": 0.28,
"codes": [
1823,
1713,
1734,
368,
1547,
1741,
1737,
1784,
1801,
1732,
1389,
994,
1158,
1278,
1800,
1658,
519,
1542,
1792,
1700,
1415
]
},
{
"word": "another",
"duration": 0.4,
"codes": [
1541,
1824,
1624,
1757,
1294,
1734,
1756,
1821,
1147,
1663,
1697,
1156,
1069,
53,
1223,
1212,
1736,
1748,
1744,
758,
1494,
374,
1187,
1448,
1410,
1356,
1732,
1452,
1295,
1656
]
},
{
"word": "level",
"duration": 1.86,
"codes": [
1688,
1527,
1417,
1486,
384,
1378,
1342,
1075,
1046,
1247,
1660,
1525,
719,
1769,
1628,
1810,
1078,
1429,
1483,
1280,
1814,
1115,
184,
1014,
1686,
1341,
1347,
1502,
1350,
1666,
1686,
1823,
1749,
1412,
1651,
1832,
1701,
1782,
1741,
1798,
1828,
1701,
1796,
1807,
1701,
1768,
1817,
1524,
1786,
1400,
1717,
1722,
1773,
1202,
1098,
1161,
1750,
822,
1420,
1434,
979,
1764,
1313,
1734,
1458,
1660,
1200,
370,
1636,
1186,
768,
855,
599,
1632,
1164,
1041,
1791,
1714,
368,
1715,
1500,
1817,
1817,
1772,
1805,
1825,
1818,
1828,
1395,
1718,
1818,
0,
1696,
1808,
1637,
1796,
1701,
1796,
1824,
1646,
1702,
1714,
895,
1764,
1637,
1717,
1747,
1751,
1696,
639,
1436,
1828,
1818,
1737,
1832,
1646,
1796,
1822,
1741,
1791,
1701,
1796,
1779,
1638,
1783,
1751,
1781,
1768,
1412,
1744,
1720,
1403,
1802,
1638,
1734,
1802,
1826,
1785,
1443,
1167
]
},
{
"word": "also",
"duration": 0.26,
"codes": [
973,
1187,
1333,
359,
1494,
1222,
1759,
749,
533,
4,
1599,
1608,
1280,
1167,
1015,
1526,
1662,
1728,
1016,
1796
]
},
{
"word": "going",
"duration": 0.26,
"codes": [
1789,
1291,
1209,
828,
1452,
1749,
1052,
1460,
1783,
1656,
1542,
1281,
1710,
1716,
1404,
1734,
495,
1624,
1747
]
},
{
"word": "through",
"duration": 0.34,
"codes": [
1465,
1664,
1786,
231,
1826,
1318,
1494,
1505,
1063,
1311,
1656,
1265,
1720,
1226,
940,
1490,
1447,
1730,
1348,
1637,
1118,
1710,
841,
795,
298,
1216
]
},
{
"word": "in",
"duration": 0.42,
"codes": [
899,
1240,
869,
679,
1343,
1280,
1681,
1221,
1632,
1221,
1479,
1431,
1623,
1372,
1722,
1494,
1011,
1636,
957,
1661,
939,
1772,
1096,
1688,
1537,
1360,
1734,
1595,
1781,
1284,
1413
]
},
{
"word": "the",
"duration": 1.08,
"codes": [
1701,
1447,
1328,
1690,
1281,
1401,
700,
1295,
1494,
1326,
1218,
361,
922,
1210,
1300,
19,
1403,
1272,
1150,
1062,
1457,
1344,
1167,
1742,
996,
1158,
1245,
1210,
1720,
1823,
85,
1829,
1555,
1718,
979,
1665,
1783,
1088,
1810,
1828,
1795,
1419,
1795,
1826,
1779,
1741,
1719,
1809,
1646,
1765,
1818,
1713,
1821,
1737,
1348,
1821,
1400,
1748,
1278,
1521,
758,
1701,
1798,
1817,
1646,
1672,
1825,
1796,
957,
1808,
1807,
1833,
1798,
1425,
1830,
1037,
1251,
554,
1395,
175,
919
]
},
{
"word": "shop",
"duration": 0.3,
"codes": [
1611,
154,
1329,
1701,
1677,
1210,
880,
660,
816,
1276,
1471,
41,
1779,
1465,
1298,
1817,
1777,
1073,
1713,
1808,
1818,
1348,
1711
]
},
{
"word": "chop",
"duration": 0.3,
"codes": [
1439,
4,
315,
1751,
1731,
53,
1184,
1132,
755,
1429,
1464,
1483,
1770,
1749,
1278,
1769,
1511,
1683,
1779,
1660,
183,
1535,
416
]
},
{
"word": "scotch",
"duration": 0.4,
"codes": [
1518,
1679,
0,
1695,
1682,
1098,
1764,
1256,
1808,
1609,
1745,
1318,
632,
1197,
271,
1683,
1774,
1824,
1783,
1671,
1805,
22,
631,
117,
1345,
800,
1707,
1466,
1005,
1462
]
},
{
"word": "bonnet",
"duration": 0.34,
"codes": [
1677,
1826,
1277,
524,
1001,
789,
973,
1509,
1817,
546,
1260,
1117,
782,
142,
1455,
947,
1814,
1815,
0,
1538,
1766,
1744,
1824,
239,
1710
]
},
{
"word": "peppers",
"duration": 0.5,
"codes": [
1817,
1287,
1769,
1309,
446,
1173,
1183,
375,
1342,
1815,
1382,
1685,
1797,
1351,
1798,
1631,
749,
1717,
1324,
1147,
1186,
955,
577,
1736,
827,
1240,
1484,
847,
1661,
1475,
1287,
1535,
595,
1286,
1734,
1256,
319,
1688
]
}
]
}
================================================
FILE: default_speakers/osagie.json
================================================
{
"text": "do Charlotte Douglas shallots be me shut up dummy Libby shallots foolish storms",
"words": [
{
"word": "do",
"duration": 1.18,
"codes": [
1798,
858,
1653,
1400,
1441,
1810,
1180,
892,
1487,
380,
208,
452,
181,
714,
521,
152,
1180,
2,
142,
756,
208,
874,
380,
565,
422,
656,
81,
860,
146,
1042,
1685,
1580,
50,
137,
132,
170,
1633,
648,
1819,
898,
1247,
1646,
1491,
438,
85,
46,
170,
664,
2,
236,
65,
100,
393,
324,
170,
1499,
1619,
519,
123,
798,
79,
1447,
132,
146,
779,
380,
221,
1588,
228,
1443,
152,
1366,
1441,
189,
320,
1387,
368,
1599,
295,
65,
1353,
13,
920,
1341,
55,
315,
1542,
315
]
},
{
"word": "charlotte",
"duration": 0.42,
"codes": [
543,
769,
69,
714,
725,
212,
374,
1439,
25,
1453,
637,
291,
1212,
106,
1671,
146,
82,
1261,
1710,
686,
1571,
213,
298,
510,
452,
1396,
1635,
1760,
1469,
1793,
1233,
851
]
},
{
"word": "douglas",
"duration": 0.42,
"codes": [
1539,
2,
679,
51,
215,
1068,
295,
115,
1150,
753,
1806,
287,
85,
725,
1312,
293,
614,
1610,
380,
260,
1014,
104,
777,
1697,
270,
580,
794,
1345,
1552,
7,
178
]
},
{
"word": "shallots",
"duration": 0.48,
"codes": [
315,
290,
333,
1761,
412,
520,
125,
367,
1001,
700,
1258,
955,
388,
880,
324,
637,
642,
1723,
1480,
990,
507,
652,
69,
1670,
1073,
1433,
830,
1737,
1769,
1829,
1524,
1605,
1737,
1660,
1782,
1687,
1802
]
},
{
"word": "be",
"duration": 0.16,
"codes": [
1715,
687,
1365,
49,
98,
357,
1416,
245,
1058,
870,
1689,
1588
]
},
{
"word": "me",
"duration": 0.36,
"codes": [
1469,
1221,
1783,
127,
372,
519,
98,
50,
1439,
876,
362,
1439,
1506,
1452,
736,
1740,
1715,
1641,
1628,
1807,
1654,
1601,
911,
788,
1451,
356,
1450
]
},
{
"word": "shut",
"duration": 0.34,
"codes": [
202,
543,
1527,
1345,
105,
721,
128,
571,
1180,
1366,
1187,
860,
1113,
1089,
270,
113,
525,
992,
1588,
975,
668,
780,
399,
233,
510
]
},
{
"word": "up",
"duration": 0.1,
"codes": [
1715,
1833,
1719,
363,
1763,
1784,
1765,
85
]
},
{
"word": "dummy",
"duration": 0.36,
"codes": [
101,
47,
1127,
205,
164,
647,
300,
737,
300,
910,
549,
1598,
333,
900,
1521,
1287,
917,
362,
290,
1353,
917,
407,
1588,
1396,
1415,
440,
1565
]
},
{
"word": "libby",
"duration": 0.36,
"codes": [
935,
479,
153,
127,
162,
782,
932,
1023,
1262,
343,
1728,
502,
1401,
996,
350,
1445,
856,
298,
48,
1698,
1470,
1736,
26,
1342,
328,
372,
1451
]
},
{
"word": "shallots",
"duration": 0.4,
"codes": [
7,
50,
519,
1221,
212,
238,
1083,
844,
333,
182,
472,
839,
609,
656,
208,
291,
1234,
1678,
1151,
867,
290,
546,
848,
1700,
1740,
26,
1617,
1238,
183,
1693
]
},
{
"word": "foolish",
"duration": 0.38,
"codes": [
863,
176,
1546,
1470,
1435,
716,
1460,
1013,
217,
1374,
736,
91,
959,
767,
1678,
1541,
903,
362,
1336,
1345,
546,
848,
253,
335,
510,
69,
546,
1166,
1677
]
},
{
"word": "storms",
"duration": 0.4,
"codes": [
939,
1361,
1719,
1428,
1691,
319,
1596,
236,
757,
1625,
123,
1297,
55,
132,
708,
92,
1344,
848,
1232,
518,
695,
1726,
1502,
1759,
363,
1751,
1524,
409,
189,
0
]
}
]
}
================================================
FILE: default_speakers/regina.json
================================================
{
"text": "was just like is that what is amazing to you your marriage is",
"words": [
{
"word": "was",
"duration": 1.02,
"codes": [
1514,
571,
892,
386,
186,
1403,
1082,
636,
851,
1287,
1678,
1166,
162,
1345,
282,
104,
1345,
329,
637,
844,
537,
1366,
537,
282,
1485,
537,
637,
844,
537,
1710,
375,
452,
1588,
537,
1382,
714,
206,
333,
330,
344,
281,
1523,
44,
1557,
315,
479,
271,
370,
110,
498,
768,
560,
579,
847,
961,
293,
1351,
1141,
138,
1229,
2,
847,
1245,
1345,
1829,
1811,
1326,
955,
1314,
137,
270,
1743,
324,
1389,
1027,
863
]
},
{
"word": "just",
"duration": 0.28,
"codes": [
333,
38,
1518,
1296,
146,
1077,
1204,
665,
658,
1005,
944,
1136,
519,
749,
1061,
69,
1363,
415,
1679,
1741,
138
]
},
{
"word": "like",
"duration": 1.68,
"codes": [
1796,
714,
65,
13,
664,
1077,
463,
232,
461,
1210,
356,
346,
1196,
202,
631,
1804,
1096,
450,
23,
1535,
415,
582,
328,
546,
1571,
344,
1512,
1242,
141,
194,
220,
258,
246,
220,
246,
542,
258,
246,
220,
151,
246,
542,
342,
220,
75,
246,
220,
246,
542,
246,
220,
542,
161,
450,
419,
246,
542,
246,
542,
246,
220,
542,
246,
246,
542,
246,
542,
342,
542,
342,
246,
542,
342,
220,
75,
246,
75,
246,
542,
246,
220,
75,
161,
542,
342,
220,
258,
246,
220,
75,
342,
220,
258,
194,
220,
436,
246,
220,
194,
194,
1442,
246,
220,
246,
246,
246,
151,
1551,
1522,
1362,
652,
1557,
333,
273,
928,
1551,
180,
1570,
652,
1664,
6,
654,
281,
1578,
1557,
1346,
756
]
},
{
"word": "is",
"duration": 0.06,
"codes": [
1337,
1662,
198,
33
]
},
{
"word": "that",
"duration": 0.12,
"codes": [
1679,
236,
934,
1056,
208,
609,
860,
1318,
1340
]
},
{
"word": "what",
"duration": 0.14,
"codes": [
1618,
806,
1068,
113,
1686,
428,
230,
409,
263,
415,
175
]
},
{
"word": "is",
"duration": 0.1,
"codes": [
415,
1773,
1539,
124,
1563,
700,
579
]
},
{
"word": "amazing",
"duration": 0.34,
"codes": [
973,
695,
1247,
1737,
1609,
1664,
1006,
134,
409,
416,
774,
848,
1542,
10,
1441,
1539,
129,
1698,
687,
1620,
1340,
749,
469,
1695,
448,
448
]
},
{
"word": "to",
"duration": 0.12,
"codes": [
189,
198,
124,
1753,
510,
1825,
856,
1441,
1688
]
},
{
"word": "you",
"duration": 1.62,
"codes": [
1552,
1546,
1698,
166,
101,
1457,
137,
864,
790,
794,
1615,
454,
1512,
328,
634,
1578,
409,
1592,
176,
1441,
1644,
356,
1641,
1580,
510,
1609,
407,
882,
1580,
218,
1616,
865,
409,
1570,
1376,
1734,
34,
687,
1592,
556,
640,
1592,
6,
1362,
4,
1546,
1302,
1376,
1570,
34,
652,
180,
1569,
203,
1744,
282,
945,
362,
931,
1662,
631,
1580,
452,
329,
725,
140,
277,
1113,
537,
1332,
560,
282,
1056,
270,
940,
755,
860,
104,
903,
537,
1310,
579,
282,
848,
371,
844,
1808,
400,
1772,
1166,
213,
1485,
1502,
276,
1594,
1599,
1819,
1197,
441,
1318,
1237,
679,
1186,
384,
609,
637,
157,
609,
637,
157,
790,
157,
547,
452,
452,
870,
162,
320,
1649,
1272,
1318,
860
]
},
{
"word": "your",
"duration": 0.16,
"codes": [
1477,
67,
113,
1149,
479,
901,
1232,
295,
9,
1129,
67,
1825
]
},
{
"word": "marriage",
"duration": 0.8,
"codes": [
529,
697,
695,
1429,
282,
626,
1355,
192,
1671,
100,
95,
1310,
388,
1155,
1494,
104,
104,
587,
1156,
67,
57,
1437,
697,
714,
1221,
1443,
2,
1357,
931,
931,
1298,
388,
1136,
1604,
428,
1240,
1698,
65,
1272,
128,
755,
79,
794,
1698,
1518,
1546,
1696,
448,
233,
1599,
1732,
1240,
110,
775,
483,
100,
1075,
346,
863,
1498
]
},
{
"word": "is",
"duration": 0.1,
"codes": [
631,
18,
679,
430,
176,
10,
52
]
}
]
}
================================================
FILE: default_speakers/remi.json
================================================
{
"text": "animal noral human being",
"words": [
{
"word": "animal",
"duration": 2.79,
"codes": [
1679,
1711,
714,
1588,
906,
725,
789,
456,
79,
230,
1127,
532,
200,
834,
29,
753,
1420,
595,
997,
557,
205,
488,
775,
63,
1520,
1600,
1394,
1811,
1715,
473,
805,
128,
502,
1353,
1636,
1832,
182,
381,
281,
1540,
748,
1341,
1744,
374,
1767,
182,
621,
495,
234,
909,
1383,
92,
1545,
1394,
1794,
1641,
319,
1452,
1240,
217,
1815,
388,
828,
1664,
184,
1239,
319,
1469,
1810,
36,
1019,
1451,
774,
1819,
1521,
761,
23,
1609,
273,
52,
1670,
524,
813,
806,
79,
1141,
1677,
138,
1409,
1468,
1633,
1573,
782,
1655,
1669,
1239,
458,
1495,
258,
544,
1532,
1567,
1627,
1641,
851,
1573,
1569,
265,
686,
72,
151,
342,
194,
75,
419,
342,
542,
419,
75,
342,
246,
75,
342,
246,
56,
161,
246,
442,
161,
56,
156,
420,
161,
75,
219,
194,
56,
156,
220,
453,
156,
1019,
490,
1415,
742,
1533,
412,
828,
138,
1487,
128,
660,
1339,
882,
154,
1533,
47,
312,
730,
1087,
764,
346,
1394,
179,
959,
1344,
324,
1457,
388,
57,
514,
1323,
631,
6,
479,
815,
1599,
384,
952,
1650,
57,
314,
320,
787,
1488,
147,
203,
1078,
192,
1663,
236,
1501,
270,
1280,
716,
631,
1584,
1605,
1779,
1239,
363,
1437,
430,
1554,
1069,
189,
319,
856,
143
]
},
{
"word": "noral",
"duration": 0.56,
"codes": [
1831,
201,
1674,
1707,
1807,
487,
1577,
1394,
1341,
412,
814,
205,
1633,
79,
1267,
1625,
315,
1649,
4,
780,
368,
592,
1633,
592,
1431,
1563,
599,
176,
10,
725,
1468,
76,
593,
714,
146,
974,
725,
549,
57,
1068,
1729,
52
]
},
{
"word": "human",
"duration": 0.82,
"codes": [
1552,
233,
298,
949,
1636,
380,
363,
1520,
1768,
85,
483,
876,
125,
153,
564,
200,
1221,
803,
1712,
117,
804,
688,
787,
1345,
592,
291,
472,
158,
132,
1827,
617,
157,
36,
1186,
1008,
324,
961,
644,
179,
931,
1400,
688,
1015,
488,
532,
500,
952,
945,
29,
1497,
529,
749,
1733,
439,
63,
1773,
1527,
1622,
728,
1613,
1274,
136
]
},
{
"word": "being",
"duration": 0.54,
"codes": [
546,
1287,
166,
315,
1678,
882,
1753,
1018,
1449,
1581,
298,
1710,
1799,
1772,
1406,
1538,
1728,
1657,
1778,
182,
921,
217,
1615,
133,
217,
1516,
1830,
844,
1584,
338,
1639,
644,
417,
774,
1724,
648,
749,
4,
315,
1497
]
}
]
}
================================================
FILE: default_speakers/saheed.json
================================================
{
"text": "Hello! My name is Saheed azeez and I am testing the audio feature",
"words": [
{
"word": "hello",
"duration": 2.38,
"codes": [
219,
244,
244,
167,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
453,
244,
219,
139,
966,
1099,
1299,
1433,
1128,
1266,
1517,
649,
196,
1731,
1405,
830,
1771,
964,
476,
1803,
584,
875,
1683,
986,
363,
1489,
465,
5,
1067,
606,
1590,
1397,
265,
1446,
1279,
799,
1491,
1367,
606,
1593,
1279,
360,
256,
1705,
1425,
58,
1210,
1357,
1379,
752,
1640,
837,
734,
1787,
1406,
1052,
1796,
686,
1446,
1716,
564,
595,
1716,
728,
847,
732,
935,
1253,
752,
1019,
1455,
564,
1492,
733,
1645,
1391,
728,
1501,
1822,
1339,
1677,
1456,
807,
1738,
710,
1381,
1292,
406,
1517,
1458,
761,
1361,
649,
17,
1367,
606,
1771,
1028,
464,
1309,
691,
1023,
1314,
692,
1373,
837,
442,
1683,
838,
476,
1475,
950,
136,
1309,
465,
17,
19,
765,
1553,
1305,
534,
1309,
666,
761,
1067,
442,
1704,
1128,
633,
1438,
1011,
406,
1489,
136,
1813,
1589,
763,
1489,
696,
643,
1305,
246,
406,
1421,
37
]
},
{
"word": "my",
"duration": 0.2,
"codes": [
1187,
1770,
646,
1174,
1771,
1192,
800,
310,
1318,
1500,
909,
1104,
1792,
1218,
1832
]
},
{
"word": "name",
"duration": 0.24,
"codes": [
875,
1583,
1632,
671,
1002,
905,
1073,
1294,
595,
1684,
1501,
1797,
850,
1761,
1751,
935,
1443,
1781
]
},
{
"word": "is",
"duration": 0.14,
"codes": [
1780,
1215,
1674,
1815,
1451,
1673,
1303,
1660,
1613,
1379,
1756
]
},
{
"word": "saheed",
"duration": 0.68,
"codes": [
1419,
1568,
1643,
1099,
1795,
970,
1184,
1498,
877,
1162,
902,
1537,
1192,
1565,
1472,
1109,
1225,
1321,
1453,
1654,
1274,
1811,
1695,
946,
1631,
1590,
1152,
820,
272,
1458,
1378,
240,
1421,
174,
925,
1126,
1346,
1600,
1716,
258,
1611,
442,
625,
1448,
246,
957,
226,
338,
1190,
921,
1505
]
},
{
"word": "azeez",
"duration": 0.8,
"codes": [
1195,
646,
1505,
1014,
250,
837,
729,
121,
1715,
1446,
1430,
1608,
1575,
1057,
1643,
1514,
1795,
893,
1718,
1383,
840,
1802,
426,
1414,
1573,
1784,
1285,
852,
1246,
896,
1744,
1299,
495,
1796,
1570,
1665,
505,
888,
1654,
343,
1120,
1474,
16,
1035,
505,
1699,
862,
692,
1623,
633,
566,
1037,
342,
950,
261,
729,
1317,
177,
1213,
1333
]
},
{
"word": "and",
"duration": 0.34,
"codes": [
908,
1203,
1683,
926,
1278,
564,
1067,
1003,
90,
459,
568,
272,
1117,
1396,
1411,
1233,
193,
1197,
970,
1065,
1611,
883,
1216,
1776,
747
]
},
{
"word": "i",
"duration": 0.06,
"codes": [
924,
1628,
988,
1116,
1388
]
},
{
"word": "am",
"duration": 0.18,
"codes": [
1199,
1188,
593,
953,
459,
272,
869,
1321,
145,
1306,
272,
406,
1479
]
},
{
"word": "testing",
"duration": 0.44,
"codes": [
237,
1003,
1638,
638,
1180,
1666,
811,
1178,
1565,
814,
1211,
1654,
1779,
1313,
1619,
1684,
1230,
419,
891,
28,
1231,
1379,
729,
1682,
338,
1468,
136,
1630,
1215,
251,
1464,
781,
598
]
},
{
"word": "the",
"duration": 0.22,
"codes": [
555,
692,
663,
1632,
905,
807,
1085,
752,
1433,
392,
921,
1820,
363,
987,
1328,
734,
1063
]
},
{
"word": "audio",
"duration": 0.34,
"codes": [
1294,
814,
1423,
1750,
747,
672,
651,
250,
1478,
37,
1760,
1021,
850,
58,
438,
953,
1668,
771,
729,
1456,
322,
591,
1474,
1440,
1170
]
},
{
"word": "feature",
"duration": 0.4,
"codes": [
332,
1333,
1146,
1025,
19,
501,
169,
1250,
734,
1629,
1383,
355,
1747,
584,
237,
1428,
240,
1298,
999,
1338,
1438,
1727,
987,
1455,
792,
932,
1199,
355,
1185,
772
]
}
]
}
================================================
FILE: default_speakers/tayo.json
================================================
{
"text": "and enjoy ourselves we need more parties let party start again now we know",
"words": [
{
"word": "and",
"duration": 0.5,
"codes": [
82,
1201,
329,
992,
908,
847,
925,
1666,
1057,
1266,
1448,
1737,
1251,
1031,
1759,
1459,
1094,
1750,
1739,
1521,
594,
1625,
732,
1326,
1095,
828,
239,
752,
1221,
1382,
705,
1716,
865,
1503,
478,
1692,
938
]
},
{
"word": "enjoy",
"duration": 0.4,
"codes": [
844,
192,
737,
344,
276,
138,
48,
1616,
28,
1530,
1550,
1383,
1712,
69,
1261,
547,
249,
1047,
500,
182,
63,
1445,
935,
865,
1478,
1670,
479,
116,
1674,
886
]
},
{
"word": "ourselves",
"duration": 0.7,
"codes": [
467,
1534,
901,
569,
1740,
882,
1579,
507,
276,
1296,
543,
399,
404,
1624,
1666,
153,
102,
1323,
1552,
65,
898,
1577,
757,
1446,
1022,
363,
124,
947,
1441,
581,
1677,
1269,
1525,
1170,
505,
1681,
1212,
1273,
1364,
1513,
1826,
1139,
1756,
639,
1450,
1810,
1638,
1644,
1669,
1519,
851,
1362,
1672
]
},
{
"word": "we",
"duration": 0.1,
"codes": [
875,
1558,
1249,
1445,
181,
738,
1641
]
},
{
"word": "need",
"duration": 0.14,
"codes": [
1603,
177,
195,
65,
1600,
104,
143,
1574,
1416,
160,
50
]
},
{
"word": "more",
"duration": 0.18,
"codes": [
48,
1597,
39,
1414,
74,
1192,
84,
1345,
748,
1269,
1672,
686,
1820,
1442
]
},
{
"word": "parties",
"duration": 0.56,
"codes": [
1640,
1030,
138,
147,
413,
110,
282,
1633,
1659,
1524,
176,
350,
137,
1004,
92,
1240,
1521,
1376,
502,
1558,
592,
473,
1021,
1805,
1346,
1393,
1759,
1786,
231,
1728,
117,
1366,
1754,
1073,
1786,
1354,
1532,
1572,
1754,
16,
257,
273
]
},
{
"word": "let",
"duration": 0.16,
"codes": [
1312,
961,
372,
212,
1253,
115,
656,
1374,
78,
1322,
1284,
343
]
},
{
"word": "party",
"duration": 0.24,
"codes": [
1572,
1662,
25,
390,
892,
212,
637,
576,
176,
1702,
640,
276,
52,
648,
577,
1240,
276,
155
]
},
{
"word": "start",
"duration": 0.3,
"codes": [
213,
356,
1603,
1284,
1442,
1599,
705,
82,
65,
764,
349,
370,
856,
1524,
1508,
209,
495,
1552,
50,
1588,
863,
63
]
},
{
"word": "again",
"duration": 0.3,
"codes": [
1267,
273,
298,
1409,
101,
1548,
733,
625,
1728,
1283,
286,
1645,
1363,
368,
153,
289,
716,
1756,
865,
1376,
688,
332,
731
]
},
{
"word": "now",
"duration": 0.44,
"codes": [
983,
385,
1002,
806,
1798,
95,
1776,
825,
1790,
737,
1595,
907,
932,
1786,
626,
831,
1823,
1680,
1780,
1502,
1206,
1078,
47,
829,
868,
69,
277,
429,
125,
132,
14,
1497,
444
]
},
{
"word": "we",
"duration": 1.32,
"codes": [
1692,
648,
481,
155,
483,
126,
1283,
12,
108,
429,
828,
128,
1161,
725,
155,
107,
1610,
228,
1492,
1560,
368,
1138,
810,
1572,
1562,
320,
112,
520,
52,
49,
1008,
1635,
1728,
1523,
62,
190,
648,
592,
384,
969,
1441,
519,
1536,
1571,
1587,
1539,
15,
1156,
376,
1022,
642,
483,
1794,
1335,
1712,
1449,
529,
1558,
1463,
1559,
1706,
1460,
249,
1308,
293,
529,
841,
201,
1256,
931,
132,
1173,
479,
286,
1075,
153,
13,
1503,
398,
415,
432,
7,
183,
103,
409,
736,
15,
940,
1459,
15,
1631,
1580,
1773,
624,
1417,
926,
531,
1159,
1257
]
},
{
"word": "know",
"duration": 0.44,
"codes": [
777,
1240,
446,
303,
153,
263,
1402,
317,
1365,
481,
848,
1280,
354,
1415,
245,
408,
462,
466,
253,
943,
472,
215,
143,
519,
202,
1389,
1608,
714,
1599,
399,
944,
124,
844
]
}
]
}
================================================
FILE: default_speakers/umar.json
================================================
{
"text": "that i'd like to share with everybody in the world yes sometimes you go all the way",
"words": [
{
"word": "that",
"duration": 0.48,
"codes": [
519,
848,
1374,
416,
940,
1445,
416,
753,
1616,
774,
803,
1697,
1541,
1047,
200,
462,
1417,
1313,
1296,
184,
1396,
1568,
1416,
1444,
1631,
1463,
702,
1831,
1564,
1374,
1580,
1643,
1681,
1660,
1124,
1720
]
},
{
"word": "id",
"duration": 0.38,
"codes": [
4,
705,
1534,
1290,
1661,
302,
1798,
844,
197,
1027,
1606,
903,
1414,
794,
871,
882,
941,
1310,
871,
1247,
1140,
1247,
718,
1422,
1509,
1678,
1093,
1734
]
},
{
"word": "like",
"duration": 0.18,
"codes": [
647,
1824,
474,
1111,
599,
221,
1435,
822,
1409,
1717,
1748,
1550,
1738,
1717
]
},
{
"word": "to",
"duration": 0.14,
"codes": [
1535,
231,
1794,
1553,
1351,
1365,
1296,
1781,
1599,
1082
]
},
{
"word": "share",
"duration": 0.18,
"codes": [
1737,
0,
979,
1688,
546,
1807,
319,
252,
1805,
714,
580,
1524,
798,
1779
]
},
{
"word": "with",
"duration": 0.14,
"codes": [
1698,
702,
966,
1461,
127,
1681,
85,
1741,
1588,
718
]
},
{
"word": "everybody",
"duration": 0.4,
"codes": [
1600,
806,
1770,
1078,
1727,
679,
1569,
1452,
1685,
774,
1598,
1382,
1520,
1786,
1702,
1607,
1747,
828,
1553,
983,
1103,
882,
1427,
1679,
1613,
1636,
1433,
519,
853,
1451
]
},
{
"word": "in",
"duration": 0.06,
"codes": [
1369,
1654,
1581,
1600,
1452
]
},
{
"word": "the",
"duration": 0.12,
"codes": [
1241,
1769,
678,
1751,
1280,
1711,
1663,
1772,
1655
]
},
{
"word": "world",
"duration": 0.74,
"codes": [
973,
1231,
1015,
1052,
1415,
721,
1822,
825,
1076,
1431,
1357,
1389,
744,
1263,
1525,
1794,
319,
1678,
1732,
1395,
1695,
1827,
1059,
1719,
1675,
1714,
1635,
1466,
1730,
1750,
1395,
1525,
1827,
1313,
1440,
1447,
1292,
1762,
1226,
1418,
1750,
719,
1549,
1761,
1459,
1717,
1800,
1404,
1702,
1795,
1711,
1789,
1808,
1759,
385,
415
]
},
{
"word": "yes",
"duration": 0.32,
"codes": [
302,
1704,
485,
983,
234,
63,
462,
483,
82,
827,
999,
1143,
102,
1655,
117,
1619,
519,
1217,
1518,
1476,
333,
1660,
1238,
1679
]
},
{
"word": "sometimes",
"duration": 0.58,
"codes": [
1287,
546,
1552,
1736,
1647,
836,
575,
354,
1156,
1264,
1194,
1761,
1629,
1452,
1241,
1394,
856,
1313,
1653,
736,
556,
1387,
1824,
966,
373,
1424,
1342,
221,
580,
1412,
940,
626,
1797,
858,
972,
1525,
1744,
738,
1695,
1542,
1604,
1394,
1627
]
},
{
"word": "you",
"duration": 0.12,
"codes": [
1460,
546,
1427,
1451,
1081,
1760,
1463,
1628,
1692
]
},
{
"word": "go",
"duration": 0.26,
"codes": [
1521,
1734,
753,
770,
1640,
1757,
297,
462,
702,
1826,
1440,
1828,
1747,
1651,
1729,
1087,
580,
1698,
1194,
1308
]
},
{
"word": "all",
"duration": 0.42,
"codes": [
863,
610,
429,
443,
1087,
183,
782,
613,
222,
1047,
1492,
154,
955,
429,
443,
613,
983,
328,
382,
359,
341,
217,
456,
289,
1324,
714,
756,
369,
211,
127,
1827,
1563
]
},
{
"word": "the",
"duration": 0.12,
"codes": [
1686,
949,
1296,
829,
1463,
1731,
1222,
1353,
1780
]
},
{
"word": "way",
"duration": 0.18,
"codes": [
1263,
890,
683,
289,
217,
326,
335,
1059,
1204,
213,
1340,
289,
191
]
}
]
}
================================================
FILE: default_speakers/zainab.json
================================================
{
"text": "mama giver her because she gave so",
"words": [
{
"word": "mama",
"duration": 1.46,
"codes": [
1734,
1812,
1759,
1721,
1765,
1769,
1805,
1800,
1734,
1380,
1706,
1724,
1695,
1769,
1772,
1689,
1511,
339,
1077,
1492,
1494,
1353,
890,
753,
29,
607,
1812,
1310,
1326,
1497,
818,
1716,
1776,
1155,
1645,
1545,
1371,
1454,
1205,
1464,
703,
1096,
1285,
1811,
1494,
738,
1248,
1725,
952,
230,
1415,
1691,
1718,
41,
1685,
1783,
1092,
1346,
954,
776,
702,
1157,
1152,
1768,
572,
1025,
1750,
1231,
900,
1764,
1246,
1572,
1711,
1534,
1320,
1389,
197,
1584,
1019,
1576,
1027,
1402,
506,
1402,
617,
1490,
1358,
770,
1666,
1025,
921,
1658,
830,
1062,
1598,
1095,
1174,
1680,
1501,
1332,
1827,
1588,
231,
1633,
1591,
736,
1825,
1696,
1614
]
},
{
"word": "giver",
"duration": 0.36,
"codes": [
1346,
404,
1270,
1389,
1363,
1426,
1008,
473,
1341,
1604,
1773,
385,
1685,
736,
1778,
1577,
1189,
1830,
973,
1192,
1624,
1766,
1344,
1542,
1463,
1253,
1554
]
},
{
"word": "her",
"duration": 1.89,
"codes": [
1828,
1287,
1520,
1671,
1546,
932,
1367,
1176,
953,
1225,
1508,
1822,
1642,
381,
1003,
1288,
355,
627,
256,
1231,
822,
863,
1826,
788,
1786,
1796,
1585,
1266,
1236,
1157,
476,
1425,
1814,
1488,
1763,
343,
385,
1419,
1413,
1537,
1465,
1413,
1689,
975,
27,
1804,
1766,
1750,
1612,
1293,
1613,
1629,
1011,
1572,
1708,
1669,
1440,
1598,
1514,
1773,
1166,
1769,
923,
1792,
1764,
1491,
1807,
1768,
1157,
1808,
1491,
1721,
1816,
1783,
901,
1468,
1824,
1743,
1801,
1745,
1656,
1425,
1745,
1775,
1807,
714,
1755,
1704,
1661,
1493,
776,
1783,
416,
1670,
1406,
1769,
362,
1636,
1464,
1651,
1403,
1800,
1426,
1831,
1827,
1160,
1759,
1720,
1651,
1762,
1331,
1746,
1433,
1466,
1023,
1425,
1742,
486,
1771,
1816,
1301,
1583,
320,
1300,
315,
52,
1217,
67,
502,
1485,
848,
1734,
1387,
1783,
1626,
920,
361,
1715,
1657,
1560,
85,
1562
]
},
{
"word": "because",
"duration": 0.48,
"codes": [
1756,
844,
245,
1310,
312,
344,
1734,
1319,
1722,
1386,
1230,
461,
1344,
847,
658,
1078,
1554,
537,
987,
848,
1055,
840,
1710,
736,
1679,
213,
844,
731,
631,
1638,
166,
858,
1535,
50,
1651,
713
]
},
{
"word": "she",
"duration": 0.38,
"codes": [
556,
1735,
654,
1524,
1769,
1387,
639,
1463,
1625,
1726,
1664,
1691,
1531,
1603,
1833,
121,
1627,
1757,
736,
1583,
1684,
1741,
1831,
1791,
1034,
1807,
1338,
1737
]
},
{
"word": "gave",
"duration": 0.76,
"codes": [
1790,
430,
1310,
399,
599,
1542,
1394,
1075,
834,
428,
1015,
249,
362,
945,
108,
1308,
29,
362,
1766,
448,
1370,
197,
298,
1353,
1566,
1485,
1341,
1544,
1468,
1366,
849,
1584,
1441,
1696,
1610,
1702,
702,
1508,
1653,
1508,
1535,
502,
1485,
232,
648,
863,
631,
348,
372,
129,
1296,
253,
1599,
1364,
315,
920,
18,
183
]
},
{
"word": "so",
"duration": 0.14,
"codes": [
428,
372,
15,
202,
286,
1344,
714,
966,
1341,
184
]
}
]
}
================================================
FILE: default_speakers_local/hausa_female1.json
================================================
{
"text": "Idan hira tayi \u0257a\u0257i bana son na tashi.",
"words": [
{
"word": "idan",
"duration": "0.52",
"codes": [
165,
338,
781,
661,
601,
691,
1154,
762,
691,
523,
641,
378,
1464,
38,
1280,
243,
1784,
195,
5,
1679,
77,
530,
1527,
270,
243,
374,
200,
157,
152,
228,
768,
743,
104,
221,
968,
479,
321,
1679,
1279
]
},
{
"word": "hira",
"duration": "0.38",
"codes": [
1587,
1544,
683,
92,
1255,
46,
106,
636,
320,
53,
249,
123,
1140,
1290,
93,
553,
0,
1192,
210,
587,
1184,
764,
215,
221,
2,
1115,
1079,
1033
]
},
{
"word": "tayi",
"duration": "0.38",
"codes": [
447,
1292,
198,
50,
1439,
1191,
1399,
106,
880,
844,
306,
466,
74,
260,
152,
723,
723,
687,
306,
195,
648,
466,
30,
1110,
637,
384,
1131,
342,
392
]
},
{
"word": "dadi",
"duration": "0.38",
"codes": [
751,
412,
212,
306,
388,
589,
446,
479,
880,
768,
467,
699,
128,
665,
882,
908,
171,
1146,
1297,
687,
901,
1110,
153,
386,
1330,
1283,
1181,
1070,
766
]
},
{
"word": "bana",
"duration": "0.46",
"codes": [
534,
1440,
1102,
1194,
474,
252,
39,
367,
116,
212,
36,
115,
76,
1173,
931,
1285,
1630,
678,
1087,
208,
1055,
441,
545,
324,
1192,
179,
1147,
897,
1387,
1283,
10,
1,
654,
863,
103
]
},
{
"word": "son",
"duration": "0.22",
"codes": [
198,
507,
1477,
915,
215,
267,
1232,
1041,
569,
1596,
1759,
229,
901,
1774,
1487,
51
]
},
{
"word": "na",
"duration": "0.16",
"codes": [
251,
243,
965,
215,
135,
711,
105,
1350,
1556,
226,
459,
68
]
},
{
"word": "tashi",
"duration": "0.42",
"codes": [
20,
502,
610,
179,
711,
800,
424,
352,
102,
569,
67,
262,
855,
413,
63,
701,
1719,
262,
383,
1166,
358,
1331,
596,
383,
1351,
96,
1170,
1061,
1059,
1392,
328,
1471
]
}
]
}
================================================
FILE: default_speakers_local/hausa_female2.json
================================================
{
"text": "Idan hira tayi \u0257a\u0257i bana son na tashi.",
"words": [
{
"word": "idan",
"duration": "0.52",
"codes": [
165,
338,
781,
661,
601,
691,
1154,
762,
691,
523,
641,
378,
1464,
38,
1280,
243,
1784,
195,
5,
1679,
77,
530,
1527,
270,
243,
374,
200,
157,
152,
228,
768,
743,
104,
221,
968,
479,
321,
1679,
1279
]
},
{
"word": "hira",
"duration": "0.38",
"codes": [
1587,
1544,
683,
92,
1255,
46,
106,
636,
320,
53,
249,
123,
1140,
1290,
93,
553,
0,
1192,
210,
587,
1184,
764,
215,
221,
2,
1115,
1079,
1033
]
},
{
"word": "tayi",
"duration": "0.38",
"codes": [
447,
1292,
198,
50,
1439,
1191,
1399,
106,
880,
844,
306,
466,
74,
260,
152,
723,
723,
687,
306,
195,
648,
466,
30,
1110,
637,
384,
1131,
342,
392
]
},
{
"word": "dadi",
"duration": "0.38",
"codes": [
751,
412,
212,
306,
388,
589,
446,
479,
880,
768,
467,
699,
128,
665,
882,
908,
171,
1146,
1297,
687,
901,
1110,
153,
386,
1330,
1283,
1181,
1070,
766
]
},
{
"word": "bana",
"duration": "0.46",
"codes": [
534,
1440,
1102,
1194,
474,
252,
39,
367,
116,
212,
36,
115,
76,
1173,
931,
1285,
1630,
678,
1087,
208,
1055,
441,
545,
324,
1192,
179,
1147,
897,
1387,
1283,
10,
1,
654,
863,
103
]
},
{
"word": "son",
"duration": "0.22",
"codes": [
198,
507,
1477,
915,
215,
267,
1232,
1041,
569,
1596,
1759,
229,
901,
1774,
1487,
51
]
},
{
"word": "na",
"duration": "0.16",
"codes": [
251,
243,
965,
215,
135,
711,
105,
1350,
1556,
226,
459,
68
]
},
{
"word": "tashi",
"duration": "0.42",
"codes": [
20,
502,
610,
179,
711,
800,
424,
352,
102,
569,
67,
262,
855,
413,
63,
701,
1719,
262,
383,
1166,
358,
1331,
596,
383,
1351,
96,
1170,
1061,
1059,
1392,
328,
1471
]
}
]
}
================================================
FILE: default_speakers_local/hausa_male1.json
================================================
{
"text": "Eh, mun za\u0253i yin wasan kwaikwayo don nuna al'adunmu yayin ranar al'ada.",
"words": [
{
"word": "eh",
"duration": "0.86",
"codes": [
165,
226,
1145,
284,
77,
187,
459,
77,
691,
278,
643,
247,
156,
204,
89,
1247,
52,
1350,
433,
812,
328,
553,
648,
602,
1075,
243,
557,
507,
645,
352,
29,
451,
83,
787,
10,
1000,
1791,
620,
188,
1681,
447,
752,
1405,
1070,
861,
1142,
163,
1293,
674,
250,
724,
259,
624,
676,
259,
1114,
526,
199,
724,
163,
168,
447,
663,
1471
]
},
{
"word": "mun",
"duration": "0.22",
"codes": [
651,
617,
1411,
389,
1329,
491,
1680,
1053,
618,
488,
1494,
1224,
1259,
1317,
1457,
508,
1341
]
},
{
"word": "zabi",
"duration": "0.40",
"codes": [
1777,
0,
1794,
83,
74,
462,
1170,
1212,
159,
1361,
384,
373,
218,
613,
1583,
1311,
188,
1466,
338,
405,
1321,
307,
1161,
1623,
293,
1644,
858,
703,
911,
326
]
},
{
"word": "yin",
"duration": "0.20",
"codes": [
1715,
870,
341,
1711,
1542,
429,
1565,
326,
1771,
966,
91,
614,
620,
647,
1755
]
},
{
"word": "wasan",
"duration": "0.44",
"codes": [
1070,
520,
973,
754,
83,
997,
1253,
982,
359,
537,
1115,
1677,
1358,
1250,
1403,
1637,
881,
382,
1754,
589,
1131,
88,
1256,
988,
83,
672,
644,
847,
322,
983,
1305,
31,
967
]
},
{
"word": "kwaikwayo",
"duration": "0.58",
"codes": [
1071,
1003,
1811,
684,
1210,
553,
1535,
491,
398,
222,
315,
439,
205,
174,
1742,
1373,
259,
1185,
1787,
516,
1440,
646,
1402,
267,
1677,
553,
344,
429,
202,
389,
782,
662,
388,
177,
553,
1413,
491,
554,
222,
759,
111,
1719,
1305,
437
]
},
{
"word": "don",
"duration": "0.24",
"codes": [
144,
824,
90,
637,
439,
138,
593,
609,
617,
1247,
444,
793,
600,
1425,
1379,
283,
995,
1804
]
},
{
"word": "nuna",
"duration": "0.40",
"codes": [
389,
669,
1804,
506,
1668,
1621,
341,
913,
1495,
1819,
112,
647,
743,
1612,
506,
1320,
1648,
106,
1107,
579,
326,
140,
1220,
936,
661,
729,
1183,
441,
797,
309
]
},
{
"word": "aladunmu",
"duration": "0.76",
"codes": [
1260,
179,
1240,
68,
753,
807,
1808,
894,
140,
791,
1486,
1276,
1471,
1132,
573,
797,
1307,
271,
632,
1059,
699,
816,
282,
908,
1240,
41,
144,
1721,
322,
237,
1284,
1312,
1444,
521,
593,
753,
506,
1024,
439,
1142,
1790,
478,
1164,
953,
1727,
1078,
564,
1665,
482,
976,
910,
727,
297,
677,
297,
507,
1157
]
}
]
}
================================================
FILE: default_speakers_local/hausa_male2.json
================================================
{
"text": "Audu ya hau jirgi a Kaduna.",
"words": [
{
"word": "audu",
"duration": "0.75",
"codes": [
165,
167,
68,
567,
156,
351,
337,
156,
351,
337,
337,
219,
584,
156,
762,
334,
185,
156,
334,
762,
156,
337,
612,
219,
691,
185,
156,
204,
862,
777,
589,
173,
550,
128,
489,
182,
74,
255,
427,
1554,
945,
289,
79,
875,
442,
1664,
464,
230,
1500,
181,
1152,
286,
103,
662,
125
]
},
{
"word": "ya",
"duration": "0.22",
"codes": [
201,
1332,
67,
1041,
248,
901,
352,
969,
642,
105,
215,
411,
408,
1235,
1212,
1264,
653
]
},
{
"word": "hau",
"duration": "0.22",
"codes": [
1083,
913,
1026,
1295,
1473,
1399,
41,
629,
1081,
623,
536,
890,
1554,
384,
1664,
921,
325
]
},
{
"word": "jirgi",
"duration": "0.48",
"codes": [
486,
1536,
597,
1088,
1743,
1286,
340,
949,
116,
1441,
1550,
28,
1073,
973,
233,
1319,
733,
465,
1152,
1644,
773,
1651,
175,
1281,
1563,
11,
1773,
1323,
30,
10,
424,
293,
1437,
1484,
1072,
370
]
},
{
"word": "a",
"duration": "0.10",
"codes": [
159,
697,
53,
1040,
1256,
264,
710,
1251
]
},
{
"word": "kaduna",
"duration": "0.44",
"codes": [
1203,
764,
1473,
1156,
400,
212,
1698,
1217,
145,
1569,
1151,
1056,
1700,
1527,
629,
1747,
1350,
738,
1734,
55,
1595,
890,
55,
1364,
203,
281,
952,
1234,
452,
93,
1036,
565,
969
]
}
]
}
================================================
FILE: default_speakers_local/igbo_female1.json
================================================
{
"text": "Codeine na-agba ah\u1ee5 \u1ecbnweta.",
"words": [
{
"word": "codeine",
"duration": "0.68",
"codes": [
165,
336,
1359,
661,
199,
379,
585,
1742,
210,
303,
388,
412,
1772,
794,
1607,
467,
622,
201,
575,
447,
319,
352,
234,
1797,
405,
1703,
1831,
1163,
1826,
1152,
563,
696,
1284,
157,
100,
402,
315,
1036,
1298,
592,
1177,
665,
7,
794,
509,
192,
1092,
821,
1022,
834,
132
]
},
{
"word": "na",
"duration": "0.20",
"codes": [
1764,
1340,
1394,
1341,
146,
303,
1102,
172,
366,
1263,
708,
164,
836,
1424,
81
]
},
{
"word": "agba",
"duration": "0.76",
"codes": [
994,
841,
816,
744,
1743,
1051,
1023,
1556,
331,
1706,
160,
160,
403,
142,
565,
723,
140,
874,
339,
186,
1229,
309,
461,
1015,
81,
297,
1206,
1041,
585,
960,
1007,
223,
578,
1142,
242,
1215,
261,
857,
1390,
334,
837,
735,
334,
649,
563,
544,
672,
316,
544,
630,
337,
601,
978,
956,
642,
552,
164
]
},
{
"word": "ahu",
"duration": "0.72",
"codes": [
254,
1014,
571,
208,
1388,
393,
467,
1453,
402,
361,
1464,
665,
1468,
1643,
858,
1663,
1381,
1596,
1420,
1235,
1287,
1483,
277,
1753,
949,
483,
1554,
787,
1407,
1100,
1035,
578,
591,
504,
460,
712,
838,
516,
620,
460,
223,
928,
1422,
1513,
1699,
513,
896,
242,
313,
1634,
1237,
249,
153,
1056,
508
]
},
{
"word": "inweta",
"duration": "0.44",
"codes": [
1391,
416,
182,
488,
500,
1544,
1237,
577,
1813,
860,
749,
679,
51,
682,
506,
79,
49,
254,
987,
348,
1418,
1688,
1735,
1658,
544,
16,
1777,
309,
25,
1317,
146,
1333,
147
]
}
]
}
================================================
FILE: default_speakers_local/igbo_female2.json
================================================
{
"text": "Umunwoke n\u1ecd na \u1ecct\u1ee5t\u1ee5 \u1ecdr\u1ee5 \u1ecdch\u1ecbch\u1ecb",
"words": [
{
"word": "umunwoke",
"duration": "0.79",
"codes": [
156,
1807,
1225,
976,
950,
1205,
957,
669,
838,
1142,
781,
666,
1151,
1219,
1044,
42,
51,
1712,
893,
963,
438,
30,
529,
792,
1769,
102,
834,
1398,
1258,
1460,
1407,
1265,
1615,
682,
455,
488,
395,
376,
1136,
1391,
79,
1052,
1747,
1739,
351,
1421,
423,
344,
253,
1098,
479,
1077,
243,
364,
1812,
315,
1073,
832
]
},
{
"word": "no",
"duration": "0.16",
"codes": [
175,
1407,
458,
860,
1025,
65,
1443,
1482,
371,
1257,
890,
1161,
449
]
},
{
"word": "na",
"duration": "0.10",
"codes": [
1650,
639,
322,
1596,
741,
987,
1452
]
},
{
"word": "otutu",
"duration": "0.38",
"codes": [
371,
1107,
1444,
794,
1517,
504,
930,
767,
990,
507,
1314,
1766,
1073,
1229,
1525,
1664,
460,
896,
1230,
640,
507,
919,
1104,
1320,
1022,
234,
520,
583,
959
]
},
{
"word": "oru",
"duration": "0.28",
"codes": [
324,
943,
65,
613,
709,
128,
384,
681,
1071,
1732,
1392,
616,
706,
679,
510,
934,
37,
76,
1032,
1618,
944
]
},
{
"word": "ochichi",
"duration": "0.44",
"codes": [
1234,
1267,
295,
1278,
891,
1652,
1142,
435,
356,
599,
70,
517,
1303,
788,
1314,
57,
1700,
1790,
432,
1495,
435,
823,
1583,
350,
290,
656,
70,
1074,
1104,
911,
1297,
1708,
1826
]
}
]
}
================================================
FILE: default_speakers_local/igbo_male2.json
================================================
{
"text": "Any\u1ecb na-eji nkw\u1ee5 n'ihu na-eme fan aka",
"words": [
{
"word": "anyi",
"duration": "0.79",
"codes": [
165,
226,
672,
278,
1279,
924,
1648,
1079,
1010,
1321,
869,
964,
1118,
964,
691,
1033,
964,
762,
981,
772,
630,
967,
676,
676,
460,
567,
680,
301,
334,
981,
301,
334,
981,
316,
316,
316,
223,
1007,
571,
524,
402,
147,
367,
402,
303,
182,
1729,
510,
914,
293,
1636,
683,
500,
1369,
451,
756,
1339,
1619
]
},
{
"word": "na",
"duration": "0.12",
"codes": [
1756,
593,
1446,
48,
67,
96,
759,
488,
69
]
},
{
"word": "eji",
"duration": "0.26",
"codes": [
367,
890,
357,
966,
654,
41,
1478,
1637,
1381,
654,
330,
844,
372,
1147,
202,
206,
148,
455,
50,
592
]
},
{
"word": "nkwu",
"duration": "0.28",
"codes": [
506,
515,
1363,
1663,
1464,
1383,
1770,
1251,
1639,
1705,
1634,
1464,
583,
1008,
1384,
557,
1002,
716,
952,
1552,
506
]
},
{
"word": "nihu",
"duration": "0.36",
"codes": [
1366,
1650,
716,
890,
1494,
189,
687,
439,
15,
45,
297,
48,
33,
335,
1591,
1560,
1574,
1368,
1069,
1394,
1166,
1457,
109,
143,
1574,
1663,
286
]
},
{
"word": "na",
"duration": "0.14",
"codes": [
1748,
1454,
1238,
407,
148,
30,
49,
789,
488,
137,
1166
]
},
{
"word": "eme",
"duration": "0.32",
"codes": [
537,
471,
1136,
1296,
1284,
217,
1516,
593,
704,
1002,
433,
205,
263,
1247,
665,
428,
269,
22,
519,
1400,
400,
1400,
1171,
493
]
},
{
"word": "fan",
"duration": "0.40",
"codes": [
1212,
911,
640,
1265,
386,
352,
102,
252,
642,
1182,
985,
115,
730,
347,
173,
1676,
794,
363,
1217,
1388,
736,
843,
1422,
660,
1160,
474,
1403,
142,
1278,
147
]
},
{
"word": "aka",
"duration": "0.24",
"codes": [
1492,
402,
1280,
595,
1732,
1697,
838,
1809,
1199,
724,
337,
516,
948,
1700,
1129,
901,
934,
1110
]
}
]
}
================================================
FILE: default_speakers_local/yoruba_female1.json
================================================
{
"text": "Kulikuli j\u1eb9\u0301 \u01f9kan \u00ecpanu t\u00ed w\u00f3\u0323n \u1e63e n\u00edpa l\u00edlo \u1eb9\u0300p\u00e0, p\u1eb9lu or\u00eds\u00ec\u00edr\u00eds\u00ec\u00ed \u01f9kan",
"words": [
{
"word": "kulikuli",
"duration": "0.50",
"codes": [
156,
1777,
479,
1086,
243,
127,
170,
1275,
1470,
392,
278,
837,
1142,
284,
1411,
1742,
1280,
87,
898,
228,
67,
1499,
1568,
1035,
978,
157,
1078,
243,
1708,
170,
1498,
346,
344,
526,
1039,
316,
526
]
},
{
"word": "je",
"duration": "0.28",
"codes": [
1570,
1290,
654,
328,
816,
270,
402,
271,
76,
43,
1259,
303,
371,
1077,
560,
1117,
1108,
1110,
1481,
691,
1825
]
},
{
"word": "nkan",
"duration": "0.26",
"codes": [
1465,
1312,
538,
1807,
1152,
27,
20,
379,
1378,
1505,
84,
959,
756,
107,
949,
996,
1358,
1286,
755,
1686
]
},
{
"word": "ipanu",
"duration": "0.54",
"codes": [
371,
1224,
458,
1601,
241,
247,
620,
423,
584,
905,
411,
1209,
309,
88,
1511,
164,
552,
1104,
140,
737,
1699,
595,
1257,
544,
1733,
169,
1339,
1830,
123,
1048,
1378,
1817,
775,
1093,
669,
1663,
464,
1536,
696,
1120,
781
]
},
{
"word": "ti",
"duration": "0.22",
"codes": [
724,
1120,
1250,
885,
432,
1556,
1803,
759,
234,
1104,
1264,
205,
892,
1223,
1051,
1141
]
},
{
"word": "won",
"duration": "0.26",
"codes": [
205,
1004,
1107,
386,
951,
53,
339,
1186,
664,
874,
1245,
547,
1320,
918,
1363,
1638,
654,
279,
1040,
739
]
},
{
"word": "se",
"duration": "0.22",
"codes": [
1082,
878,
760,
1094,
973,
656,
142,
10,
170,
1744,
170,
495,
2,
379,
725,
1816
]
},
{
"word": "nipa",
"duration": "0.36",
"codes": [
963,
1436,
49,
43,
386,
1731,
537,
121,
496,
666,
423,
668,
851,
811,
737,
25,
260,
1313,
300,
303,
951,
1153,
172,
589,
1831,
1088,
378
]
},
{
"word": "lilo",
"duration": "0.30",
"codes": [
451,
1801,
1800,
967,
1313,
49,
1814,
659,
858,
534,
1217,
727,
609,
651,
1411,
688,
321,
47,
1271,
79,
362,
816,
157
]
},
{
"word": "epa",
"duration": "0.40",
"codes": [
1272,
497,
1192,
67,
986,
54,
351,
423,
1154,
561,
584,
417,
209,
1017,
424,
1122,
25,
1191,
475,
140,
1184,
730,
1459,
1266,
379,
799,
567,
460,
379,
676
]
},
{
"word": "pelu",
"duration": "0.28",
"codes": [
381,
926,
433,
811,
76,
774,
1179,
380,
1668,
1646,
1364,
1446,
1241,
1503,
1384,
902,
1073,
443,
74,
1015,
1107
]
},
{
"word": "orisiirisii",
"duration": "0.64",
"codes": [
51,
1047,
367,
674,
1117,
734,
gitextract_5ttmyi3a/ ├── README.md ├── __init__.py ├── audiotokenizer.py ├── default_speakers/ │ ├── azeez.json │ ├── chinenye.json │ ├── emma.json │ ├── idera.json │ ├── joke.json │ ├── jude.json │ ├── onye.json │ ├── osagie.json │ ├── regina.json │ ├── remi.json │ ├── saheed.json │ ├── tayo.json │ ├── umar.json │ └── zainab.json ├── default_speakers_local/ │ ├── hausa_female1.json │ ├── hausa_female2.json │ ├── hausa_male1.json │ ├── hausa_male2.json │ ├── igbo_female1.json │ ├── igbo_female2.json │ ├── igbo_male2.json │ ├── yoruba_female1.json │ ├── yoruba_female2.json │ ├── yoruba_male1.json │ ├── yoruba_male2.json │ └── yoruba_male3.json ├── notebooks/ │ ├── Merge_datasets.ipynb │ ├── Merge_datasets_local (1).ipynb │ ├── Yoruba_prepare_data_naij (2).ipynb │ ├── train_YarnGPT.ipynb │ └── train_YarnGPT_local.ipynb ├── python-wrapper/ │ ├── README.md │ ├── audiotokenizer.py │ ├── default_speakers/ │ │ ├── .ipynb_checkpoints/ │ │ │ ├── Yoruba_prepare_data_naij (2)-checkpoint.ipynb │ │ │ ├── emma-checkpoint.json │ │ │ ├── idera-checkpoint.json │ │ │ └── onye-checkpoint.json │ │ ├── Yoruba_prepare_data_naij (2).ipynb │ │ ├── chinenye.json │ │ ├── emma.json │ │ ├── idera.json │ │ ├── joke.json │ │ ├── jude.json │ │ ├── onye.json │ │ ├── osagie.json │ │ ├── regina.json │ │ ├── remi.json │ │ ├── tayo.json │ │ └── umar.json │ ├── pyproject.toml │ ├── requirements.txt │ └── yarngpt/ │ ├── __init__.py │ └── core.py └── requirements.txt
SYMBOL INDEX (59 symbols across 3 files)
FILE: audiotokenizer.py
class AudioTokenizer (line 14) | class AudioTokenizer:
method __init__ (line 16) | def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenize...
method get_speaker_path (line 44) | def get_speaker_path(self,speaker_name):
method load_speaker (line 47) | def load_speaker(self, path: str):
method load_default_speaker (line 51) | def load_default_speaker(self, name: str):
method process_text (line 57) | def process_text(self, text: str):
method create_audio_prompt (line 65) | def create_audio_prompt(self,words: list) -> str:
method create_prompt (line 74) | def create_prompt(self,text,speaker_name="idera"):
method tokenize_prompt (line 91) | def tokenize_prompt(self, prompt):
method get_audio (line 101) | def get_audio(self,discrete_code):
method extract_integers (line 108) | def extract_integers(self,s):
method get_codes (line 114) | def get_codes(self, output):
class AudioTokenizerForLocal (line 120) | class AudioTokenizerForLocal(AudioTokenizer):
method __init__ (line 122) | def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenize...
method process_text (line 148) | def process_text(self, text: str):
method create_prompt (line 156) | def create_prompt(self,text,lang,speaker_name=None):
class AudioTokenizerV2 (line 184) | class AudioTokenizerV2(AudioTokenizer):
method __init__ (line 186) | def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenize...
method process_text (line 224) | def process_text(self, text: str):
method get_speaker_path (line 232) | def get_speaker_path(self,speaker_name,dir):
method load_speaker (line 235) | def load_speaker(self, path: str):
method load_default_speaker (line 239) | def load_default_speaker(self, name: str,dir: str):
method create_prompt (line 244) | def create_prompt(self,text,lang,speaker_name=None):
method replace_tokens (line 276) | def replace_tokens(text):
method resample (line 281) | def resample(self,audio: np.ndarray, sr: int, target_sr: int):
method quantize_wavtokenizer (line 289) | def quantize_wavtokenizer(self, path):
method create_asr_prompt (line 303) | def create_asr_prompt(self,audio_path):
method get_asr_results (line 314) | def get_asr_results(self,output):
FILE: python-wrapper/audiotokenizer.py
class AudioTokenizer (line 14) | class AudioTokenizer:
method __init__ (line 16) | def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenize...
method get_speaker_path (line 44) | def get_speaker_path(self,speaker_name):
method load_speaker (line 47) | def load_speaker(self, path: str):
method load_default_speaker (line 51) | def load_default_speaker(self, name: str):
method process_text (line 57) | def process_text(self, text: str):
method create_audio_prompt (line 65) | def create_audio_prompt(self,words: list) -> str:
method create_prompt (line 74) | def create_prompt(self,text,speaker_name="idera"):
method tokenize_prompt (line 91) | def tokenize_prompt(self, prompt):
method get_audio (line 101) | def get_audio(self,discrete_code):
method extract_integers (line 108) | def extract_integers(self,s):
method get_codes (line 114) | def get_codes(self, output):
class AudioTokenizerForLocal (line 120) | class AudioTokenizerForLocal(AudioTokenizer):
method __init__ (line 122) | def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenize...
method process_text (line 148) | def process_text(self, text: str):
method create_prompt (line 156) | def create_prompt(self,text,lang,speaker_name=None):
class AudioTokenizerV2 (line 184) | class AudioTokenizerV2(AudioTokenizer):
method __init__ (line 186) | def __init__(self,tokenizer_path,wav_tokenizer_model_path,wav_tokenize...
method process_text (line 224) | def process_text(self, text: str):
method get_speaker_path (line 232) | def get_speaker_path(self,speaker_name,dir):
method load_speaker (line 235) | def load_speaker(self, path: str):
method load_default_speaker (line 239) | def load_default_speaker(self, name: str,dir: str):
method create_prompt (line 244) | def create_prompt(self,text,lang,speaker_name=None):
method replace_tokens (line 276) | def replace_tokens(text):
method resample (line 281) | def resample(self,audio: np.ndarray, sr: int, target_sr: int):
method quantize_wavtokenizer (line 289) | def quantize_wavtokenizer(self, path):
method load_asr_prompt (line 301) | def load_asr_prompt(self,audio_path):
method get_asr_results (line 312) | def get_asr_results(self,output):
FILE: python-wrapper/yarngpt/core.py
function download_file (line 20) | def download_file(url, dest_path):
function load_model_and_tokenizer (line 49) | def load_model_and_tokenizer():
function generate_speech (line 62) | def generate_speech(text, speaker="idera", temperature=0.1, repetition_p...
Condensed preview — 57 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,590K chars).
[
{
"path": "README.md",
"chars": 3404,
"preview": "# YarnGPT 🎙️\n.ipynb",
"chars": 5880,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {\n \"id\": \"mKb-4H"
},
{
"path": "notebooks/Yoruba_prepare_data_naij (2).ipynb",
"chars": 690693,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {\n \"colab\": {\n "
},
{
"path": "notebooks/train_YarnGPT.ipynb",
"chars": 446850,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {\n \"colab\": {\n "
},
{
"path": "notebooks/train_YarnGPT_local.ipynb",
"chars": 469996,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {\n \"colab\": {\n "
},
{
"path": "python-wrapper/README.md",
"chars": 2659,
"preview": "# YarnGPT Python Wrapper Library\n\n## Description\nYarnGPT is a Python wrapper for the YarnGPT text-to-speech model, desig"
},
{
"path": "python-wrapper/audiotokenizer.py",
"chars": 14004,
"preview": "import os\nimport re\nimport json\nimport torch\nimport inflect\nimport random\nimport uroman as ur\nimport numpy as np\nimport "
},
{
"path": "python-wrapper/default_speakers/.ipynb_checkpoints/Yoruba_prepare_data_naij (2)-checkpoint.ipynb",
"chars": 690693,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {\n \"colab\": {\n "
},
{
"path": "python-wrapper/default_speakers/.ipynb_checkpoints/emma-checkpoint.json",
"chars": 9366,
"preview": "{\n \"text\": \"Scientists have discovered a new planet that may be capable of supporting life!\",\n \"words\": [\n "
},
{
"path": "python-wrapper/default_speakers/.ipynb_checkpoints/idera-checkpoint.json",
"chars": 8341,
"preview": "{\n \"text\": \"Scientists have discovered a new planet that may be capable of supporting life!\",\n \"words\": [\n "
},
{
"path": "python-wrapper/default_speakers/.ipynb_checkpoints/onye-checkpoint.json",
"chars": 13328,
"preview": "{\n \"text\": \"out to another level also going through in the shop chop scotch bonnet peppers\",\n \"words\": [\n {"
},
{
"path": "python-wrapper/default_speakers/Yoruba_prepare_data_naij (2).ipynb",
"chars": 587236,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {\n \"colab\": {\n \"base_uri\": \""
},
{
"path": "python-wrapper/default_speakers/chinenye.json",
"chars": 5775,
"preview": "{\n \"text\": \"and once I got that out of the way\",\n \"words\": [\n {\n \"word\": \"and\",\n \"dur"
},
{
"path": "python-wrapper/default_speakers/emma.json",
"chars": 9366,
"preview": "{\n \"text\": \"Scientists have discovered a new planet that may be capable of supporting life!\",\n \"words\": [\n "
},
{
"path": "python-wrapper/default_speakers/idera.json",
"chars": 8341,
"preview": "{\n \"text\": \"Scientists have discovered a new planet that may be capable of supporting life!\",\n \"words\": [\n "
},
{
"path": "python-wrapper/default_speakers/joke.json",
"chars": 9103,
"preview": "{\n \"text\": \"i still said you and i was like mister so this is what you are doing with\",\n \"words\": [\n {\n "
},
{
"path": "python-wrapper/default_speakers/jude.json",
"chars": 5504,
"preview": "{\n \"text\": \"know what I'm saying what I'm saying is that if you say\",\n \"words\": [\n {\n \"word\": \"k"
},
{
"path": "python-wrapper/default_speakers/onye.json",
"chars": 13328,
"preview": "{\n \"text\": \"out to another level also going through in the shop chop scotch bonnet peppers\",\n \"words\": [\n {"
},
{
"path": "python-wrapper/default_speakers/osagie.json",
"chars": 10207,
"preview": "{\n \"text\": \"do Charlotte Douglas shallots be me shut up dummy Libby shallots foolish storms\",\n \"words\": [\n "
},
{
"path": "python-wrapper/default_speakers/regina.json",
"chars": 12028,
"preview": "{\n \"text\": \"was just like is that what is amazing to you your marriage is\",\n \"words\": [\n {\n \"wor"
},
{
"path": "python-wrapper/default_speakers/remi.json",
"chars": 8040,
"preview": "{\n \"text\": \"animal noral human being\",\n \"words\": [\n {\n \"word\": \"animal\",\n \"duration\":"
},
{
"path": "python-wrapper/default_speakers/tayo.json",
"chars": 10995,
"preview": "{\n \"text\": \"and enjoy ourselves we need more parties let party start again now we know\",\n \"words\": [\n {\n "
},
{
"path": "python-wrapper/default_speakers/umar.json",
"chars": 9897,
"preview": "{\n \"text\": \"that i'd like to share with everybody in the world yes sometimes you go all the way\",\n \"words\": [\n "
},
{
"path": "python-wrapper/pyproject.toml",
"chars": 1003,
"preview": "[build-system]\nrequires = [\"setuptools>=42\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"yarngpt"
},
{
"path": "python-wrapper/requirements.txt",
"chars": 85,
"preview": "torch\ntransformers\ntorchaudio\noutetts==0.2.3\nuroman\nnumpy\ninflect\nIPython\nbuild\ntqdm\n"
},
{
"path": "python-wrapper/yarngpt/__init__.py",
"chars": 94,
"preview": "from yarngpt.core import generate_speech\n\n__version__ = \"0.1.5\"\n__all__ = [\"generate_speech\"]\n"
},
{
"path": "python-wrapper/yarngpt/core.py",
"chars": 4765,
"preview": "import os\nimport torch\nimport requests\nfrom transformers import AutoModelForCausalLM\nfrom audiotokenizer import AudioTok"
},
{
"path": "requirements.txt",
"chars": 69,
"preview": "outetts==0.2.3 \nuroman\ntorch\ntorchaudio\ntransformers==4.47.1\ninflect\n"
}
]
About this extraction
This page contains the full source code of the saheedniyi02/yarngpt GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 57 files (3.2 MB), approximately 828.6k tokens, and a symbol index with 59 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.