Showing preview only (3,013K chars total). Download the full file or copy to clipboard to get everything.
Repository: gia-guar/JARVIS-ChatGPT
Branch: main
Commit: 9aca1c051631
Files: 339
Total size: 2.8 MB
Directory structure:
gitextract_p75g_f1y/
├── .gitignore
├── Assistant/
│ ├── Agents.py
│ ├── Chat.py
│ ├── VirtualAssistant.py
│ ├── __init__.py
│ ├── get_audio.py
│ ├── research_mode.py
│ ├── semantic_scholar/
│ │ ├── S2_tools.py
│ │ ├── __init__.py
│ │ ├── agent_tools.py
│ │ └── simple.py
│ ├── tools.py
│ ├── voice.py
│ └── webui.py
├── LICENSE
├── README.md
├── TTS/
│ ├── .models.json
│ ├── VERSION
│ ├── __init__.py
│ ├── api.py
│ ├── bin/
│ │ ├── __init__.py
│ │ ├── collect_env_info.py
│ │ ├── compute_attention_masks.py
│ │ ├── compute_embeddings.py
│ │ ├── compute_statistics.py
│ │ ├── eval_encoder.py
│ │ ├── extract_tts_spectrograms.py
│ │ ├── find_unique_chars.py
│ │ ├── find_unique_phonemes.py
│ │ ├── remove_silence_using_vad.py
│ │ ├── resample.py
│ │ ├── synthesize.py
│ │ ├── train_encoder.py
│ │ ├── train_tts.py
│ │ ├── train_vocoder.py
│ │ └── tune_wavegrad.py
│ ├── config/
│ │ ├── __init__.py
│ │ └── shared_configs.py
│ ├── encoder/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ ├── base_encoder_config.py
│ │ │ ├── emotion_encoder_config.py
│ │ │ └── speaker_encoder_config.py
│ │ ├── dataset.py
│ │ ├── losses.py
│ │ ├── models/
│ │ │ ├── base_encoder.py
│ │ │ ├── lstm.py
│ │ │ └── resnet.py
│ │ ├── requirements.txt
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── generic_utils.py
│ │ ├── io.py
│ │ ├── prepare_voxceleb.py
│ │ ├── training.py
│ │ └── visual.py
│ ├── model.py
│ ├── server/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── conf.json
│ │ ├── server.py
│ │ └── templates/
│ │ ├── details.html
│ │ └── index.html
│ ├── tts/
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ ├── align_tts_config.py
│ │ │ ├── fast_pitch_config.py
│ │ │ ├── fast_speech_config.py
│ │ │ ├── fastspeech2_config.py
│ │ │ ├── glow_tts_config.py
│ │ │ ├── neuralhmm_tts_config.py
│ │ │ ├── overflow_config.py
│ │ │ ├── shared_configs.py
│ │ │ ├── speedy_speech_config.py
│ │ │ ├── tacotron2_config.py
│ │ │ ├── tacotron_config.py
│ │ │ └── vits_config.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── dataset.py
│ │ │ └── formatters.py
│ │ ├── layers/
│ │ │ ├── __init__.py
│ │ │ ├── align_tts/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── duration_predictor.py
│ │ │ │ └── mdn.py
│ │ │ ├── feed_forward/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── duration_predictor.py
│ │ │ │ └── encoder.py
│ │ │ ├── generic/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aligner.py
│ │ │ │ ├── gated_conv.py
│ │ │ │ ├── normalization.py
│ │ │ │ ├── pos_encoding.py
│ │ │ │ ├── res_conv_bn.py
│ │ │ │ ├── time_depth_sep_conv.py
│ │ │ │ ├── transformer.py
│ │ │ │ └── wavenet.py
│ │ │ ├── glow_tts/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── duration_predictor.py
│ │ │ │ ├── encoder.py
│ │ │ │ ├── glow.py
│ │ │ │ └── transformer.py
│ │ │ ├── losses.py
│ │ │ ├── overflow/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── common_layers.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── neural_hmm.py
│ │ │ │ └── plotting_utils.py
│ │ │ ├── tacotron/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attentions.py
│ │ │ │ ├── capacitron_layers.py
│ │ │ │ ├── common_layers.py
│ │ │ │ ├── gst_layers.py
│ │ │ │ ├── tacotron.py
│ │ │ │ └── tacotron2.py
│ │ │ └── vits/
│ │ │ ├── discriminator.py
│ │ │ ├── networks.py
│ │ │ ├── stochastic_duration_predictor.py
│ │ │ └── transforms.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── align_tts.py
│ │ │ ├── base_tacotron.py
│ │ │ ├── base_tts.py
│ │ │ ├── forward_tts.py
│ │ │ ├── glow_tts.py
│ │ │ ├── neuralhmm_tts.py
│ │ │ ├── overflow.py
│ │ │ ├── tacotron.py
│ │ │ ├── tacotron2.py
│ │ │ └── vits.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── data.py
│ │ ├── helpers.py
│ │ ├── languages.py
│ │ ├── managers.py
│ │ ├── measures.py
│ │ ├── monotonic_align/
│ │ │ ├── __init__.py
│ │ │ ├── core.c
│ │ │ ├── core.pyx
│ │ │ └── setup.py
│ │ ├── speakers.py
│ │ ├── ssim.py
│ │ ├── synthesis.py
│ │ ├── text/
│ │ │ ├── __init__.py
│ │ │ ├── characters.py
│ │ │ ├── chinese_mandarin/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── numbers.py
│ │ │ │ ├── phonemizer.py
│ │ │ │ └── pinyinToPhonemes.py
│ │ │ ├── cleaners.py
│ │ │ ├── cmudict.py
│ │ │ ├── english/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── abbreviations.py
│ │ │ │ ├── number_norm.py
│ │ │ │ └── time_norm.py
│ │ │ ├── french/
│ │ │ │ ├── __init__.py
│ │ │ │ └── abbreviations.py
│ │ │ ├── japanese/
│ │ │ │ ├── __init__.py
│ │ │ │ └── phonemizer.py
│ │ │ ├── korean/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ko_dictionary.py
│ │ │ │ ├── korean.py
│ │ │ │ └── phonemizer.py
│ │ │ ├── phonemizers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── espeak_wrapper.py
│ │ │ │ ├── gruut_wrapper.py
│ │ │ │ ├── ja_jp_phonemizer.py
│ │ │ │ ├── ko_kr_phonemizer.py
│ │ │ │ ├── multi_phonemizer.py
│ │ │ │ └── zh_cn_phonemizer.py
│ │ │ ├── punctuation.py
│ │ │ └── tokenizer.py
│ │ └── visual.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── audio/
│ │ │ ├── __init__.py
│ │ │ ├── numpy_transforms.py
│ │ │ ├── processor.py
│ │ │ └── torch_transforms.py
│ │ ├── callbacks.py
│ │ ├── capacitron_optimizer.py
│ │ ├── distribute.py
│ │ ├── download.py
│ │ ├── downloaders.py
│ │ ├── generic_utils.py
│ │ ├── io.py
│ │ ├── manage.py
│ │ ├── radam.py
│ │ ├── samplers.py
│ │ ├── synthesizer.py
│ │ ├── training.py
│ │ └── vad.py
│ ├── vc/
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ ├── freevc_config.py
│ │ │ └── shared_configs.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── base_vc.py
│ │ │ └── freevc.py
│ │ └── modules/
│ │ ├── __init__.py
│ │ └── freevc/
│ │ ├── __init__.py
│ │ ├── commons.py
│ │ ├── mel_processing.py
│ │ ├── modules.py
│ │ ├── speaker_encoder/
│ │ │ ├── __init__.py
│ │ │ ├── audio.py
│ │ │ ├── hparams.py
│ │ │ └── speaker_encoder.py
│ │ └── wavlm/
│ │ ├── __init__.py
│ │ ├── config.json
│ │ ├── modules.py
│ │ └── wavlm.py
│ └── vocoder/
│ ├── README.md
│ ├── __init__.py
│ ├── configs/
│ │ ├── __init__.py
│ │ ├── fullband_melgan_config.py
│ │ ├── hifigan_config.py
│ │ ├── melgan_config.py
│ │ ├── multiband_melgan_config.py
│ │ ├── parallel_wavegan_config.py
│ │ ├── shared_configs.py
│ │ ├── univnet_config.py
│ │ ├── wavegrad_config.py
│ │ └── wavernn_config.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── gan_dataset.py
│ │ ├── preprocess.py
│ │ ├── wavegrad_dataset.py
│ │ └── wavernn_dataset.py
│ ├── layers/
│ │ ├── __init__.py
│ │ ├── hifigan.py
│ │ ├── losses.py
│ │ ├── lvc_block.py
│ │ ├── melgan.py
│ │ ├── parallel_wavegan.py
│ │ ├── pqmf.py
│ │ ├── upsample.py
│ │ └── wavegrad.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── base_vocoder.py
│ │ ├── fullband_melgan_generator.py
│ │ ├── gan.py
│ │ ├── hifigan_discriminator.py
│ │ ├── hifigan_generator.py
│ │ ├── melgan_discriminator.py
│ │ ├── melgan_generator.py
│ │ ├── melgan_multiscale_discriminator.py
│ │ ├── multiband_melgan_generator.py
│ │ ├── parallel_wavegan_discriminator.py
│ │ ├── parallel_wavegan_generator.py
│ │ ├── random_window_discriminator.py
│ │ ├── univnet_discriminator.py
│ │ ├── univnet_generator.py
│ │ ├── wavegrad.py
│ │ └── wavernn.py
│ └── utils/
│ ├── __init__.py
│ ├── distribution.py
│ └── generic_utils.py
├── TTS_additional_material/
│ ├── .gitignore
│ ├── .pre-commit-config.yaml
│ ├── .pylintrc
│ ├── .readthedocs.yml
│ ├── CODE_OF_CONDUCT.md
│ ├── README.md
│ ├── hubconf.py
│ └── requirements.txt
├── UpdateHistory.md
├── Vicuna/
│ ├── README.md
│ ├── start-webui-vicuna-gpu.bat
│ └── vicuna.ps1
├── demos/
│ ├── chat_with_keyboard.py
│ ├── demo_da_vinci.py
│ ├── demo_elevenlabs.py
│ ├── demo_google_search.py
│ ├── demo_local_search_engine.py
│ ├── demo_pyaudio.py
│ ├── demo_research_mode.py
│ └── demo_tts.py
├── env.txt
├── openai_api_chatbot.py
├── run.bat
├── saved_chats/
│ ├── 2023-03-13_ExploringtheConceptofDeepLearningandItsApplicationsintheAviationIndustry.txt
│ ├── 2023-03-25_ExploringtheAmbitiousWorldofStarCitizenIsitRightforCasualGamers.txt
│ ├── 2023-03-26_AnIntroductiontoArtificialIntelligence.txt
│ ├── 2023-03-26_ArtificialIntelligence.txt
│ ├── 2023-03-26_ExploringthePossibilityofHumanExtinction.txt
│ ├── 2023-03-26_ExploringtheUseofTransformersinImageSegmentation.txt
│ ├── 2023-03-26_FlutterDevelopment.txt
│ ├── 2023-03-26_FlutterDevelopmentLearningMethodsandManagingAppState.txt
│ ├── 2023-03-26_HumanExtinction.txt
│ ├── 2023-03-26_NuclearandElectricPropulsioninSpaceExplorationAdvantagesandChallenges.txt
│ ├── 2023-03-26_RevolutionizingHealthcare.txt
│ ├── 2023-03-26_RevolutionizingHealthcarewithArtificialIntelligenceBenefitsandOpportunities.txt
│ ├── 2023-03-26_SpacePropulsion.txt
│ ├── 2023-03-26_TagImageSegmentation.txt
│ ├── 2023-03-26_TipsforRunningScriptsatSystemStartuponWindowsandLinux.txt
│ ├── 2023-03-26_TipsforRunningStartup.txt
│ ├── 2023-03-28_AsteroidImpact.txt
│ ├── 2023-03-28_ChoosingAppleComputer.txt
│ ├── 2023-03-28_ChoosingtheRightAppleComputerforDeepLearningandMachineLearningM1vsM2ChipandOptimalMemorySize.txt
│ ├── 2023-03-28_SizeandImpactHowBigDoesanAsteroidNeedtoBetoCauseSignificantDamage.txt
│ ├── 2023-03-29_InteractiveSystem.txt
│ ├── 2023-03-29_TraininganInteractiveSystemtoRespondtoCommandsandQuestions.txt
│ ├── 2023-03-30_ExploringtheColdestTemperaturesonEarthRecordBreakingLowTemperaturesandHistoricalEstimates.txt
│ ├── 2023-03-30_ExtremeColdTemperatures.txt
│ ├── 2023-03-30_TagBeerBasics.txt
│ ├── 2023-03-30_TagSkyColors.txt
│ ├── 2023-03-30_TheBasicsofBeerUnderstandingthePopularAlcoholicBeverage.txt
│ ├── 2023-03-30_TheColorsoftheSkyExploringtheHuesandVariations.txt
│ ├── 2023-04-01_IslandTravel.txt
│ ├── 2023-04-01_RegalieattivitàperunviaggioinIslanda.txt
│ ├── 2023-04-02_SoundCardComponentsandADC.txt
│ ├── 2023-04-02_UnderstandingtheComponentsofaSoundCardandtheFunctionofADC.txt
│ ├── 2023-04-03_ExploringMultiHeadSelfAttentionforKeywordIdentificationinNaturalLanguageProcessing.txt
│ ├── 2023-04-03_TagNaturalLanguageProcessing.txt
│ ├── 2023-04-04_ExploringFeasibleOptionsforPoweringanIronManSuitSolarCellsBatteriesandNuclearReactors.txt
│ ├── 2023-04-04_PoweringIronManSuit.txt
│ ├── 2023-04-05_DockerContainerization.txt
│ ├── 2023-04-07_ApproachesforTextto.txt
│ ├── 2023-04-09_FitbitDataandSleep.txt
│ ├── 2023-04-11_Fitness.txt
│ ├── 2023-04-19_ChineseLanguages.txt
│ ├── 2023-04-19_SyntheticMeat.txt
│ ├── 2023-04-20_LabGrownMeat.txt
│ ├── 2023-04-20_SyntheticMeatCont.txt
│ ├── 2023-04-20_andTextto.txt
│ ├── 2023-05-03_AIConversations.txt
│ ├── 2023-05-03_MeaninginLife.txt
│ ├── 2023-06-07_ImageProcessing.txt
│ └── DATAFRAME.csv
├── setup.bat
├── test_TTS.py
├── tests.py
├── venv_requirements.txt
├── whisper_edits/
│ ├── __init__.py
│ └── model.py
└── workspaces/
└── Vision_09df18b156814c80a3e1c1ab544423fc/
├── refy_suggestions/
│ ├── test.csv
│ └── test.html
└── results/
├── papers.bib
└── papers.csv
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
jarvis/
chatgptwrapper/
output.wav
.env
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
vicuna/oobabooga-windows/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
================================================
FILE: Assistant/Agents.py
================================================
from langchain import OpenAI, LLMChain
from langchain.llms import OpenAI
from langchain.agents import Tool, AgentExecutor, ZeroShotAgent
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent, load_tools
import datetime
from Assistant import VirtualAssistant
import os
# generate a Zero Shot React Agent with memory that looks K interactions behind
def generateReactAgent(VA:VirtualAssistant, k:int):
    """Build a Zero-Shot ReAct agent wired to the assistant's local tools.

    Args:
        VA: the VirtualAssistant instance whose methods back the tools.
        k:  number of past interactions to preload into the agent's memory.

    Returns:
        An AgentExecutor with file-search, save, file-read and summarize tools.
    """
    # Local Search Engine ($)
    LocalSearchEngine = Tool(
        name= 'Key Search',
        func=VA.find_file,
        # FIX: corrected "ddon't" typo in the tool prompt
        description="Useful when you don't know the name of a resource. Inputs should be keywords. Keywords are used to find resources. You don't know the name of the resources"
    )
    Save = Tool(name='Memorize',
                func=VA.save_chat,
                description='save the current conversation. Useful for when the conversation will be needed in future')
    FileReader = Tool(
        name='Load File',
        func=VA.open_file,
        description='Useful when you have file names. Loads the content of a file given its filename'
    )
    Summarize = Tool(
        name='TLDR',
        func=VA.search_engine.tldr,
        description='Summarize large amounts of text'
    )
    tools = [LocalSearchEngine, Save, FileReader, Summarize]
    # NOTE: a throwaway ConversationBufferMemory used to be created here and
    # immediately discarded; it is now removed — build_memory() below is the
    # single source of the agent's memory.
    # need to work on a custom LangChain llm model
    prefix = """You are an AI research assistant designed to assist users with their academic research. You are equipped with these tools:"""
    suffix = """Begin!"
{chat_history}
Question: {input}
{agent_scratchpad}"""
    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input", "chat_history", "agent_scratchpad"]
    )
    llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
    agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
    # adding a window of memory:
    memory = build_memory(chat_history = VA.current_conversation(), k=k)
    return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory, early_stopping_method ='generate', max_iterations=2)
def build_memory(chat_history, k):
    """Build a ConversationBufferMemory from the last k (user, assistant) pairs.

    Args:
        chat_history: list of {"role": ..., "content": ...} message dicts.
        k: number of interactions (user+assistant pairs) to keep.

    Returns:
        A ConversationBufferMemory preloaded with up to k interactions; empty
        on k == 0 or if the history is malformed.
    """
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, human_prefix='user', ai_prefix='assistant')
    k = min(k, len(chat_history)//2)
    if k==0 :return memory
    # BUG FIX: the window of k interactions starts at index -2*k (the user
    # message of the oldest kept pair), not -k.
    if chat_history[-2*k]["role"] != 'user':
        print('refreshing memory warning - considering last interaction only')
        k=1
    try:
        # BUG FIX: was range(-k*2-1, -1, 2), which paired messages off by one
        # (e.g. (system, user)) and always dropped the most recent interaction.
        for i in range(-2*k, 0, 2):
            input = chat_history[i]['content']
            output = chat_history[i+1]['content']
            memory.save_context({"input":input}, {"output":output})
    except Exception:
        # Malformed history: fall back to an empty memory rather than crash.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, human_prefix='user', ai_prefix='assistant')
    return memory
def generateGoogleAgent(VA:VirtualAssistant, k:int):
    """Build a web-surfing Zero-Shot agent (wikipedia, weather, news, search).

    Args:
        VA: the VirtualAssistant whose conversation seeds the agent memory.
        k:  number of past interactions to preload into memory.

    Returns:
        An AgentExecutor with web tools plus custom locate/news/date tools.
    """
    names = ['wikipedia', 'requests', 'open-meteo-api']
    llm = OpenAI(temperature=0)
    # BUG FIX: os.getenv returns None when the variable is unset, and
    # len(None) raises TypeError — default to '' so the check is safe.
    if len(os.getenv('SERPER_API_KEY') or '')>1:
        print('(using google-serper)')
        names.append('google-serper')
    elif len(os.getenv('GOOGLE_API_KEY') or '')>1:
        print('(using google-search)')
        names.append('google-search')
    tools = load_tools(names, llm=llm)
    custom_tools = [
        Tool(
            name ='Locate me',
            func = locate_me,
            description='useful to know the current geographical location'),
        Tool(
            name='News',
            func=news,
            description='Use this when you want to get information about the top headlines of current news stories. The input should be a keyword describing the topic'),
        Tool(
            name='Today',
            func=today,
            description='Useful to know the current day'),
        Tool(
            name='Delta days',
            func= time_between_dates,
            description='Use this you need to compute the time between two Dates. Input should be two dates in the ISO 8601 format: Year-Month-Day'
        )
    ]
    for item in custom_tools: tools.append(item)
    prefix = """Answer the question. You have also access to the following tools:"""
    suffix = """Begin!"
{chat_history}
Question: {input}
{agent_scratchpad}"""
    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input", "chat_history", "agent_scratchpad"]
    )
    llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
    agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
    # adding a window of memory:
    memory = build_memory(VA.current_conversation(), k)
    return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory, early_stopping_method = 'generate', max_iterations=4)
## FUNCTIONS FOR TOOLS
import geocoder
from newsapi import NewsApiClient
def locate_me(p):
    """Return [city, state, country] for the machine's public IP (input ignored)."""
    here = geocoder.ip('me')
    return [getattr(here, attr) for attr in ('city', 'state', 'country')]
def today(p):
    """Return today's date as an ISO-formatted string (input is ignored)."""
    current_day = datetime.date.today()
    return current_day.isoformat()
def time_between_dates(date1, date2):
    """Return the absolute time between two ISO 8601 dates as a string.

    Args:
        date1, date2: date strings in Year-Month-Day format.

    Returns:
        str(timedelta) of the (non-negative) difference, or
        'date format incorrect' when either input cannot be parsed.
    """
    try:
        d1 = datetime.date.fromisoformat(date1)
        d2 = datetime.date.fromisoformat(date2)
    # Narrowed from a bare except: fromisoformat raises ValueError on bad
    # strings and TypeError on non-strings (e.g. None).
    except (TypeError, ValueError):
        return 'date format incorrect'
    # abs() collapses the original if/else on toordinal() into one branch.
    return str(abs(d2 - d1))
def news(keyword):
    """Fetch up to 10 headlines for *keyword* from NewsAPI and return a digest string."""
    client = NewsApiClient(api_key=os.getenv('NEWS_API_KEY'))
    top_headlines = client.get_top_headlines(q=keyword)
    # Fall back to a full-archive search when no top headline matches.
    if not top_headlines['articles']:
        top_headlines = client.get_everything(q=keyword)
    top_headlines['articles'] = top_headlines['articles'][:10]
    pieces = []
    for article in top_headlines['articles']:
        pieces.append('\nsource: '+article['source']['name']+'\n')
        pieces.append('\n'+article['title']+'\nurl: '+article['url']+'\n')
        pieces.append(article['description'])
    res = ''.join(pieces)
    print(res)
    return res
================================================
FILE: Assistant/Chat.py
================================================
================================================
FILE: Assistant/VirtualAssistant.py
================================================
# import for prompt routing
from langchain import OpenAI
from langchain.agents import Tool
from langchain.agents import initialize_agent
from Assistant.research_mode import ResearchAssistant
from .Agents import generateReactAgent, generateGoogleAgent
import tiktoken
# imports for chats
import pygame
import os
import re
import pandas as pd
from datetime import datetime
import copy
import openai
import time
import langid
import torch
import Assistant.get_audio as myaudio
from .voice import *
from .tools import Translator, LocalSearchEngine, AssistantChat
from .tools import parse_conversation, count_tokens, take_last_k_interactions
from .webui import oobabooga_textgen
# imports for audio
import whisper
import wave
import pyaudio
import speech_recognition as sr
import time
import sys
from contextlib import contextmanager
#module used for speaking during recording
import webrtcvad
class VirtualAssistant:
    """Voice-driven assistant: records speech, transcribes it with Whisper,
    answers via an LLM and speaks the reply. Class-level constants below are
    tuning knobs plus audio-device probing done at class-definition time."""
    DEBUG = True
    # Shared default conversation seed. NOTE(review): this is a class-level
    # mutable object; instances that append to it in place will share state.
    DEFAULT_CHAT = AssistantChat([{"role": "system", "content": "You are a helpful assistant. You can make question to make the conversation entertaining."}])
    RESPONSE_TIME = 1.5 #values that work well in my environment (ticks, not seconds)
    SLEEP_DELAY = 3 #seconds
    MIN_RECORDING_TIME = .5 #seconds
    MAX_RECORDING_TIME = 60 #seconds
    VAD_AGGRESSIVENESS = 2 #1-3
    MAX_TOKENS = 4096
    # Audio capture configuration — probed from the first detected microphone
    # when the class is defined (i.e. at import time).
    DEVICE_INDEX = myaudio.detect_microphones()[0]
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = myaudio.get_device_channels()[DEVICE_INDEX]
    RATE = int(myaudio.get_devices()[DEVICE_INDEX]['defaultSampleRate'])
    print('using input device: ', myaudio.get_devices()[DEVICE_INDEX]['name'])
    CONVERSATION_LONG_ENOUGH = 4 #interactions (2 questions)
def __init__(self,
             whisper_model=None,
             awake_with_keywords = ['elephant'],
             model = "gpt-3.5-turbo",
             embed_model = "text-embedding-ada-002",
             translator_model = 'argostranslator',
             **kwargs):
    """Initialize the assistant: load Whisper, voice, translator and search engine.

    Args:
        whisper_model: preloaded Whisper model; if None, kwargs['whisper_size']
            must name a model size to load.
        awake_with_keywords: wake words (overridable via kwargs).
        model: chat completion engine name.
        embed_model: embedding model for the local search engine.
        translator_model: translator backend name.
        **kwargs: openai_api, mode ('CHAT'|'RESEARCH'), voice_id,
            search_engine_llm, whisper_size, awake_with_keywords, ...
    """
    try:
        openai.api_key = kwargs['openai_api']
    except KeyError:  # narrowed from bare except: only the kwargs lookup can fail
        print('OpenAI API key not found')
    # HEAVY STUFF FIRST
    # Filling the GPU with the model
    if whisper_model == None:
        if 'whisper_size' not in kwargs: raise Exception('whisper model needs to be specified')
        self.interpreter = whisper.load_model(kwargs['whisper_size'])
    else:
        self.interpreter = whisper_model
    # STATUS and PROMPT ANALYZER
    # NOTE(review): self.MODE is only set when 'mode' is passed; other methods
    # read self.MODE unconditionally — confirm callers always supply it.
    if 'mode' in kwargs:
        if kwargs['mode'].upper() != 'CHAT' and kwargs['mode'].upper() != 'RESEARCH': raise KeyError()
        self.MODE = kwargs['mode']
    self.DIRECTORIES={
        'CHAT_DIR': os.path.realpath(os.path.join(os.getcwd(), 'saved_chats')),
        'SOUND_DIR':os.path.realpath(os.path.join(os.getcwd(), 'Assistant', 'sounds')),
        'VOICE_DIR':os.path.realpath(os.path.join(os.getcwd(), 'Assistant', 'voices'))
    }
    self.func_descript={
        "CHAT":[
            "tools: the prompt requires an action like handling a file, saving a conversation, changing some specified parameters...",
            "respond: provide an answer to a question",
            "you don't know the answer or you can't satisfy the request."],
        "RESEARCH":[
            "tools: the prompt requires one or multiple actions like reading a file, downloading a known resource",
            "respond: provide an answer based on scientific information",
        ]
    }
    # TEXT and VOICE
    if 'voice_id' in kwargs.keys():
        # Map each voice id to the corresponding .wav sample in VOICE_DIR.
        for item in kwargs['voice_id']:
            print(kwargs['voice_id'][item])
            kwargs['voice_id'][item] = (os.path.join(self.DIRECTORIES["VOICE_DIR"], kwargs['voice_id'][item])) + '.wav'
    else:
        kwargs['voice_id'] = os.path.join(self.DIRECTORIES["VOICE_DIR"], 'default.wav')
    self.languages = {
        'en': "English",
        'it': "Italian",
        # add yours
    }
    self.voice = Voice(write_dir = self.DIRECTORIES['SOUND_DIR'], languages = self.languages, **kwargs)
    self.translator = Translator(model=translator_model, translator_languages = list(self.languages.keys()))
    self.answer_engine = model
    self.search_engine = LocalSearchEngine(
        embed_model = embed_model,
        tldr_model = kwargs['search_engine_llm'] if 'search_engine_llm' in list(kwargs.keys()) else model,
        translator_model=translator_model,
        translator_languages = list(self.languages.keys()))
    self.is_awake = False
    self.current_conversation = self.DEFAULT_CHAT
    # AUDIO
    # initialize the VAD module
    self.vad = webrtcvad.Vad()
    self.vad.set_mode(self.VAD_AGGRESSIVENESS)
    # BUG FIX: an unconditional `self.Keywords = awake_with_keywords` used to
    # follow this if/else, silently discarding kwargs['awake_with_keywords'].
    if 'awake_with_keywords' in kwargs:
        self.Keywords = kwargs['awake_with_keywords']
    else:
        self.Keywords = awake_with_keywords
    self.ears = sr.Recognizer()
    # init finished
    self.play('system_online_bleep.mp3')
# STATUS ###############################################################################################
def switch_mode(self):
    """Toggle between CHAT and RESEARCH modes.

    Returns:
        A confirmation phrase translated into the language of the last
        conversation message.
    """
    if self.MODE == 'CHAT':
        self.say('Moving into research mode', VoiceIdx='en', elevenlabs=True)
        self.play('Sci-Fi-UI.mp3',loop=True)
        self.init_research_mode()
        pygame.mixer.stop()
        self.play('system_online_bleep.mp3', PlayAndWait=True)
        # BUG FIX: langid.classify returns a (language, score) tuple; take [0]
        # like the branch below, otherwise the tuple is passed as a language code.
        response = self.translator.translate('research mode is ready', to_language=langid.classify(self.current_conversation[-1]['content'])[0], from_language='en').lower()
        return response
    else:
        self.MODE = 'CHAT'
        response = self.translator.translate('chat mode enabled', to_language=langid.classify(self.current_conversation[-1]['content'])[0],from_language='en').lower()
        return response
def identify_explicit_command(self, prompt):
    """Detect hard-coded commands in *prompt*.

    Returns '-1' for a mode switch, '3' for an internet search / new
    workspace request, or None when no explicit command is found.
    """
    prompt = self.translator.translate(prompt, to_language='en').lower()
    # if the prompt is long it's unlikely to be an explicit command
    # (this condition prevents false positives)
    if len(prompt.split()) > 15:
        return None
    INTERNET_COMMANDS = [
        "do an internet search",
        "look on the web",
        "do a web search",
        "control on the internet",
        "do a search",
        "make a search",
        "perform a search",
        "perform a web search"]
    wants_switch = ("research mode" in prompt and self.MODE == 'CHAT') \
                   or ("chat mode" in prompt and self.MODE == 'RESEARCH')
    if wants_switch:
        print('found explicit command')
        return '-1'
    if self.MODE == 'CHAT' and any(command in prompt for command in INTERNET_COMMANDS):
        print('found explicit command')
        return '3'
    if self.MODE == 'RESEARCH' and ("new workspace" in prompt or "new environment" in prompt):
        print('found explicit command')
        return '3'
    return None
def use_tools(self, prompt, debug = DEBUG):
    """Route *prompt* to the tool-using agent for the current mode.

    Returns the agent's textual answer.
    """
    if debug: print(' -use tools ')
    # tools for chat mode
    # BUG FIX: the literal was misspelled "CAHT", so chat mode always fell
    # through to the research agent.
    if self.MODE == "CHAT":
        ActionManager = generateReactAgent(self, k=1)
        return ActionManager.run(input = prompt)
    # research mode
    else:
        return self.ResearchAssistant.agent.run(input = prompt)
def secondary_agent(self, prompt, debug = DEBUG):
    """Dispatch *prompt* to the fallback agent for the current mode:
    a Google/web agent in CHAT mode, or the new-workspace protocol in
    RESEARCH mode."""
    if self.MODE == 'CHAT':
        if debug:
            print(' - web surfing ')
        web_agent = generateGoogleAgent(self, k=1)
        return web_agent.run(prompt)
    if self.MODE == 'RESEARCH':
        if debug:
            print(' - assessing new workspace ')
        return self.ResearchAssistant.PROTOCOL_begin_new_workspace(prompt)
def set_directories(self, **kwargs):
    """Update entries of self.DIRECTORIES from keyword arguments.

    Keys are matched case-insensitively against DIRECTORIES; values must be
    existing directories. Invalid entries play an error sound and are skipped.
    """
    for item in kwargs:
        try:
            # Raise a specific exception instead of a bare Exception when the
            # path does not exist.
            if not(os.path.isdir(kwargs[item])): raise NotADirectoryError(kwargs[item])
            print(f'updating {item} from {self.DIRECTORIES[item.upper()]} == to => {kwargs[item]}')
            self.DIRECTORIES[item.upper()] = kwargs[item]
        # Narrowed from a bare except (which also swallowed KeyboardInterrupt);
        # still catches KeyError for unknown directory names.
        except Exception:
            self.play('error.mp3', PlayAndWait=True)
            print(f"{kwargs[item]}: not found")
def go_to_sleep(self):
    """Deactivate the assistant, saving the chat first when it is long enough."""
    print('[Assistant going to sleep]')
    self.is_awake = False
    worth_saving = len(self.current_conversation()) > self.CONVERSATION_LONG_ENOUGH
    if worth_saving:
        self.save_chat()
    self.play('sleep.mp3', PlayAndWait=True)
# [stable]
def analyze_prompt(self, prompt, debug = DEBUG):
    """Classify *prompt* into a routing flag for the current mode.

    Returns:
        '-1' for an explicit mode switch, or a number string produced by a
        gpt-3.5-turbo prompt-manager ('1' tools, '2' respond, '3' other).
    """
    if debug: print(f' - analyzing prompt in {self.MODE} mode')
    # Hard coded options: DO this, Look on INTERNET...
    flag = self.identify_explicit_command(prompt)
    if flag is not None: return flag
    # CHAT MODE
    if self.MODE == 'CHAT':
        context ="""You are a prompt manager. A number must always be present in your answer. You can perform some actions and decide which associated number is required. Your actions:"""
        for i, function in enumerate(self.func_descript['CHAT']):
            context += f"\n{i+1}) {function};"
        context += "\nYou can answer only with numbers. A number must always be present in your answer."
        context += """\nHere are some example:
\nPROMPT: 'find and summarize all the files about history'\n1
\nPROMPT: 'find a past conversation about planes'\n1
\nPROMPT: 'do you agree?'\n2
\nPROMPT: 'Salva questa conversazione'\n1
\nPROMPT: 'How is the weather?'\n3
\nPROMPT: 'credo sia giusto.'\n2
\nPROMPT: '¿Cuál es la noticia de hoy?'\n3
\nPROMPT: 'Thank you'\n2"""
        CHAT = [{"role": "system", "content": context},
                {"role": "user", "content":f"PROMPT: '{prompt}'"}]
        # NOTE: a redundant second call to identify_explicit_command used to
        # sit here; its result was unconditionally overwritten below, so it
        # has been removed.
    # RESEARCH MODE
    else:
        context ="""You are a prompt manager. A number must always be present in your answer. You can perform some actions and decide which associated number is required. You are designed to assist users with their academic research. You are equipped with a range of tools. Your tools:"""
        for i, function in enumerate(self.func_descript['RESEARCH']):
            context += f"\n{i+1}) {function};"
        context += "\nYou can answer only with numbers. A number must always be present in your answer."
        context += """\nHere are some example:
\nPROMPT: 'begin a new project'\n1
\nPROMPT: 'download papers about ...'\n1
\nPROMPT: 'what are the mechanichal properties of carbon fiber?'\n2
\nPROMPT: 'Salva questa conversazione'\n1
\nPROMPT: 'What are the authors of the paper XYZ?'\n2
\nPROMPT: 'What studies mention Transformers architectures?'\n2
\nPROMPT: 'Find new papers that are similar to paper XYZ'\n1
\nPROMPT: 'Tell me more'\n2
\nPROMPT: 'what is up?\n2"""
        CHAT = [{"role": "system", "content": context},
                {"role": "user", "content":f"PROMPT: '{prompt}'"}]
    if debug: print(' - - submitting request')
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        max_tokens=10,
        messages=CHAT)
    flag = response['choices'][0]['message']['content']
    if debug: print(' - - got answer')
    return flag
# CONVERSATION ################################################################################
def start_new_conversation(self):
    """Discard the current conversation and reset to the default system prompt.

    BUG FIX: the reset used to rebind to the shared class-level DEFAULT_CHAT
    object. Since messages are appended to current_conversation in place,
    DEFAULT_CHAT itself accumulated messages and the "reset" was a no-op.
    A deep copy gives the new conversation its own pristine object.
    """
    # Use the call form, consistent with len(self.current_conversation())
    # elsewhere in this class.
    if len(self.current_conversation()) > 2:
        print('forgetting the last conversation')
        self.current_conversation = copy.deepcopy(self.DEFAULT_CHAT)
def expand_conversation(self, role, content):
    """Append one message (role/content pair) to the running conversation."""
    message = {"role": role, "content": content}
    self.current_conversation.append(message)
def get_answer(self, question, optimize_cuda = False, debug=DEBUG):
    """Answer *question* using the configured engine for the current mode.

    In CHAT mode the question is appended to a copy of the conversation and
    sent to the configured engine (OpenAI chat, or a local Vicuna backend via
    oobabooga). In RESEARCH mode the loaded workspace's query engine answers.
    The assistant's reply is appended to the conversation in place.

    Args:
        question: the user's prompt text.
        optimize_cuda: when True (Vicuna GPTQ engine only), unload Whisper
            from the GPU before generating and reload it afterwards.
        debug: print progress markers.

    Returns:
        The answer string, or an error string when no research workspace is
        loaded.
    """
    if debug: print(' - thinking')
    if self.MODE == "CHAT":
        # Work on a deep copy so the pending question is not stored unless a
        # reply is actually produced.
        temp = copy.deepcopy(self.current_conversation())
        temp.append({"role":"user", "content":question})
        self.play('thinking.mp3', loop=True)
        if self.answer_engine == 'gpt-3.5-turbo':
            if debug: print(' - - submitting request')
            API_response = openai.ChatCompletion.create(
                model=self.answer_engine,
                messages=temp)
            answer = API_response['choices'][0]['message']['content']
            if debug: print(' - - got answer')
        elif self.answer_engine == 'anon8231489123_vicuna-13b-GPTQ-4bit-128g':
            # Remember the question's language so the (English) model output
            # can be translated back.
            lang_id = langid.classify(question)[0]
            if optimize_cuda:
                # free space on the GPU
                self.deallocate_whisper()
            # use GPU to process the answer
            answer = oobabooga_textgen(prompt = temp)
            answer = self.translator.translate(answer, from_language=langid.classify(answer)[0], to_language=lang_id)
            if optimize_cuda:
                # try to get the model back to GPU
                self.allocate_whisper()
        elif self.answer_engine == 'eachadea_ggml-vicuna-13b-4bit':
            # NOTE(review): this engine receives only the bare question, not
            # the conversation history — confirm this is intentional.
            answer = oobabooga_textgen(prompt = question)
    # RESEARCH MODE
    else:
        if self.ResearchAssistant.query_engine == None:
            return 'error: no workspace loaded. I cannot provide precise information without a workspace loaded on research mode'
        res = self.ResearchAssistant.query_engine.query(question)
        answer = res.response
    # Stop the 'thinking' loop sound before speaking/returning.
    pygame.mixer.stop()
    # Only the assistant reply is stored here; the user question is not
    # appended (it lives only in the temporary copy above).
    self.expand_conversation(role="assistant", content=answer)
    self.last_interaction = time.perf_counter()
    if debug: print(' - - finished')
    return answer
def save_chat(self, debug = DEBUG):
if debug: print(' - saving chat')
if not os.path.isdir(self.DIRECTORIES['CHAT_DIR']): os.mkdir(self.DIRECTORIES['CHAT_DIR'])
if not self.current_conversation.is_saved():
if debug: print(' - - generating title')
title = self.get_answer(question="generate a very short title for this conversation")
self.say(f'I am saving this conversation with title: {title}', VoiceIdx='en', IBM=False, elevenlabs=True)
self.play('data_writing.mp3', PlayAndWait=True)
prompt = [{"role": "system", "content": "You don't like redundancy and use as few words as possible"},
{"role":"user", "content":f"Associate a tag to this title: {title} \nHere is an example: 'Exploring Text to Speech Popular Techniques and Deep Learning Approaches' is associated to 'Deep Learning'"}]
if debug: print(' - - submitting request')
API_response = openai.ChatCompletion.create(
model='gpt-3.5-turbo',
max_tokens=5,
temperature=0,
messages=prompt)
if debug: print(' - - got answer')
if debug: print(' - - processing response')
answer = API_response['choices'][0]['message']['content']
answer = re.sub(r'[^\w\s]', '',answer)
answer = re.sub(' ', '',answer)
fname = str( str(datetime.today().strftime('%Y-%m-%d')) + '_' + str(answer)+'.txt')
self.current_conversation.filename = fname
else:
self.say(f'I am overwriting the conversation {fname}', VoiceIdx='en', IBM=False, elevenlabs=True)
fname = self.current_conversation.filename
with open(os.path.join(self.DIRECTORIES['CHAT_DIR'], fname), 'w') as f:
for message in self.current_conversation():
f.write(message["role"]+ ': ' + message["content"]+'\n')
f.close()
self.is_awake = False
return f"file: {os.path.join(self.DIRECTORIES['CHAT_DIR'], fname)} saved successfully"
# ACTIONS ##################################################################################
def init_research_mode(self, workspace=None):
if workspace is None:
# get last created workspace
if 'workspaces' in os.listdir(os.getcwd()):
search_dir = os.path.join('workspaces')
subdirs = os.listdir(search_dir)
subdirs.sort(key=lambda fn: os.path.getmtime(os.path.join(search_dir, fn)))
subdirs.reverse()
for subd in subdirs:
folder_path = os.path.join('workspaces',subd)
if os.path.isdir(folder_path):
self.say('loading the last created workspace', VoiceIdx='en', elevenlabs=True)
workspace = os.path.abspath( folder_path )
break
self.play('Sci-Fi-UI.mp3',loop=True)
self.MODE = 'RESEARCH'
self.ResearchAssistant = ResearchAssistant(
current_conversation=self.current_conversation,
index_name='paperquestioning',
workspace=workspace)
pygame.mixer.stop()
def deallocate_whisper(self):
model_name = self.interpreter.name
model_current_device = self.interpreter.device
self.interpreter = None
torch.cuda.empty_cache()
if model_current_device.type == 'cuda':
print('loading Whisper model to cpu')
self.interpreter = whisper.load_model(model_name, device='cpu')
torch.cuda.empty_cache()
def allocate_whisper(self):
model_name = self.interpreter.name
model_current_device = self.interpreter.device
self.interpreter = None
torch.cuda.empty_cache()
if model_current_device.type == 'cpu':
try:
torch.cuda.empty_cache()
print('loading Whisper model to CUDA')
self.interpreter = whisper.load_model(model_name, device='cuda')
except:
self.interpreter = None
print(f"cuda dedicated memory isufficient: {torch.cuda.memory_allocated()/1e6} GB already occupuied")
print(f"keeping Whisper model to cpu")
self.interpreter = whisper.load_model(model_name, device='cpu')
torch.cuda.empty_cache()
def switch_whisper_device(self):
model_name = self.interpreter.name
model_current_device = self.interpreter.device
self.interpreter = None
torch.cuda.empty_cache()
if model_current_device.type == 'cuda':
print('loading Whisper model to cpu')
self.interpreter = whisper.load_model(model_name, device='cpu')
torch.cuda.empty_cache()
else:
try:
torch.cuda.empty_cache()
print('loading Whisper model to CUDA')
self.interpreter = whisper.load_model(model_name, device='cuda')
except:
print(f"cuda dedicated memory isufficient: {torch.cuda.memory_allocated()/1e6} GB already occupuied")
print(f"keeping Whisper model to cpu")
self.interpreter = whisper.load_model(model_name, device='cpu')
torch.cuda.empty_cache()
def open_file(self, filename, debug=DEBUG):
if debug: print(' - opening file')
# look for the file
file = None
for fname in os.listdir(self.DIRECTORIES['CHAT_DIR']):
# look for sub-strings (in case extension is forgotten)
if filename in fname:
file = open(os.path.join(self.DIRECTORIES['CHAT_DIR'], filename), 'r')
file = file.read()
if file is None: return 'No such file'
return file
def find_file(self, keywords, n=3, debug=DEBUG):
if debug: print(' -finding file')
#self.play('thinking.mp3', loop=True)
summary = self.search_engine.accurate_search(key=keywords, from_csv=True, n=n)
# self.play('wake.mp3')
response = ''
for i in range(n):
response += f"\nFilename: {summary.file_names[i]} ; Topics discussed: {summary.tags[i]}"
return response
# SPEAK ####################################################################################
def play(self, fname, PlayAndWait=False, loop=False, debug = DEBUG):
if loop: loop=-1
else: loop = 0
if pygame.mixer.get_init() is None: pygame.mixer.init()
if debug: print(' - playing')
try:
pygame.mixer.music.load(os.path.join(self.DIRECTORIES["SOUND_DIR"], fname))
except Exception as e:
print(e)
return
pygame.mixer.music.set_volume(0.5)
pygame.mixer.music.play(loops=loop)
if PlayAndWait:
while(pygame.mixer.music.get_busy()):pass
if debug: print(' - - finihed playing')
    def say(self, text, VoiceIdx='jarvis', elevenlabs=False, IBM=False):
        """Speak *text*, cascading through TTS backends.

        Order: the requested online backend (ElevenLabs or IBM), then the
        other online backend, then the offline voice. The language for the
        online voices is auto-detected from *text*; *VoiceIdx* is only used
        by the offline fallback.
        """
        langIdx = langid.classify(text)[0]
        print(f"[Assistant]: {text}")
        if elevenlabs and IBM: raise(Exception('IBM and ElevenLabs can t be both true'))
        if elevenlabs:
            try:
                try:
                    self.voice.speak(text=text, VoiceIdx=langIdx, elevenlabs=True, IBM=False, mode='online')
                    return
                except Exception as e:
                    # ElevenLabs failed: try IBM online before the offline voice
                    print(f"couldn t speak with: {e}")
                    self.voice.speak(text=text, VoiceIdx=langIdx, elevenlabs=False, IBM=True, mode='online')
                    return
            except:
                # both online backends failed: offline fallback
                self.voice.speak(text=text, VoiceIdx=VoiceIdx, elevenlabs=False, IBM=False, mode='offline')
                return
        elif IBM:
            try:
                try:
                    self.voice.speak(text=text, VoiceIdx=langIdx, elevenlabs=False, IBM=True, mode='online')
                    return
                except:
                    # IBM failed: try ElevenLabs online before the offline voice
                    self.voice.speak(text=text, VoiceIdx=langIdx, elevenlabs=True, IBM=False, mode='online')
                    return
            except:
                self.voice.speak(text=text, VoiceIdx=VoiceIdx, elevenlabs=False, IBM=False, mode='offline')
                return
        # neither online backend requested: offline voice directly
        try:
            self.voice.speak(text=text, VoiceIdx='jarvis',elevenlabs=False, IBM=False, mode='offline')
        except Exception as e:
            # NOTE(review): this still raises after the fallback speak below
            # succeeds — confirm that is the intended contract.
            self.voice.speak(text=text, VoiceIdx=langIdx, elevenlabs=False, IBM=False, mode='offline')
            print(VoiceIdx, elevenlabs, IBM)
            print(e)
            raise Exception('No such specifications')
# LISTEN #############################################################################################
#function that blocks the code until the wakeword, or wakewords are encountered
    #function that blocks the code until the wakeword, or wakewords are encountered
    def block_until_wakeword(self, verbosity=False):
        """Block, streaming microphone audio through Porcupine, until one of
        self.Keywords is detected; then reset the chat, play the wake sound
        and set self.is_awake. Audio resources are always released.

        Requires the PORCUPINE_KEY environment variable.
        """
        if verbosity: print("listening passively...", end="")
        from struct import unpack_from
        import pvporcupine
        #initialize values
        porcupine = None
        pa = None
        audio_stream = None
        try:
            porcupine = pvporcupine.create(access_key=os.environ["PORCUPINE_KEY"],
                                           keywords=self.Keywords)
            pa = pyaudio.PyAudio()
            audio_stream = pa.open(
                rate=porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=porcupine.frame_length)
            #not strictly necessary, but helps debug if something overwrote the keywords
            print(f"Listening for wake word '{self.Keywords[0]}'...")
            #loop to preform while waiting(does not noticeably use the CPU)
            while True:
                # one Porcupine frame of 16-bit little-endian samples
                pcm = audio_stream.read(porcupine.frame_length)
                pcm = unpack_from("h" * porcupine.frame_length, pcm)
                keyword_index = porcupine.process(pcm)
                #same actions activated as previous function
                #NOTE: keyword_index is -1 unless wakeword encountered, then it's the index of the wakeword in the list
                #(different wakewords activate different profiles?)
                if keyword_index >= 0:
                    print("wakeword encountered")
                    self.start_new_conversation()
                    self.play('wake.mp3',PlayAndWait=False)
                    self.is_awake = True
                    return
        finally:
            #clean up: release in reverse order of acquisition
            if audio_stream is not None:
                audio_stream.close()
            if pa is not None:
                pa.terminate()
            if porcupine is not None:
                porcupine.delete()
def listen_passively(self, verbosity=False):
with sr.Microphone() as source:
if verbosity: print("listenting passively...", end="")
audio = self.ears.listen(source)
query = ''
try:
query = self.ears.recognize(audio)
if verbosity: print(f"user said: {query}")
except Exception as e:
if verbosity: print(str(e))
# if any keyword is present in the query return True (awake the assistant)
if any(keyword in query.split() for keyword in self.Keywords):
self.start_new_conversation()
self.play('wake.mp3',PlayAndWait=False)
self.is_awake = True
def record_to_file(self, file_path):
wf = wave.open(file_path, 'wb', )
wf.setnchannels(self.CHANNELS)
sample_width = pyaudio.PyAudio().get_sample_size(self.FORMAT)
wf.setsampwidth(sample_width)
frames = self.record()
wf.setframerate(self.RATE)
wf.writeframes(b''.join(frames))
wf.close()
    def record(self):
        """Record microphone audio until the speaker goes silent or a time cap hits.

        Two parallel streams are opened: one at self.RATE for the actual
        recording, one at 32 kHz feeding 20 ms frames to the WebRTC-style
        VAD (self.vad). Returns the list of raw recorded byte chunks.
        """
        # Your current setup
        vad_rate = 32000            # sample rate the VAD stream runs at
        frame_length_ms = 20        # VAD frame duration
        vad_CHUNK = (vad_rate * frame_length_ms) // 1000
        p = pyaudio.PyAudio()
        vad_stream = p.open(format=self.FORMAT,
                            channels=1,
                            rate=vad_rate,
                            input=True,
                            frames_per_buffer=vad_CHUNK)
        rec_stream = p.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)
        frames = []
        try:
            silence_time = 0        # seconds of consecutive non-speech
            speaked = False         # becomes True once real speech was heard
            is_voice = False
            print("listening...")
            start_time = time.perf_counter()
            while True:
                rec_data = rec_stream.read(self.CHUNK)
                frames.append(rec_data)
                # detect voice activity
                data = vad_stream.read(vad_CHUNK)
                try:
                    is_voice = self.vad.is_speech(data, vad_rate)
                except Exception as e:
                    print(f"Error during VAD: {e}")
                # Calculate time since the last voice activity; speech only
                # counts after MIN_RECORDING_TIME to ignore the wake residue
                if is_voice and (time.perf_counter()-start_time)>self.MIN_RECORDING_TIME:
                    speaked = True
                    silence_time = 0
                else:
                    silence_time += frame_length_ms / 1000
                # Print debugging information (useful for tuning sensitivity)
                # Stop recording if silence duration exceeds the threshold or if the time limit is reached
                if (silence_time > self.RESPONSE_TIME and speaked) or (time.perf_counter() - start_time > self.MAX_RECORDING_TIME):
                    break
                # NOTE(review): this branch looks unreachable — the condition
                # above already breaks once MAX_RECORDING_TIME elapses;
                # possibly SLEEP_DELAY was intended here. Confirm.
                if silence_time > self.MAX_RECORDING_TIME:
                    self.go_to_sleep()
                    break
                # NOTE(review): /10000 sleeps 2 ms per 20 ms frame — confirm
                # whether /1000 (real-time pacing) was intended.
                time.sleep(frame_length_ms / 10000)
        except KeyboardInterrupt:
            print("Done recording")
        except Exception as e:
            # unexpected failure: dump state and abort the process
            print(str(e))
            print(silence_time,self.RESPONSE_TIME,self.SLEEP_DELAY)
            exit()
        vad_stream.stop_stream()
        vad_stream.close()
        rec_stream.stop_stream()
        rec_stream.close()
        p.terminate()
        return frames
@contextmanager
def suppress_stdout():
    """Temporarily redirect sys.stdout to os.devnull inside the with-block."""
    saved_stdout = sys.stdout
    with open(os.devnull, "w") as devnull:
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = saved_stdout
================================================
FILE: Assistant/__init__.py
================================================
================================================
FILE: Assistant/get_audio.py
================================================
import whisper
import pyaudio
# CHUNK = 1024
# FORMAT = pyaudio.paInt16
# CHANNELS = 2
# RATE = 44100
# SILENCE_THRESHOLD = 1500
# convert audio content into text
# convert audio content into text
def whisper_wav_to_text(audio_name, model=None, model_name=False, prior=None):
    """Transcribe *audio_name* (a wav file) with Whisper.

    model: an already-loaded Whisper model, or None when *model_name* is given.
    model_name: when a str, the named model is loaded here.
    prior: optional iterable of language codes restricting language detection.
    Returns (text, detected_language_code).
    Raises Exception when neither model nor model_name is supplied.
    """
    if isinstance(model_name, str):
        print('loading model ', model_name)
        model = whisper.load_model(model_name)
    # BUG FIX: the default used to be the mutable `model=[]`; None (or the
    # legacy []) now means "not supplied".
    if model is None or model == []:
        raise Exception("model cannot be unspecified")
    print('listening to ',audio_name,'...')
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio_name)
    audio = whisper.pad_or_trim(audio)
    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # detect the spoken language
    try:
        _, probs = model.detect_language(mel)
        if not(prior is None):
            # restrict candidates to the caller-provided language prior
            filt_probs = {str(lan):probs.get(lan) for lan in prior}
            probs = filt_probs
        print(f"Detected language: {max(probs, key=probs.get)}")
        detected_lang = str(max(probs, key=probs.get))
        options = whisper.DecodingOptions(language=detected_lang)
    except Exception:
        # model does not support multiple languages, default to English
        # (narrowed from a bare except)
        print('language: en')
        detected_lang = 'en'
        options = whisper.DecodingOptions(language='en')
    result = whisper.decode(model, mel, options)
    # print the recognized text
    print('\n[User]: '+ result.text)
    return result.text, detected_lang
def get_device_channels():
    """Map every PyAudio device index to its max number of input channels."""
    p = pyaudio.PyAudio()
    try:
        return {i: p.get_device_info_by_index(i)['maxInputChannels']
                for i in range(p.get_device_count())}
    finally:
        # BUG FIX: the PortAudio handle was never released
        p.terminate()
def detect_microphones():
    """Indices of input devices whose name contains 'microphone'; [0] as fallback."""
    p = pyaudio.PyAudio()
    try:
        mics = [i for i in range(p.get_device_count())
                if 'microphone' in p.get_device_info_by_index(i)['name'].lower()]
    finally:
        # BUG FIX: the PortAudio handle was never released
        p.terminate()
    return mics if mics else [0]
def get_devices():
    """Return the PyAudio device-info dict for every audio device."""
    p = pyaudio.PyAudio()
    try:
        return [p.get_device_info_by_index(i) for i in range(p.get_device_count())]
    finally:
        # BUG FIX: the PortAudio handle was never released
        p.terminate()
================================================
FILE: Assistant/research_mode.py
================================================
# AGENT
from langchain import OpenAI, LLMChain, PromptTemplate
from langchain.llms import OpenAI
from langchain.agents import Tool, AgentExecutor, ZeroShotAgent
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent, load_tools
from typing import Any
from Assistant.semantic_scholar.agent_tools import *
from Assistant.semantic_scholar.S2_tools import *
from langchain.agents import AgentType
class ResearchAssistant:
    """Research-mode backend.

    Manages a paper 'workspace' directory, a Pinecone/llama-index query
    engine over its documents, and a LangChain agent wired to Semantic
    Scholar tools.
    """
    def __init__(self, current_conversation, workspace = None, index_name = 'paperquestioning'):
        self.current_workspace = workspace
        self.index_name = index_name
        self.query_engine = None          # set by boot_workspace()
        self.current_conversation = current_conversation
        self.ans = []                     # collected "title: paperId" search hits
        self.docs = []                    # llama-index Documents of the workspace
        if 'workspaces' not in os.listdir(os.getcwd()):
            os.mkdir('workspaces')
            print('\tinitializing Research Assistant but no workspace are available: begin a new search please')
        elif len(os.listdir('workspaces'))>0:
            self.boot_workspace(workspace)
            print('\tResearch Assistant initialization done')
        self.agent = generateResearchAgent(self, k=1)

    def boot_workspace(self, workspace):
        """Load *workspace* documents and (re)build the query engine, retrying up to 3 times."""
        # BUG FIX: guard against workspace=None — os.path.isdir(None) raises TypeError
        if workspace is not None and os.path.isdir(workspace):
            print('\tinitializing Research Assistant with Workspace directory: ', workspace)
            # init vector store
            init_attempts = 0
            print(' ')
            self.docs = load_workspace(workspace)
            while True:
                init_attempts +=1
                try:
                    self.query_engine, self.Index = llama_query_engine(self.docs,pinecone_index_name=self.index_name)
                    break
                except Exception as e:
                    print(f'initialization attempt {init_attempts} failed with exception {e}')
                    time.sleep(2)
                    if init_attempts<=3:continue
                    else: return None

    # make wrappers to store results and info
    def wrapper_find_papers_from_query(self, query):
        """Agent tool: search papers for *query*; returns 'title; paperId' text (open access only)."""
        try:
            res = find_paper_from_query(query, 20)
            text = ''
            for result in res:
                if not result['isOpenAccess']:continue
                text += result['title']+'; paperId: '+result['paperId']
                self.ans.append( f"{result['title']}: {result['paperId']}")
            if len(text)==0: return "couldn't find any open access result"
            return text
        except Exception as e:
            return f'error: {e}'

    def load_pdf_to_pinecone(self, paths):
        """Read each PDF path, wrap it as a Document, insert into the index and refresh the engine."""
        # read the pdf
        if isinstance(paths, str): paths = [paths]
        # BUG FIX: the check was inverted (`if isinstance(paths, list): raise`),
        # which raised for every call since the line above guarantees a list.
        if not isinstance(paths, list): raise Exception('paths must be a pdf path or a list of pdf paths')
        for path in paths:
            if not path.endswith('.pdf'): continue
            content = readPDF(path)
            doc = Document(
                text = content,
                doc_id = uuid.uuid4().hex
            )
            self.docs.append(doc)
            # upload to Pinecone and synch index
            self.Index.insert(document=doc)
        # refresh the query engine so the new documents are searchable
        self.query_engine = self.Index.as_query_engine()
        return

    def PROTOCOL_begin_new_workspace(self, query):
        """Ask the LLM whether a web search is warranted; if so, extract a search
        key from *query*, download papers into a new workspace and boot it."""
        # PRELIMINARY ASSESSMENT
        prompt_template = "Do you really need to search for something on internet?: {query} \n Answer Yes or No"
        llm = OpenAI(temperature=0)
        llm_chain = LLMChain(
            llm=llm,
            prompt=PromptTemplate.from_template(prompt_template)
        )
        assessment = llm_chain.predict(query = query)
        if 'yes' not in assessment.lower():
            return 'what should be the topic of the workspace?'
        # GOING WITH IT
        # preprocessing
        prompt_template = "Extract a search key from the following query: {query}"
        llm = OpenAI(temperature=0)
        llm_chain = LLMChain(
            llm=llm,
            prompt=PromptTemplate.from_template(prompt_template)
        )
        # extraction of a query
        search_query = llm_chain.predict(query = query)
        # post processing of extracted search-query
        search_query = re.sub('[^0-9a-zA-Z]', ' ', search_query.lower())
        if "search key" in search_query: search_query=search_query.replace("search key","").strip()
        print('SEARCH QUERY: ', search_query)
        self.current_workspace = PaperSearchAndDownload(query=search_query)
        self.boot_workspace(self.current_workspace)
        return f'new workspace created at {self.current_workspace}'

    def wrapper_download_paper(self, id):
        """Agent tool: download a paper by paperId into the workspace cache and index it."""
        if 'cache' not in os.listdir(self.current_workspace): os.mkdir(os.path.join(self.current_workspace,'cache'))
        ans = download_pdf_from_id(paperid= id, path= os.path.join(self.current_workspace, 'cache'))
        update_workspace_dataframe(self.current_workspace, verbose = False)
        pdf_paths = ans.split('\n')
        self.load_pdf_to_pinecone(pdf_paths)
        return ans

    def wrapper_find_reccomendation(self, paperId):
        """Agent tool: similar-paper recommendations.

        NOTE(review): find_recommendations indexes paper['title'] on its
        argument — passing a raw paperId string will fail; confirm the
        expected input shape.
        """
        return find_recommendations(paper=paperId, result_limit=5)

    def find_in_papers(self, query):
        """Query the workspace engine, retrying up to 3 times; returns the answer or the error text."""
        attempt =0
        while True:
            try:
                attempt +=1
                answer = self.query_engine.query(query)
            except Exception as e:
                if attempt<=3: continue
                return str(e)
            return answer
# generate a Zero Shot React Agent with memory that looks K interactions behind
def generateResearchAgent(RA:ResearchAssistant, k:int):
    """Build a ZeroShotAgent executor wired to RA's Semantic Scholar tools.

    k: number of past user/assistant exchanges to preload into memory.
    Returns an AgentExecutor (max 20 iterations, generate-on-stop).
    """
    findpapers = Tool(
        name='Find from query',
        description='find a paper from a query, title and/or other information.',
        func= RA.wrapper_find_papers_from_query
    )
    download_ID = Tool(
        name='Download ID',
        description='download a paper from paperId. Take as input a paperId',
        func=RA.wrapper_download_paper
    )
    peek = Tool(
        name='glimpse pdf',
        description="get paper information if available, Take as input a paper title",
        func=glimpse_pdf
    )
    reccomend = Tool(
        name='find reccomendations',
        description='find similar paper from paperId. Take as input a paperId',
        func=RA.wrapper_find_reccomendation
    )
    tools = [findpapers, download_ID, peek, reccomend]
    # need to work on a custom LangChain llm model
    prefix = """You are an assistant designed to browse scientific libraries, you have the following tools to complete the user requests:"""
    suffix = """Begin!"
{chat_history}
Question: {input}
{agent_scratchpad}"""
    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input", "chat_history", "agent_scratchpad"]
    )
    llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
    agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
    # adding a window of memory over the last k exchanges
    # (FIX: removed a dead ConversationBufferMemory that was created earlier
    # and immediately overwritten here)
    memory = build_memory(chat_history = RA.current_conversation(), k=k)
    return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory, early_stopping_method ='generate', max_iterations=20)
def build_memory(chat_history, k):
    """Seed a ConversationBufferMemory with the last *k* user/assistant exchanges.

    chat_history: list of {"role", "content"} dicts in OpenAI chat format.
    Returns an empty memory when the history is too short or malformed.
    """
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, human_prefix='user', ai_prefix='assistant')
    k = min(k, len(chat_history)//2)
    if k==0 :return memory
    # NOTE(review): this inspects chat_history[-k] while the pairing loop
    # below starts at -k*2-1 — confirm the intended index.
    if chat_history[-k]["role"] != 'user':
        print('refreshing memory warning - considering last interaction only')
        k=1
    try:
        # walk (user, assistant) pairs from oldest to newest
        for i in range(-k*2-1, -1, 2):
            input = chat_history[i]['content']
            output = chat_history[i+1]['content']
            memory.save_context({"input":input}, {"output":output})
    except Exception:
        # narrowed from a bare except: fall back to an empty memory on
        # malformed history instead of swallowing SystemExit et al.
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, human_prefix='user', ai_prefix='assistant')
    return memory
================================================
FILE: Assistant/semantic_scholar/S2_tools.py
================================================
import csv
import re
from time import time
import requests
import dotenv
import aspose.pdf as ap
dotenv.load_dotenv()
import argparse
import os
from requests import Session
from typing import Generator, Union
import subprocess
import urllib3
import json
urllib3.disable_warnings()
import refy
import pdftitle
from langchain.document_loaders import OnlinePDFLoader
import time
import arxiv
from pymed import PubMed
from .simple import Main
# Default number of results returned by the search helpers below.
RESULT_LIMIT = 10
# Semantic Scholar API key; required — raises KeyError at import time when unset.
S2_API_KEY = os.environ['S2_API_KEY']
# Field list requested from the Graph API for every paper record.
PAPER_FIELDS = 'paperId,externalIds,title,authors,year,abstract,isOpenAccess,openAccessPdf,influentialCitationCount,citationStyles,tldr,venue,journal'
def get_paper(session: Session, paper_id: str, fields: str = 'paperId,title', **kwargs) -> dict:
    """Fetch one paper record from the Semantic Scholar Graph API.

    Extra keyword arguments are forwarded as query parameters (and may
    override *fields*). Raises on non-2xx responses.
    """
    query_params = {'fields': fields}
    query_params.update(kwargs)
    request_headers = {'x-api-key': S2_API_KEY}
    url = f'https://api.semanticscholar.org/graph/v1/paper/{paper_id}'
    with session.get(url, params=query_params, headers=request_headers) as response:
        response.raise_for_status()
        return response.json()
def find_paper_from_query(query, result_limit=RESULT_LIMIT):
    """Search the Semantic Scholar Graph API for *query*.

    Sleeps 60 s and retries while rate-limited (HTTP 429). Returns the raw
    result list on success, an explanatory string when nothing matched, or
    the last HTTP status code (None when no response was received) on error.
    """
    papers = None
    # BUG FIX: rsp is bound before the try so the except branch can no longer
    # hit a NameError when the very first request raises.
    rsp = None
    while papers is None:
        try:
            while True:
                rsp = requests.get('https://api.semanticscholar.org/graph/v1/paper/search',
                                   params={'query': query, 'limit': result_limit, 'fields': PAPER_FIELDS})
                if rsp.status_code == 429:
                    time.sleep(60)  # rate limited: back off, then retry
                    continue
                break
            rsp.raise_for_status()
            results = rsp.json()
            total = results["total"]
            if not total:
                print('No matches found. Please try another query.')
                return 'No matches found. Please try another query.'
            papers = results['data']
            # count open-access entries up to the limit (reported below;
            # the full result list is what gets returned)
            filtered = []
            for paper in (papers):
                if paper['isOpenAccess']: filtered.append(paper)
                if len(filtered)>=result_limit:break
        except Exception as e:
            print('\n!!!!!!!!\n')
            print('ERROR OCCURRED: ',e)
            print(rsp)
            return rsp.status_code if rsp is not None else None
    print(f'Found {total} results. OpenAccess: {len(filtered)}.')
    return papers
# Finds papers which are similar to an exisiting one
def find_recommendations(paper, result_limit = RESULT_LIMIT):
    """Fetch up to *result_limit* papers recommended for *paper* (a record
    dict with 'paperId' and 'title'); raises on HTTP errors."""
    print(f"Looking for up to {result_limit} recommendations based on: {paper['title']}")
    url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper['paperId']}"
    rsp = requests.get(url, params={'fields': 'title,url,isOpenAccess', 'limit': result_limit})
    rsp.raise_for_status()
    recommended = rsp.json()['recommendedPapers']
    print_papers(recommended)  # return value unused; kept for parity with the original
    return recommended
def extract_title(path):
    """Best-effort extraction of a paper title from the PDF at *path*.

    First tries pdftitle; when that fails (or returns what looks like full
    text), falls back to a coarse heuristic over the first words of the
    document. Returns the title string, or [] when nothing was found.
    NOTE(review): the empty-result sentinel is a list, not '' — confirm
    callers expect that.
    """
    try:
        title = pdftitle.get_title_from_file(path)
        # remove non letter chars
        title = re.sub('[^0-9a-zA-Z]', ' ', title)
        # sometime the full text is returned instead of just the title. in that
        # case use a coarse title detection
        if len(title.split())>30: raise Exception
        return title
    except:
        # ROUGH TITLE DETECTION
        loader = OnlinePDFLoader(path)
        data = loader.load()
        text_content = ''
        formatted_content = data[0].page_content.replace('\n\n', ' ')
        text_content+=formatted_content
        # CONSIDER THE FIRST 100 WORDS
        # exclude single chars and remove puncts
        title = ' '.join([word for word in text_content.split()[0:min(100,len(text_content.split()))] if (len(word)>1)])
        title = re.sub('[^0-9a-zA-Z]', ' ', title)
        # option 1: take the capital words contained in the first 100 words
        title = ' '.join([word for word in title.split()[0:min(100,len(title.split()))] if (word.isupper() and len(word)>1)])
        if len(title)>20: title = " ".join(title.split()[0:10])
        # option 2: take the first 10 words
        else:
            title = [word for word in text_content.split()[0:100] if (len(word)>1)]
            title = ' '.join( title[0:min(10,len(title))])
        print(' > generated title: ', title)
        if title=='': return []
        return title
def find_paper_online(path):
    """Identify the paper stored at *path* by searching Semantic Scholar,
    then arXiv, then PubMed. Returns the first matching record (its native
    dict shape per source) or None when nothing matches."""
    # if every word of the original title is present in the result, return it
    def same_title(title1, title2):
        # BUG FIX: this used to return a generator expression, which is
        # always truthy — every candidate "matched". Now it requires every
        # word of title1 to appear in title2.
        return all(word in title2.lower().split() for word in title1.lower().split())
    # OPEN AND EXTRACT PAPER TITLE
    title = extract_title(path)
    # LOOK IN OTHER PAPER DATABASES
    # 1) scholar attempt (retry on rate-limit status codes)
    while True:
        res = find_paper_from_query(title, result_limit=5)
        if isinstance(res, int):
            if res == 400: raise Exception
            if res == 429:
                time.sleep(60)
                continue
        break
    if isinstance(res, list):
        for article in res:
            if same_title(title, article['title']): return article
    # 2) arxiv attempt
    search = arxiv.Search(
        query=title,
        id_list= [],
        max_results=5,
    )
    for article in search.results():
        if same_title(article.title, title):
            return article._raw
    # 3) pubmed attempt
    pubmed = PubMed(tool="MyTool", email="my@email.address")
    for article in pubmed.query(title, max_results=5):
        art = json.loads(article.toJSON())
        if same_title(title, art['title']):return art
    # noting found :(
    return
def print_papers(papers):
    """Concatenate "<index> <title> <url>" for every paper record (no separators)."""
    return ''.join(
        f"{position} {record['title']} {record['url']}"
        for position, record in enumerate(papers)
    )
def chunks(items, chunk_size):
    """Split *items* into consecutive slices of at most *chunk_size* elements."""
    pieces = []
    for start in range(0, len(items), chunk_size):
        pieces.append(items[start:start + chunk_size])
    return pieces
def fetch_paper_batch(paperid: list):
    """POST a batch of paper ids to the Graph API; returns the parsed JSON
    list on success, otherwise an error string describing the failure."""
    payload = {'ids': [f'{id}' for id in paperid]}
    # https://api.semanticscholar.org/api-docs/graph#tag/Paper-Data/operation/post_graph_get_papers
    rsp = requests.post('https://api.semanticscholar.org/graph/v1/paper/batch',
                        params={'fields': PAPER_FIELDS},
                        json=payload)
    if rsp.status_code == 200:
        return rsp.json()
    return f'Problem fetching {payload}: ' + rsp.text
def download_pdf_from_id(paperid, path=None):
    """Download PDFs for the given Semantic Scholar id(s) into *path*.

    path: destination directory; defaults to the current working directory
    AT CALL TIME (the old `path=os.getcwd()` default was frozen at import).
    Returns Main's result string, or an error string on any failure.
    """
    if path is None:
        path = os.getcwd()
    try:
        res = Main(paper_ids=paperid, dir=path)
        print(res)
        return res
    except Exception:
        # narrowed from a bare except so KeyboardInterrupt still propagates
        return f'error with {paperid}'
# add to PAPER.CSV semantic scolar entries from ID
# add to PAPER.CSV semantic scolar entries from ID
def update_dataframe(incomplete, dest):
    """Fetch full Semantic Scholar records for *incomplete* entries and append
    them (deduplicated by paperId) to the CSV at *dest*, also regenerating the
    companion summary PDF next to it. Returns a status string, or None when
    the batch fetch failed.
    """
    results = fetch_paper_batch(paperid= [item['paperId'] for item in incomplete])
    if isinstance(results, str):  # fetch_paper_batch returns an error string on failure
        print(results)
        print(f" input: {incomplete}")
        return
    # companion summary PDF path: same name as the csv, .pdf extension
    pdf_des = dest[:-4] + '.pdf'
    text = ''
    with open(pdf_des, 'a+',encoding='utf-8') as f:
        for paper in results:
            try:
                text += paper['title'].upper()+'\n'
                # BUG FIX: guard tldr against None (it is often null in the API)
                if paper.get('tldr'):
                    text += paper['tldr']['text']+'\n'
                text += paper['abstract']+'\n'
                if 'summary' in paper.keys(): text += paper['summary']+'\n'
                text += '\n\n'
            except Exception:
                # records missing abstract/title are skipped from the summary
                pass
    write_to_pdf(text, pdf_des)
    count = 0
    fieldnames = ['paperId', 'title', 'first_author', 'year', 'abstract','tldr','bibtex','influentialCitationCount','venue','journal','pages']
    # Read existing entries from the CSV file
    existing_entries = set()
    isFile = os.path.isfile(dest)
    if not isFile:
        with open(dest, 'w',encoding='utf-8') as fp:
            csvfile = csv.DictWriter(fp, fieldnames)
            csvfile.writeheader()
    if isFile:
        with open(dest, 'r',encoding='utf-8') as fp:
            csvfile = csv.DictReader(fp)
            for row in csvfile:
                existing_entries.add(row['paperId'])
    # Append new entries to the CSV file
    with open(dest, 'a', encoding='utf-8') as fp:
        csvfile = csv.DictWriter(fp, fieldnames)
        for paper in results:
            paperId = paper['paperId']
            if paperId in existing_entries:
                continue # Skip if the entry already exists
            paper_authors = paper.get('authors', [])
            journal_data = {}
            if 'journal' in paper:
                journal_data = paper.get('journal',[])
                if journal_data is not None:
                    if 'name' not in journal_data: journal_data['name'] = ''
                    if 'pages' not in journal_data: journal_data['pages'] = ''
            # BUG FIX: was paper.get('tldt', ...) — a typo, so the tldr branch
            # never ran; also treats tldr=None as absent.
            if paper.get('tldr'):
                tldr = paper['tldr']['text']
            elif paper.get('summary'):
                tldr = paper['summary']
            else:
                tldr = paper['abstract']
            csvfile.writerow({
                'title': paper['title'],
                'first_author': paper_authors[0]['name'] if paper_authors else '',
                'year': paper['year'],
                'abstract': paper['abstract'],
                'paperId': paperId,
                'tldr':tldr,
                'bibtex':paper['citationStyles']['bibtex'] if paper['citationStyles']['bibtex'] else '',
                'influentialCitationCount':paper['influentialCitationCount'],
                'venue':paper['venue'],
                'journal':journal_data['name'] if journal_data is not None else '',
                'pages':journal_data['pages'] if journal_data is not None else '',
            })
            count += 1
    return f'Added {count} new results to {dest}'
def write_bib_file(csv_file, bib_file=None):
    """Convert every row of a papers CSV into BibTeX entries.

    bib_file: output path; defaults to the CSV path with a .bib extension.
    """
    target = csv_file[:-4] + '.bib' if bib_file is None else bib_file
    with open(csv_file, 'r', encoding='utf-8') as source:
        reader = csv.DictReader(source)
        with open(target, 'w', encoding='utf-8') as output:
            print(f'writing bibtex file at {target}')
            for row in reader:
                output.write(create_bib_entry(row) + '\n\n')
def create_bib_entry(row):
    """Build an @ARTICLE BibTeX entry string from one papers.csv row dict."""
    entry_id = row['paperId']
    # year may look like 2023-03-24T15:46:10Z (arxiv uses this); keep the leading year
    year = row['year'].split('-')[0]
    bibtex_blob = row['bibtex']

    def _bib_field(field_name):
        # pull `field_name = {...}` out of the stored bibtex blob, '' when absent
        match = re.search(field_name + r"\s*=\s*{([^}]*)}", bibtex_blob)
        return match.group(1) if match else ''

    journal = _bib_field('journal')
    pages = _bib_field('pages')
    # inlined replace_non_alphanumeric: punctuation would break the quoting
    abstract = re.sub(r'[^a-zA-Z0-9]', ' ', row['abstract'])
    # Generate the BibTeX entry
    lines = [
        f"@ARTICLE{{{entry_id},",
        f" title = \"{row['title']}\",",
        f" author = \"{row['first_author']}\",",
        f" abstract = \"{abstract}\",",
        f" year = {year},",
        f" journal = \"{journal}\",",
        f" pages = \"{pages}\"",
        "}",
    ]
    return "\n".join(lines)
def replace_non_alphanumeric(string, replacement=' '):
    """Replace every character that is not an ASCII letter or digit with *replacement*."""
    return re.sub(r'[^a-zA-Z0-9]', replacement, string)
def refy_reccomend(bib_path, number=20):
    """Run refy's recommender over the .bib file at *bib_path*.

    Results land in refy_suggestions/test.html under the workspace root
    (derived by stripping the trailing \\results\\papers.bib from bib_path);
    nothing is returned.
    """
    # NOTE(review): `d` is never used after construction — refy appears to do
    # its work (and write the html) inside __init__; confirm.
    d = refy.Recomender(
        bib_path, # path to your .bib file
        n_days=30, # fetch preprints from the last N days
        html_path=os.path.join(os.path.join(bib_path.replace('\\results\\papers.bib',''),'refy_suggestions'),"test.html"), # save results to a .csv (Optional)
        N=number # number of recomended papers
    )
def write_to_pdf(text, dest):
    """Render *text* onto a single-page PDF saved at *dest* (Aspose.PDF)."""
    pdf_document = ap.Document()
    new_page = pdf_document.pages.add()
    fragment = ap.text.TextFragment(text)
    new_page.paragraphs.add(fragment)
    pdf_document.save(dest)
================================================
FILE: Assistant/semantic_scholar/__init__.py
================================================
#
================================================
FILE: Assistant/semantic_scholar/agent_tools.py
================================================
from contextlib import contextmanager
import uuid
import os
import tiktoken
from . import S2_tools as scholar
import csv
import sys
import requests
# pdf loader
from langchain.document_loaders import OnlinePDFLoader
## paper questioning tools
from llama_index import Document
from llama_index.vector_stores import PineconeVectorStore
from llama_index import GPTVectorStoreIndex, StorageContext, ServiceContext
from llama_index.embeddings.openai import OpenAIEmbedding
def PaperSearchAndDownload(query):
    """End-to-end literature collection for *query*.

    Creates a fresh workspace folder, searches Semantic Scholar, expands the
    result set with cross-referenced recommendations and refy suggestions,
    downloads the PDFs, and maintains papers.csv / papers.bib.

    Returns:
        Path of the newly created workspace directory.
    """
    # make new workspace: workspaces/<first-query-word>_<uuid>
    if not os.path.exists(os.path.join(os.getcwd(), 'workspaces')):
        os.mkdir(os.path.join(os.getcwd(), 'workspaces'))
    workspace_dir_name = os.path.join(os.getcwd(), 'workspaces', query.split()[0] + '_' + str(uuid.uuid4().hex))
    os.mkdir(workspace_dir_name)
    os.mkdir(os.path.join(workspace_dir_name, 'results'))
    os.mkdir(os.path.join(workspace_dir_name, 'refy_suggestions'))
    os.environ['workspace'] = workspace_dir_name

    # 1) search base papers; widen the search when nothing came back.
    print(' 1) Searching base papers')
    papers = scholar.find_paper_from_query(query, result_limit=10)
    # BUG FIX: was `if len(papers == 0):` — that compares the list to the
    # integer 0 and calls len() on the resulting bool (TypeError) instead of
    # testing for an empty result.
    if len(papers) == 0:
        papers = scholar.find_paper_from_query(query, result_limit=50)
    scholar.update_dataframe(incomplete=papers, dest=os.path.join(workspace_dir_name, 'results', 'papers.csv'))
    delete_duplicates_from_csv(csv_file=os.path.join(workspace_dir_name, 'results', 'papers.csv'))

    # 2) Cross-reference reccomendation system:
    # a paper is reccomended if and only if it's related to more than one paper
    print('\n\n 2) Expanding with Scholar reccomendations')
    counts = {}
    candidates = {}
    for paper in papers:
        for guess in scholar.find_recommendations(paper):
            if not guess['isOpenAccess']:
                continue
            candidates[guess['title']] = guess
            counts[guess['title']] = counts.get(guess['title'], 0) + 1
    # reccomend only papers that appeared more than once
    reccomends = [candidates[key] for key in counts if counts[key] > 1]
    print(f'found {len(reccomends)} additional papers')
    # update the csv
    scholar.update_dataframe(incomplete=reccomends, dest=os.path.join(workspace_dir_name, 'results', 'papers.csv'))
    delete_duplicates_from_csv(csv_file=os.path.join(workspace_dir_name, 'results', 'papers.csv'))

    # download the papers (1/2)
    print('downloading papers (1/2)')
    with open(os.path.join(workspace_dir_name, 'results', 'papers.csv'), 'r', encoding='utf-8') as fp:
        csvfile = csv.DictReader(fp)
        scholar.download_pdf_from_id(" ".join(row['paperId'] for row in csvfile), workspace_dir_name)
    scholar.write_bib_file(csv_file=os.path.join(workspace_dir_name, 'results', 'papers.csv'), bib_file=os.path.join(workspace_dir_name, 'results', 'papers.bib'))

    # 3) expand further with refy reccomendendation system
    print('\n\n 3) Expanding with Refy reccomendendation system')
    print('this might take a while...')
    scholar.refy_reccomend(bib_path=os.path.join(workspace_dir_name, 'results', 'papers.bib'))
    with open(os.path.join(workspace_dir_name, 'refy_suggestions', 'test.csv'), 'r', encoding='utf-8') as fp:
        csvfile = csv.DictReader(fp)
        for row in csvfile:
            title = scholar.replace_non_alphanumeric(row['title'])
            title = title.replace(" ", "_")
            save_path = os.path.join(workspace_dir_name, 'refy_suggestions', (title + '.pdf'))
            try:
                download_paper(url=row['url'], save_path=save_path)
            except Exception:
                print(f'couldn t download {row}')
    return f'{os.path.join(os.getcwd(), workspace_dir_name)}'
import urllib
# `import urllib` alone does not load the `request` submodule; the functions
# below call urllib.request.urlretrieve, so import it explicitly.
import urllib.request
def download_paper(url, save_path=None):
    """Best-effort download of a paper PDF from *url* to *save_path*.

    Handles direct .pdf links, DOI redirects (resolved via doi.org), arXiv
    abstract pages, journal '/full' pages, and plos.org article pages.

    Returns:
        A success message with the absolute save path, or a failure message.
    """
    # BUG FIX: the default was `save_path=f"{uuid.uuid4().hex}.pdf"`, which is
    # evaluated ONCE at definition time — every default call reused (and
    # overwrote) the same file. Generate a fresh name per call instead.
    if save_path is None:
        save_path = f"{uuid.uuid4().hex}.pdf"
    success_string = f"paper saved successfully at {os.path.join(os.path.abspath(save_path))}"
    if url.endswith('.pdf'):
        urllib.request.urlretrieve(url, save_path)
        return success_string
    if 'doi' in url:
        doi = paper_id = "/".join(url.split("/")[-2:])
        # Resolve the DOI through doi.org to discover the publisher URL
        print(doi)
        doi_url = f"https://doi.org/{doi}"
        # Send a GET request to the doi.org URL
        response = requests.get(doi_url, allow_redirects=True)
        # Check if the request was successful
        if response.status_code == 200:
            # Extract the final URL after redirection
            url = response.url
    if 'arxiv' in url:
        # take the paper id from the arXiv URL and build the PDF download link
        paper_id = url.split("/")[-1]
        pdf_url = f"http://arxiv.org/pdf/{paper_id}.pdf"
        urllib.request.urlretrieve(pdf_url, save_path)
        return success_string
    else:
        if '/full' in url:
            # BUG FIX: save_path was missing here, so urlretrieve stored the
            # file under a temporary name instead of *save_path*.
            urllib.request.urlretrieve(url.replace('/full', '/pdf'), save_path)
            return success_string
        if 'plos.org' in url:
            final_url = url.replace('article?', 'article/file?')
            urllib.request.urlretrieve(final_url, save_path)
            return success_string
    return f'\nfailed to download {url}'
def download_bibtex_library(csv_path):
    """Attempt to download the PDF for every row of a bibliography CSV.

    Files are saved next to the CSV's parent folder, named after a
    sanitised version of each title. Failures are retried once with a
    '.pdf' suffix appended to the URL, then reported and skipped.
    """
    with open(csv_path, 'r', encoding='utf-8') as fp:
        for row in csv.DictReader(fp):
            safe_title = scholar.replace_non_alphanumeric(row['title']).replace(" ", "-")
            save_path = os.path.join(os.path.join(csv_path, '..', safe_title + '.pdf'))
            try:
                download_paper(url=row['url'], save_path=save_path)
            except:
                try:
                    download_paper(url=row['url'] + '.pdf', save_path=save_path)
                except:
                    print(f'couldn t download {row}')
def generate_chunks(text, CHUNK_LENGTH = 4000):
    """Split *text* into decoded chunks of at most CHUNK_LENGTH GPT-4 tokens."""
    encoder = tiktoken.encoding_for_model("gpt-4")
    token_ids = encoder.encode(text)
    return [
        encoder.decode(token_ids[start:start + CHUNK_LENGTH])
        for start in range(0, len(token_ids), CHUNK_LENGTH)
    ]
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
import langid
import time
# def process_pdf_folder(folder_path):
# if not os.path.exists(folder_path):
# return 'the folder does not exist, check your spelling'
# for item in os.listdir(folder_path):
# if not item.endswith('.pdf'):continue
# with open(os.path.join(folder_path,'SUMMARY.txt'), 'a', encoding='UTF-8') as write_file:
# write_file.write(item)
# write_file.write("\n\n\n")
# txt = summarize_pdf(item, model='Vicuna')
# try:
# write_file.write(txt)
# except:
# print(txt)
# with open(os.path.join(folder_path,'SUMMARY.txt'), 'r', encoding='UTF-8') as read_file:
# return read_file.read()
# # def summarize_pdf(pdf_path, model= None):
# text = readPDF(pdf_path)
# # according to the TLDR Model, consider smaller chunks
# text_chunks = generate_chunks(text, 700)
# if model is not None:
# summarizer = LocalSearchEngine(tldr_model=model)
# summary=''
# for chunk in text_chunks:
# summary += summarizer.tldr(chunk)
# return summary
def get_result_path(path, exclude=()):
    """Recursively locate 'papers.csv' under *path*.

    Args:
        path: root directory to search.
        exclude: directory names to skip at EVERY depth.

    Returns:
        Full path to the first papers.csv found, or None.

    BUG FIX: the recursive call previously dropped *exclude*, so excluded
    directories were only skipped at the top level.
    """
    for item in os.listdir(path):
        if item == 'papers.csv':
            return os.path.join(path, item)
        subdir = os.path.join(path, item)
        if os.path.isdir(subdir) and item not in exclude:
            res = get_result_path(subdir, exclude=exclude)
            if res:
                return res
    return None
def get_workspace_titles(workspace_name):
    """Return the list of paper titles recorded in the workspace's papers.csv."""
    csv_file_path = get_result_path(workspace_name)
    with open(csv_file_path, 'r', encoding='utf-8') as file:
        return [row['title'] for row in csv.DictReader(file)]
import re
def same_title(title1, title2):
    """Fuzzy title comparison.

    Non-letters are collapsed to spaces, both titles are lower-cased and
    split into word sets; the titles match when one word set is a subset of
    (or equal to) the other. Non-string inputs yield False.
    """
    try:
        clean1 = re.sub(r'[^a-zA-Z]', ' ', title1)
        clean2 = re.sub(r'[^a-zA-Z]', ' ', title2)
    except Exception:
        return False
    set1 = set(clean1.lower().split())
    set2 = set(clean2.lower().split())
    return set1 <= set2 or set2 <= set1
def glimpse_pdf(title):
    """Search every workspace's papers.csv for *title*.

    Returns:
        A one-line summary (title, paperId, abstract) of the first match,
        or a not-found message.
    """
    # find papers.csv in each workspace
    for workspace_name in os.listdir('workspaces'):
        # BUG FIX: os.listdir returns bare names; the search must happen
        # inside the 'workspaces' directory, so join the parent path.
        csv_file_path = get_result_path(os.path.join('workspaces', workspace_name))
        # BUG FIX: a workspace without papers.csv previously aborted the whole
        # search with an early `return 'no paper found'`; skip it instead.
        if csv_file_path is None:
            continue
        with open(csv_file_path, 'r', encoding='utf-8') as file:
            csv_file = csv.DictReader(file)
            for row in csv_file:
                if same_title(row['title'], title):
                    return f"{row['title']}, paperId: {row['paperId']}, summary: {row['abstract']}"
    return f'\nno paper found with title {title}'
def count_tokens(text):
    """Return the number of GPT-4 BPE tokens in *text*."""
    return len(tiktoken.encoding_for_model("gpt-4").encode(text))
def readPDF(pdf_path):
    """Load a PDF via langchain's OnlinePDFLoader and return its text.

    Double newlines (paragraph breaks from extraction) are collapsed into
    single spaces before the pages are concatenated.
    """
    pages = OnlinePDFLoader(pdf_path).load()
    return ''.join(page.page_content.replace('\n\n', ' ') for page in pages)
def get_pdf_path(dir, exclude=()):
    """Recursively collect the paths of all .pdf files under *dir*.

    Args:
        dir: root directory to scan.
        exclude: directory names to skip at EVERY depth. (Callers sometimes
            pass a single string; `in` then does substring matching, which is
            preserved here.)

    BUG FIX: the recursive call previously dropped *exclude*, so excluded
    directory names were only honoured at the top level.
    """
    paths = []
    for item in os.listdir(dir):
        itempath = os.path.join(dir, item)
        if item.endswith('.pdf'):
            paths.append(itempath)
        if os.path.isdir(itempath) and item not in exclude:
            paths.extend(get_pdf_path(itempath, exclude=exclude))
    return paths
def delete_duplicates_from_csv(csv_file):
    """Remove rows whose non-empty 'title' duplicates another row, keeping
    the LAST occurrence of each title (matching the original behaviour).

    Rows with an empty/missing title are always kept.

    PERF FIX: the original re-read and rewrote the whole CSV once per
    duplicate (O(n^2) file I/O); this version does one read and at most one
    write.
    """
    print('verifying duplicates...')
    with open(csv_file, 'r', encoding='UTF-8') as file:
        reader = csv.DictReader(file)
        fieldnames = reader.fieldnames
        rows = list(reader)
    # count occurrences of each non-empty title
    remaining = {}
    for row in rows:
        title = row['title']
        if title:
            remaining[title] = remaining.get(title, 0) + 1
    kept = []
    DELETED = 0
    for row in rows:
        title = row['title']
        if title:
            remaining[title] -= 1
            # a later occurrence of this title exists -> drop this one
            if remaining[title] > 0:
                DELETED += 1
                continue
        kept.append(row)
    # rewrite only when something was actually removed
    if DELETED:
        with open(csv_file, 'w', newline='', encoding='UTF-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(kept)
    print(f"Deleted {DELETED} duplicates")
    return
def update_workspace_dataframe(workspace, verbose = True):
    """Sync the workspace's papers.csv with the PDF files actually on disk.

    Scans the workspace for PDFs (skipping refy_suggestions), looks up any
    paper missing from the CSV online, appends the matches, then removes
    duplicates and regenerates the .bib file.
    """
    ADDED = 0
    # find results.csv
    csv_path = get_result_path(workspace)
    # get titles in csv
    titles = get_workspace_titles(workspace)
    # get local papers path (note: exclude is a single string here; get_pdf_path
    # then performs substring membership tests — works for this name)
    paths = get_pdf_path(workspace, exclude='refy_suggestions')
    # adding new to csv:
    for path in paths:
        exists = False
        # extract the title from the local paper
        title = scholar.extract_title(path)
        for t in titles:
            if same_title(t,title): exists = True
        # add it to dataframe if it was not found on the DF
        if not exists:
            if verbose: print(f"\nnew paper detected: {title}")
            # find it with online
            paper = scholar.find_paper_online(path)
            if paper :
                if verbose: print(f"\t---> best match found online: {paper['title']} " )
                # NOTE(review): this loop only PRINTS when a duplicate is seen;
                # it does not skip the paper — the actual guard is the
                # areYouSure check below. Confirm this is intended.
                for t in titles:
                    if same_title(paper['title'], title):
                        if verbose: print(f"\t this paper is already present in the dataframe. skipping")
            else:
                if verbose: print(path, '-x-> no match found')
                continue
            with open(csv_path, 'a', encoding='utf-8') as fp:
                # second check against the CSV titles before appending
                areYouSure = True
                for t in titles:
                    if same_title(t,paper['title']): areYouSure =False
                if not areYouSure:
                    if verbose: print(f"double check revealed that the paper is already in the dataframe. Skipping")
                    continue
                if verbose: print(f"\t---> adding {paper['title']}")
                ADDED +=1
                paper_authors = paper.get('authors', [])
                # normalise journal metadata so name/pages keys always exist
                journal_data = {}
                if 'journal' in paper:
                    journal_data = paper.get('journal',[])
                    if journal_data is not None:
                        if 'name' not in journal_data: journal_data['name'] = ''
                        if 'pages' not in journal_data: journal_data['pages'] = ''
                # best available summary: tldr > summary > abstract > fallback
                if paper.get('tldr',[]) != []:tldr = paper['tldr']['text']
                elif paper.get('summary',[]) != []:tldr = paper['summary']
                elif 'abstract' in paper:tldr = paper['abstract']
                else: tldr = 'No summary available'
                # publication year: 'year' preferred, arXiv-style 'updated' fallback
                if 'year' in paper:
                    year = paper['year']
                elif 'updated' in paper:year = paper['updated']
                else: year = ''
                # bibtex citation if the API provided one
                if 'citationStyles' in paper:
                    if 'bibtex' in paper['citationStyles']: citStyle = paper['citationStyles']['bibtex']
                    else: citStyle = paper['citationStyles'][0]
                else: citStyle = ''
                csvfile = csv.DictWriter(fp, ['paperId', 'title', 'first_author', 'year', 'abstract','tldr','bibtex','influentialCitationCount','venue','journal','pages'])
                try:
                    csvfile.writerow({
                        'title': paper['title'],
                        'first_author': paper_authors[0]['name'] if paper_authors else '',
                        'year': year,
                        'abstract': paper['abstract'] if 'abstract' in paper else '',
                        'paperId': paper['paperId'] if 'paperId' in paper else '',
                        'tldr':tldr,
                        'bibtex':citStyle,
                        'influentialCitationCount': paper['influentialCitationCount'] if 'influentialCitationCount' in paper else '0',
                        'venue':paper['venue'] if 'venue' in paper else '',
                        'journal':journal_data['name'] if journal_data is not None else '',
                        'pages':journal_data['pages'] if journal_data is not None else '',
                    })
                except Exception as e:
                    if verbose: print('could not add ', title, '\n',e)
    # delete dupes if present
    if verbose: print(f"\n\nCSV UPDATE: Added {ADDED} new papers")
    # clean form dupes
    delete_duplicates_from_csv(csv_path)
    # update bib
    scholar.write_bib_file(csv_path)
    return
def load_workspace(folderdir):
    """Recursively read every PDF under *folderdir* into llama-index Documents."""
    documents = []
    for entry in os.listdir(folderdir):
        entry_path = os.path.join(folderdir, entry)
        if entry.endswith('.pdf'):
            print(f' > loading {entry}')
            # readPDF is chatty; silence it while extracting the text
            with suppress_stdout():
                text = readPDF(entry_path)
            documents.append(Document(text=text, doc_id=uuid.uuid4().hex))
        if entry == '.' or entry == '..':
            continue
        if os.path.isdir(entry_path):
            documents.extend(load_workspace(entry_path))
    return documents
# List paths of all pdf files in a folder
def list_workspace_elements(folderdir):
    """Recursively collect the paths of every PDF file under *folderdir*."""
    pdf_paths = []
    for entry in os.listdir(folderdir):
        full_path = os.path.join(folderdir, entry)
        if entry.endswith('.pdf'):
            pdf_paths.append(rf"{full_path}")
        if entry == '.' or entry == '..':
            continue
        if os.path.isdir(full_path):
            pdf_paths.extend(list_workspace_elements(full_path))
    return pdf_paths
def llama_query_engine(docs: list, pinecone_index_name: str):
    """Index *docs* into a Pinecone-backed llama-index and return a query engine.

    Creates the Pinecone index on first use (dot-product metric, 1536 dims to
    match text-embedding-ada-002), embeds all documents, and returns both the
    query engine and the underlying index object.
    """
    pinecone.init(
        api_key=os.environ['PINECONE_API_KEY'],
        environment=os.environ['PINECONE_API_ENV'],
    )
    # create the index the first time this name is used
    if pinecone_index_name not in pinecone.list_indexes():
        pinecone.create_index(
            name=pinecone_index_name,
            metric='dotproduct',
            dimension=1536,  # 1536 dim of text-embedding-ada-002
        )
    pinecone_index = pinecone.Index(pinecone_index_name)
    # wrap the raw index in a llama-index vector store
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    time.sleep(1)
    # storage (vector db) + embedding configuration
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    embedder = OpenAIEmbedding(model='text-embedding-ada-002', embed_batch_size=100)
    service_context = ServiceContext.from_defaults(embed_model=embedder)
    # populate the vector store with the documents
    LamaIndex = GPTVectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        service_context=service_context,
    )
    print('PINECONE Vector Index initialized:\n', pinecone_index.describe_index_stats())
    # hand back a ready-to-use query engine plus the index itself
    query_engine = LamaIndex.as_query_engine()
    return query_engine, LamaIndex
@contextmanager
def suppress_stdout():
    """Context manager that silences sys.stdout for the duration of the block."""
    saved_stdout = sys.stdout
    with open(os.devnull, "w") as sink:
        sys.stdout = sink
        try:
            yield
        finally:
            # always restore, even if the body raised
            sys.stdout = saved_stdout
================================================
FILE: Assistant/semantic_scholar/simple.py
================================================
#!/usr/bin/env python3
import dotenv
dotenv.load_dotenv()
import re
import argparse
import os
from requests import Session
from typing import Generator, Union
import urllib3
urllib3.disable_warnings()
S2_API_KEY = os.environ['S2_API_KEY']
def get_paper(session: Session, paper_id: str, fields: str = 'paperId,title', **kwargs) -> dict:
    """Fetch one paper record from the Semantic Scholar Graph API."""
    query_params = {'fields': fields, **kwargs}
    auth_headers = {'x-api-key': S2_API_KEY}
    with session.get(
        f'https://api.semanticscholar.org/graph/v1/paper/{paper_id}',
        params=query_params,
        headers=auth_headers,
    ) as response:
        response.raise_for_status()
        return response.json()
def download_pdf(session: Session, url: str, path: str, user_agent: str = 'requests/2.0.0'):
    """Stream the PDF at *url* into the file at *path*.

    A user-agent header is sent to avoid server errors, and the body is
    streamed in 8 KiB chunks so the whole file never sits in memory.
    Raises if the HTTP status is an error or the payload is not a PDF.
    """
    request_headers = {'user-agent': user_agent}
    with session.get(url, headers=request_headers, stream=True, verify=False) as response:
        response.raise_for_status()
        if response.headers['content-type'] != 'application/pdf':
            raise Exception('The response is not a pdf')
        with open(path, 'wb') as pdf_file:
            for chunk in response.iter_content(chunk_size=8192):
                pdf_file.write(chunk)
def download_paper(session: Session, paper_id: str, directory: str = 'papers', user_agent: str = 'requests/2.0.0') -> Union[str, None]:
    """Download one open-access paper into *directory* (or the active workspace).

    Returns:
        The saved PDF path, or None when the paper is closed-access or the
        file already exists.
    """
    # prefer the workspace directory set by the assistant, when present.
    # BUG FIX: narrowed the bare `except:` to the KeyError actually expected
    # from the missing environment variable.
    try:
        directory = os.environ['workspace']
    except KeyError:
        pass
    paper = get_paper(session, paper_id, fields='paperId,title,isOpenAccess,openAccessPdf')
    # check if the paper is open access
    if not paper['isOpenAccess']:
        return None
    paperId: str = re.sub(r'\W+', '', paper['title']).encode("utf-8").decode("utf-8")
    pdf_url: str = paper['openAccessPdf']['url']
    pdf_path = os.path.join(directory, f'{paperId}.pdf')
    # skip files that were already downloaded (the original checked this
    # twice, via isfile and then exists; once is enough)
    if os.path.isfile(pdf_path):
        return None
    # create the directory if it doesn't exist
    os.makedirs(directory, exist_ok=True)
    download_pdf(session, pdf_url, pdf_path, user_agent=user_agent)
    return pdf_path
def download_papers(paper_ids: list[str], directory: str = 'papers', user_agent: str = 'requests/2.0.0') -> Generator[tuple[str, Union[str, None, Exception]], None, None]:
    """Yield (paper_id, outcome) for each id, reusing one TCP session.

    The outcome is the saved path on success, None for closed-access or
    already-present papers, or the exception that was raised.
    """
    with Session() as session:
        for pid in paper_ids:
            try:
                outcome = download_paper(session, pid, directory=directory, user_agent=user_agent)
            except Exception as err:
                yield pid, err
            else:
                yield pid, outcome
def main(args: argparse.Namespace) -> None:
    """Download every requested paper and report each outcome on stdout.

    BUG FIX: the original `return`ed a message after processing only the
    FIRST paper, and the `__main__` block discarded that return value — so
    remaining ids were skipped and nothing was ever shown. Print each
    outcome and keep going instead.
    """
    for paper_id, result in download_papers(args.paper_ids, directory=args.directory, user_agent=args.user_agent):
        if isinstance(result, Exception):
            print(f"Failed to download '{paper_id}': {type(result).__name__}: {result}")
        elif result is None:
            print(f"'{paper_id}' is not open access")
        else:
            print(f"Downloaded '{paper_id}' to '{result}'")
if __name__ == '__main__':
    # CLI entry point: python simple.py [-d DIR] [-u USER_AGENT] ID [ID ...]
    parser = argparse.ArgumentParser()
    parser.add_argument('--directory', '-d', default='papers')
    parser.add_argument('--user-agent', '-u', default='requests/2.0.0')
    parser.add_argument('paper_ids', nargs='+', default=[])
    args = parser.parse_args()
    main(args)
def Main(paper_ids=[], dir='papers', user_agent = 'requests/2.0.0', ):
    """Programmatic wrapper around download_papers.

    *paper_ids* may be a list or a single string (comma- or
    whitespace-separated). Returns a multi-line report of every outcome.
    """
    if isinstance(paper_ids, str):
        # comma-separated takes priority; otherwise split on whitespace
        separator = ',' if ',' in paper_ids else None
        paper_ids = paper_ids.split(separator)
        paper_ids = (pid.strip() for pid in paper_ids)
    report_lines = []
    for paper_id, result in download_papers(paper_ids, directory=dir, user_agent=user_agent):
        if isinstance(result, Exception):
            report_lines.append(f"Failed to download '{paper_id}': {type(result).__name__}: {result}\n")
        elif result is None:
            report_lines.append(f"couldn't download '{paper_id} because it is not open access\n")
        else:
            report_lines.append(f"{result}\n")
    return "".join(report_lines)
================================================
FILE: Assistant/tools.py
================================================
# imports for Local Search Engine
import openai
import os
import pandas as pd
import numpy as np
from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
from tqdm import tqdm
import ast
from . import webui
# import for Translator
import regex as re
import langid
from textblob import TextBlob
try: import translators as ts
except: print('could not import translators package')
import argostranslate.package
import argostranslate.translate
import math
import time
import collections
# the MutableSequence ABC lives in collections.abc (the collections.* alias
# was removed in Python 3.10)
import collections.abc
"""
AssistantChat: dictionary on steroids.
"""
# BUG FIX: inherited from collections.MutableSequence, an alias that was
# deprecated since Python 3.3 and REMOVED in Python 3.10; the ABC lives in
# collections.abc (available since 3.3, so this stays backward-compatible).
class AssistantChat(collections.abc.MutableSequence):
    """List-like container of chat messages with an optional save filename.

    Wraps a plain list in `self.body`; the MutableSequence ABC supplies
    extend/remove/etc. on top of the primitives below. Calling the instance
    returns the raw list.
    """

    def __init__(self, begin: list, *args):
        self.body = begin        # the underlying message list
        self.filename = None     # set once the chat has been saved to disk
        self.extend(list(args))  # any extra positional messages

    def is_saved(self):
        """True once a filename has been associated with this chat."""
        return True if self.filename != None else False

    def insert(self, i, v):
        self.body.insert(i, v)

    def append(self, item):
        self.body.append(item)

    def __call__(self):
        # calling the chat object returns the raw message list
        return self.body

    def __len__(self): return len(self.body)
    def __getitem__(self, i): return self.body[i]
    def __delitem__(self, i): del self.body[i]

    def __setitem__(self, i, v):
        self.body[i] = v

    def __str__(self):
        return str(self.body)
"""
Translator:
performs basic translation opration using ChatGPT.
Setting temperature to 0 allows better raw results
"""
"""
options:
- gpt-3.5-turbo: reasonably fast, online, requires openai credit usage
- translators 5.6.3 lib: online, excellent, long lags might occcur
- [default] argostranslator: fast, offline
"""
class Translator:
    """Multi-backend text translator.

    Backends (pick one at construction):
      - 'gpt-3.5-turbo'  : online, spends OpenAI credit
      - 'translators'    : online (translators lib); falls back to argos on error
      - 'argostranslator': offline (default)

    Required kwarg: translator_languages — list of language codes; Argos
    packages are installed for every ordered pair at construction time.
    """

    def __init__(self, model="argostranslator", **kwargs):
        POSSIBLE_MODELS = ['argostranslator', 'gpt-3.5-turbo', 'translators']
        if model not in POSSIBLE_MODELS:
            raise Exception('this Translation model is not available')
        self.DEFAULT_CHAT = [{"role": "system",
                              "content": "You are a translator. You recieve text and target language as inputs and translate the text to the target language"}]
        self.body = None
        self.model = model
        langs = kwargs['translator_languages']  # required kwarg
        self.languages = langs
        # Download and install Argos Translate packages for every language pair
        argostranslate.package.update_package_index()
        available_packages = argostranslate.package.get_available_packages()
        langid.set_languages(langs)
        for i in range(len(langs)):
            for j in range(len(langs)):
                if langs[i] == langs[j]: continue
                try:
                    package_to_install = next(
                        filter(
                            lambda x: x.from_code == langs[i] and x.to_code == langs[j], available_packages
                        )
                    )
                except StopIteration:
                    print(f'failed to add {langs[i]} => {langs[j]}')
                    # BUG FIX: previously execution fell through here and
                    # re-installed the package found for the PREVIOUS pair
                    # (or raised NameError on the first miss); skip the pair.
                    continue
                print(f'downloading Argos Translate Language packages...')
                try:
                    argostranslate.package.install_from_path(package_to_install.download())
                except Exception:
                    pass  # best effort: a failed install just disables that pair

    def translate(self, input, to_language, from_language=None):
        """Translate *input* into *to_language*; detect the source language
        with langid when *from_language* is not given. Falls back to
        returning the input unchanged on any backend failure."""
        if from_language == to_language: return input
        if from_language == None:
            from_language = langid.classify(input)[0]
        if self.model == "gpt-3.5-turbo":
            # BUG FIX: `self.body = self.DEFAULT_CHAT` aliased the template
            # list, so every call appended to DEFAULT_CHAT and the prompt
            # grew without bound; work on a copy instead.
            self.body = list(self.DEFAULT_CHAT)
            self.body.append({"role": "user", "content": f"translate in {to_language}:'{input}'"})
            try:
                API_response = openai.ChatCompletion.create(
                    model=self.model,
                    temperature=0,
                    messages=self.body)
            except Exception as e:
                print(f"couldn't translate {self.body[-1]}")
                print(e)
                return input
            return API_response['choices'][0]['message']['content']
        if self.model == 'translators':
            try:
                res = ts.translate_text(input, translator='google', to_language=to_language, from_language=from_language)
            except Exception:
                # degrade to the offline backend for subsequent calls
                res = input
                self.model = 'argostranslator'
                print('translation using translators switching to argostranslate')
            return res
        if self.model == 'argostranslator':
            try:
                res = argostranslate.translate.translate(input, from_code=from_language, to_code=to_language)
            except Exception:
                print(f"translation using argostranslate from: {from_language} - to -> {to_language} Failed")
                print(input)
                res = input
            return res
"""
LocalSearchEngine:
- Looks for files in a foder;
- extracts information;
- create high value contents that allow for accurate search;
to be implemented:
- extend reserarch to .pdf and .jpeg (w/ ChatGPT4)
- extends also to videos;
- extends also to scientific papers;
"""
class LocalSearchEngine:
    """Semantic search over saved chat .txt files in a folder.

    Per-file topic tags and OpenAI embeddings are cached in a DATAFRAME.csv
    inside the folder; searches compare embeddings of the (translated) key
    against those cached file embeddings.
    """

    def __init__(self,
                 embed_model = "text-embedding-ada-002",
                 tldr_model = "gpt-3.5-turbo",
                 translator_model = "argostranslator",
                 translator_languages = ['en','it','es'],
                 default_dir = os.path.realpath(os.path.join(os.getcwd(),'saved_chats')),
                 irrelevancy_th=0.8):
        # NOTE(review): default_dir is evaluated once at class-definition time,
        # from the process cwd — confirm that is the intended base folder.
        self.translate_engine = Translator(model=translator_model, translator_languages=translator_languages)
        self.tldr_model = tldr_model
        self.embed_model = embed_model
        self.default_dir = default_dir
        self.irrelevancy_threshold = irrelevancy_th

    def compute_similarity(self, key, text):
        """Cosine similarity; each argument may be raw text (embedded on the
        fly) or an already-computed embedding vector."""
        if type(key)==str: key_embedding = self.compute_embeds(key)
        else: key_embedding = key
        if type(text)==str: query_embedding =self.compute_embeds(text)
        else: query_embedding = text
        similarity = cosine_similarity(key_embedding, query_embedding)
        return similarity

    def accurate_search(self, key, path=None, n=-1, from_csv=False):
        """Rank files in *path* by embedding similarity to *key*.

        Returns the top *n* rows (n=-1 means all) of a DataFrame with
        file_names / similarity / tags columns, sorted by similarity.
        """
        if path is None:
            path = self.default_dir
        print('\n')
        # bootstrap an empty cache file on first use
        if 'DATAFRAME.csv' not in os.listdir(path):
            print('> > DATAFRAME.csv not detected building a new one')
            pd.DataFrame({'file_names':['DATAFRAME.csv'], 'similarity':[0],"tags":[None]}).to_csv(os.path.join(path, 'DATAFRAME.csv'))
        # a multi-word key may arrive as a list/tuple of words
        if isinstance(key, list) or isinstance(key, tuple):
            key = " ".join(key)
        # USE EXISTING DATAFRAME TO MAKE SEARCH FASTER (skip tag generation)
        # NOTE(review): when from_csv is False, fnames/tags/embeds are never
        # bound and the loop below raises NameError — it looks like only
        # from_csv=True is exercised in practice; confirm.
        if from_csv:
            DataFrame = pd.read_csv(os.path.join(path,'DATAFRAME.csv'))
            fnames = DataFrame["file_names"]
            tags = DataFrame["tags"]
            embeds = DataFrame["embeddings"]
            if len(fnames)!=len(os.listdir(path)):
                print('> dataset not updated. Updating it now...')
                self.produce_folder_tags() ### I should add a parameter to specify HugginFaceHub (free) embeddings or OpenAI ones ($)
        print('> Analyzing DataFrame:')
        results = []
        topics = []
        # pre-compute the key's embedding in every configured language
        key_embed = {}
        for lang in self.translate_engine.languages:
            transl_key = self.translate_engine.translate(input=key, to_language=langid.classify(lang)[0], from_language=langid.classify(key)[0])
            print(f'> > computing key embedding in {lang} language')
            key_embed[lang]= self.compute_embeds(transl_key)
        for i in tqdm(range(len(fnames))):
            # non-text entries get a zero score so every row stays aligned
            if not(fnames[i].endswith('.txt')):
                results.append(0)
                topics.append('None')
                continue
            # extract tags associated to the file
            file_tags = tags[i]
            topics.append(file_tags)
            # extract and parse the saved embeddings
            file_embeds = ast.literal_eval( embeds[i] ) # from "[a, b, c,]" to [a, b, c]
            # take the key embedding from the same language (more accurate)
            # NOTE(review): langid may classify the tags as a language that is
            # not in key_embed, which would raise KeyError — confirm.
            key_embedding = key_embed[langid.classify(file_tags)[0]]
            # retry until the similarity call succeeds
            # NOTE(review): if compute_similarity fails persistently this
            # loop never terminates — consider a retry cap.
            done=False
            while not(done):
                try:
                    relevance = self.compute_similarity(file_embeds, key_embedding)
                    done=True
                except Exception as e:
                    print(e)
            results.append(relevance)
        if n==-1: n=len(fnames)
        df = pd.DataFrame({'file_names':fnames, 'similarity':results,"tags":topics})
        df = df.sort_values(by='similarity', ascending=False)
        df = df.reset_index(drop=True)
        return df.head(n)

    def produce_folder_tags(self, path=None):
        """(Re)build the DATAFRAME.csv cache of tags + embeddings for *path*.

        Existing rows are reused; only files without cached tags/embeddings
        trigger API calls. Returns the rebuilt DataFrame.
        """
        if path is None:
            path = self.default_dir
        if ('DATAFRAME.csv' in os.listdir(path)):
            print('> > DataFrame existing')
        else:
            print('> > Creating empty DataFrame')
            pd.DataFrame(columns=['file_names', 'tags', 'embeddings']).to_csv(os.path.join(path,'DATAFRAME.csv'))
        existing_df = pd.read_csv(os.path.join(path, 'DATAFRAME.csv'))
        fnames = os.listdir(path)
        embeds = []
        topics = []
        n_updates = 0
        for filename in fnames:
            # process text files only; placeholders keep the lists aligned
            if not(filename.endswith('.txt')):
                embeds.append(math.nan)
                topics.append('NaN')
                continue
            # don't repeat calculation if the file has already been processed
            has_tags = len(existing_df['tags'][existing_df["file_names"]==filename])>=1
            try:
                has_embeds = len(existing_df['embeddings'][existing_df["file_names"]==filename].to_list()[0]) >5
            except:
                has_embeds = False
            # NOTE(review): this handle is never closed — consider `with open(...)`.
            f = open(os.path.join(path,filename), 'r')
            text = f.read()
            # crude context-window guard: keep only the first 2000 words
            if count_tokens(text)>4096:
                # keep 2000 words only
                text = " ".join(text.split()[0:2000])
            if has_tags:
                tags= existing_df['tags'][existing_df["file_names"]==filename].to_list()[0]
                topics.append(tags)
            else:
                n_updates +=1
                print(f'> > (unknown): extracting topics')
                # retry until the OpenAI call succeeds (rate-limit back-off)
                done= False
                while not(done):
                    try:
                        tags = self.extract_tags(text)
                        done= True
                    except:
                        print('> > system overloaded, waiting 5 sec')
                        time.sleep(5)
                topics.append(tags)
            if has_embeds:
                embeds.append(existing_df['embeddings'][existing_df["file_names"]==filename].to_list()[0])
            else:
                n_updates +=1
                print(f'> > (unknown): processing embeddings')
                # embeddings are computed from the TAGS, not the full text
                done = False
                while not(done):
                    try:
                        embedding = self.compute_embeds(tags)
                        done= True
                    except:
                        print('> > system overloaded, waiting 5 sec')
                        time.sleep(5)
                embeds.append(embedding)
        df = pd.DataFrame({'file_names':fnames, 'tags':topics, 'embeddings':embeds})
        df.to_csv(os.path.join(path,'DATAFRAME.csv'), index=False)
        df = df.reset_index(drop=True)
        print(f"> > # UPDATES applied:{n_updates}")
        return df

    def extract_tags(self, text):
        """Ask the chat model for up to 10 comma-separated topics in *text*."""
        # keep only what follows the first 'user:' marker of the saved chat
        text = text.split('user:')
        text = "".join(text[1:])
        chat = [{"role": "system",
                 "content": "You recieve text and extract up to 10 different topic covered in the text. You output the topics separated by a comma (,)"}]
        chat.append({"role": "user", "content":f"extract tags:{text}"})
        API_response = openai.ChatCompletion.create(
            model=self.tldr_model,
            temperature=0,
            messages=chat)
        output = API_response['choices'][0]['message']['content']
        # strip any "Tags:"-style prefix the model may add
        if ':' in output:
            output = output.split(':')
            output = "".join(output[1:])
        return output

    # ADD Free alternative (Huggingface Embeds)
    def compute_embeds(self, words):
        """Return the OpenAI embedding vector for *words*."""
        return openai.Embedding.create(input=words, engine=self.embed_model)['data'][0]['embedding']

    def DaVinci_tldr(self, text):
        """Summarize *text* with the legacy text-davinci-003 Tl;dr prompt."""
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=f"{text}\n\nTl;dr",
            temperature=0,
            max_tokens=200,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        return response['choices'][0]["text"]

    def tldr(self, text, to_language=None, with_model = ''):
        """Summarize *text*, optionally in *to_language*.

        Backend is self.tldr_model unless overridden via *with_model*
        ('gpt-3.5-turbo' or 'Vicuna'); without a target language the
        DaVinci completion endpoint is used instead of chat.
        """
        if self.tldr_model == 'gpt-3.5-turbo'or with_model=='gpt-3.5-turbo':
            text = text.replace('\n',' ')
            if to_language != None:
                context =f'tldr in {to_language}:'
                CHAT = [{"role": "system", "content":context},
                        {"role": "user", "content":f"'{text}'"}]
                response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    temperature=0,
                    max_tokens=200,
                    messages=CHAT)
                # NOTE(review): on a malformed response this falls through and
                # returns None — confirm callers tolerate that.
                try:
                    return response['choices'][0]['message']['content']
                except:
                    pass
            else:
                return self.DaVinci_tldr(text)
        if self.tldr_model == 'Vicuna' or with_model=='Vicuna':
            try:
                webui.set_text_gen_params(temperature=0.1)
                result = webui.oobabooga_textgen(prompt=f'Text Summarizer [Question]: summarize the following text: {text}\n[Answer]:')
                postprocessed = webui.post_process(result)
                return postprocessed
            # NOTE(review): if oobabooga_textgen itself raised, `result` is
            # unbound here and this handler raises NameError — confirm.
            except IndexError as e:
                return result
            except Exception as e:
                print(e)
                return ''
"""
OnlineSearchEngine:
to be implemented:
- allows to extract content from the internet with http requests;
- provide context to the VirtualAssistant
- find a way to trigger online search
"""
class OnlineSearchEngine:
    # work in progress — placeholder for an engine that fetches web content
    # via HTTP requests to give the assistant online context.
    pass
"""
MISCELLANEOUS FUNCTIONS
"""
def count_tokens(vCountTokenStr):
    """Return the number of word tokens in the string, as counted by TextBlob."""
    return len(TextBlob(vCountTokenStr).words)
def parse_conversation(string_chat):
    """Rebuild a role-tagged message list from a flat "role: text" transcript.

    Splits on 'user:' turns; within each turn, text before 'assistant:' is
    the user message and each following segment is an assistant message.
    A leading 'system:' segment becomes a system message.

    BUG FIX: the system branch split on the misspelled marker 'ststem:',
    so the 'system:' prefix was never stripped from system messages.
    """
    split1_chat = string_chat.split('user:')
    rebuilt = []
    for item in split1_chat:
        if 'system:' in item:
            rebuilt.append({"role": "system", "content": f"{item.split('system:')[-1]}"})
        if 'assistant:' in item:
            spl_item = item.split("assistant:")
            rebuilt.append({"role": "user", "content": f"{spl_item.pop(0)}"})
            while len(spl_item) >= 1:
                rebuilt.append({"role": "assistant", "content": f"{spl_item.pop(0)}"})
    return rebuilt
def take_last_k_interactions(chat, max_tokens=4000):
    """Collect chat messages until the running token count reaches max_tokens.

    NOTE(review): despite the name, this walks *chat* from the START and so
    keeps the OLDEST messages that fit the budget — confirm whether the
    newest ones were intended.

    BUG FIX: the original had no final return, so it returned None whenever
    the whole chat fit within the budget; now the collected list is always
    returned.
    """
    n_tokens = 0
    interactions = []
    for item in chat:
        n_tokens += count_tokens(item['content'])
        if n_tokens >= max_tokens:
            return interactions
        interactions.append(item)
    return interactions
================================================
FILE: Assistant/voice.py
================================================
# imports
import pyttsx3
from ibm_watson import TextToSpeechV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from TTS.api import TTS
import os
import elevenlabslib
from contextlib import contextmanager
import pygame
from pydub import AudioSegment
import io
import sys
import langid
class Voice:
    def __init__(self, languages, **kwargs):
        """Initialise all text-to-speech backends.

        Args:
            languages: language codes the assistant may speak.

        Required kwargs: ibm_url, ibm_api (IBM Cloud credentials),
        write_dir (folder for generated audio), voice_id (reference wav
        path(s) for voice cloning). Optional: elevenlabs_api,
        elevenlabs_voice.
        """
        # IBM CLOUD text-to-speech (best effort: a failure only prints)
        try:
            print('Authorizing IBM Cloud:')
            url = kwargs['ibm_url']
            apikey = kwargs['ibm_api']
            # Setup Service
            print(' 1/3: Setting up cloud authenticator...')
            authenticator = IAMAuthenticator(apikey)
            # New tts service
            print(' 2/3: Setting up text-to-speech...')
            tts = TextToSpeechV1(authenticator=authenticator)
            # set service url
            print(' 3/3: Setting up cloud service ...')
            tts.set_service_url(url)
            print(' ✓ service established\n')
            self.tts_service = tts
        except:
            # NOTE(review): on failure self.tts_service is never set; later
            # IBM playback would raise AttributeError — confirm callers guard.
            print('IBM authentication failed')
        # ElevenLabs voice (optional; only bound when the named voice exists)
        if 'elevenlabs_api' in kwargs:
            try:
                eleven_labs_user = elevenlabslib.ElevenLabsUser(kwargs['elevenlabs_api'])
                if 'elevenlabs_voice' in list(kwargs.keys()):
                    if kwargs['elevenlabs_voice'] in (voice.initialName for voice in eleven_labs_user.get_all_voices()):
                        self.elevenlabs_voice = eleven_labs_user.get_voices_by_name(kwargs['elevenlabs_voice'])[0]
            except:
                print('Couldn t connect with Elevenlabs')
        # <to do: initiate Jarvis cloned voice if available and disable TTS>
        # PYTTSX3 for backup plan (offline system voices)
        engine = pyttsx3.init()
        # SYNTHETIC VOICES
        # CoquiAI - coqui-ai/TTS (https://github.com/coqui-ai/tts)
        # multilingual voice-cloning model; gpu=True requires CUDA here
        synth = TTS(model_name=os.path.join("tts_models/multilingual/multi-dataset/your_tts"), progress_bar=False, gpu=True)
        self.languages = languages
        self.write_dir = kwargs['write_dir']
        # per-language reference audio used for voice cloning
        self.path = kwargs['voice_id']
        print('cloning voice form:',self.path)
        self.synthetic_voice = synth
        self.offline = engine
def speak(self, text, VoiceIdx, mode, elevenlabs=False, IBM=False):
## delete old last_aswer.wav to avoid conflicts
if os.path.exists((self.write_dir, "last_answer.wav")): os.remove((self.write_dir, "last_answer.wav"))
## generate the speech: last_answer.wav
if mode == 'online':
if elevenlabs==True:
if VoiceIdx == 'en':
try:
audio = self.elevenlabs_voice.generate_audio_bytes(text)
audio = AudioSegment.from_file(io.BytesIO(audio), format="mp3")
audio.export(os.path.join(self.write_dir, "last_answer.wav"), format="wav")
except Exception as e:
print(f'Elevenlabs credit might have ended. {e}')
raise Exception
if VoiceIdx == 'jarvis':
# to do: use voice duplication from elevenlabs
print('(ElevenLabs Jarvis voice not yet available)')
raise Exception()
elif IBM==True:
with open(os.path.join(self.write_dir, "last_answer.wav"),'wb') as audio_file:
try:
if VoiceIdx=='jarvis':VoiceIdx='en'
res = self.tts_service.synthesize(text, accept='audio/wav', voice=get_ibm_voice_id(VoiceIdx)).get_result()
audio_file.write(res.content)
except:
print('(IBM credit might have ended)')
raise Exception
if mode == 'offline':
if VoiceIdx == 'jarvis' and langid.classify(text)[0]=='en':
LangIdx = 'en'
print(self.path, LangIdx)
self.synthetic_voice.tts_to_file(text=text, speaker_wav=self.path[LangIdx], language=LangIdx, file_path=os.path.join(self.write_dir, 'last_answer.wav'))
""" Idea for multiple language Text-To-Speech: dictionaries
if VoiceIdx == 'other-language':
self.synthetic_voice['other-language'].tts_to_file(text=text, speaker_wav=self.path, language="en", file_path=os.path.join(self.DIRECTORIES['SOUND_DIR'], 'last_answer.wav'))
"""
else:
LangIdx = langid.classify(text)[0]
self.offline = self.change_offline_lang(lang_id=LangIdx)
self.offline.say(text)
self.offline.runAndWait()
return
# play the generated speech:
if pygame.mixer.get_init() is None:pygame.mixer.init()
pygame.mixer.music.load(os.path.join(self.write_dir, 'last_answer.wav'))
pygame.mixer.music.set_volume(0.5)
pygame.mixer.music.play()
while(pygame.mixer.music.get_busy()): pass
return
def change_offline_lang(self, lang_id):
engine = pyttsx3.init()
try:
for voice in self.offline.getProperty('voices'):
if self.languages[lang_id] in voice.name:
engine.setProperty('voice', voice.id)
return engine
return engine
except Exception as e:
print('error while switching to lang: ',lang_id,e)
return engine
# know more at: https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices
def get_ibm_voice_id(VoiceIdx):
    """Map a two-letter language code to an IBM Watson TTS voice id.

    The original table listed many duplicate keys ('zh', 'nl', 'en', 'fr',
    'de', 'ko', 'es'); in a dict literal only the last entry per key
    survives, so this table keeps exactly those effective entries.

    :param VoiceIdx: language code such as 'en', 'it', 'de'.
    :raises KeyError: if the language has no configured IBM voice.
    """
    voices = {
        'ar': 'ar-MS_OmarVoice',
        'zh': 'zh-CN_ZhangJingVoice',
        'cz': 'cs-CZ_AlenaVoice',
        'nl': 'nl-NL_MerelV3Voice',
        'en': 'en-US_OliviaV3Voice',
        'fr': 'fr-FR_ReneeV3Voice',
        'de': 'de-DE_ErikaV3Voice',
        'it': 'it-IT_FrancescaV3Voice',
        'ja': 'ja-JP_EmiV3Voice',
        'ko': 'ko-KR_JinV3Voice',
        'pt': 'pt-BR_IsabelaV3Voice',
        'es': 'es-US_SofiaV3Voice',
        'sv': 'sv-SE_IngridVoice',
    }
    return voices[VoiceIdx]
@contextmanager
def suppress_stdout():
    """Context manager that silences everything written to stdout.

    Points sys.stdout at os.devnull for the duration of the block and
    always restores the previous stream, even if the block raises.
    """
    sink = open(os.devnull, "w")
    previous = sys.stdout
    sys.stdout = sink
    try:
        yield
    finally:
        sys.stdout = previous
        sink.close()
================================================
FILE: Assistant/webui.py
================================================
import json
import requests
import re
import langid
# Host running the oobabooga text-generation-webui gradio API (port 7860).
SERVER = 'localhost'

# Default payload sent to oobabooga's /run/textgen endpoint; individual
# fields can be overridden at runtime via set_text_gen_params().
# Field semantics follow the text-generation-webui API — see its docs.
TEXT_GEN_PARAMS = {
    'max_new_tokens': 200,
    'do_sample': True,
    'temperature': 0.72,
    'top_p': 0.73,
    'typical_p': 1,
    'repetition_penalty': 1.1,
    'encoder_repetition_penalty': 1.0,
    'top_k': 0,
    'min_length': 0,
    'no_repeat_ngram_size': 0,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'seed': -1,
    'add_bos_token': True,
    'custom_stopping_strings': [],
    'truncation_length': 2048,
    'ban_eos_token': False,
}
def set_text_gen_params(**kwargs):
    """Override entries of the module-level TEXT_GEN_PARAMS in place.

    Rejects any keyword that is not already a known generation parameter.
    """
    for name, value in kwargs.items():
        if name not in TEXT_GEN_PARAMS:
            raise Exception('no such parameter in oogabooga text generation')
        TEXT_GEN_PARAMS[name] = value
def oobabooga_textgen(prompt, params=TEXT_GEN_PARAMS, server=SERVER):
    """Send a prompt to a local oobabooga server and return the reply text.

    *prompt* may be a plain string or an OpenAI-style message list; lists
    are flattened with parse_conversation() and the reply is trimmed to
    the single turn that follows the submitted conversation.
    """
    chat_mode = isinstance(prompt, list)
    if chat_mode:
        n_messages = len(prompt)
        prompt = parse_conversation(prompt)

    payload = json.dumps([prompt, params])
    api_response = requests.post(
        f"http://{server}:7860/run/textgen",
        json={"data": [payload]},
    ).json()
    reply = api_response["data"][0]

    # hallucination filter: normalize role tags, then keep only the first
    # generated turn after the prompt's own messages.
    if chat_mode:
        for tag in ("[assistant]:", "[user]:", "[system]:"):
            reply = reply.replace(tag, "###")
        turns = reply.split('###')
        reply = " ".join(turns[(n_messages + 1):(n_messages + 2)])
    return reply
def post_process(answer):
    """Extract the useful bracketed sections from a raw model answer.

    Drops everything up to the last '[Question]' marker, then concatenates
    the text that immediately follows each of the allowed section tags
    ('[Answer]', '[Outcome]', '[Discussion]', '[Conclusion]') that is
    present, with surrounding ':' characters stripped.

    Returns '' when none of the allowed sections is found.
    """
    allowed = ['Answer', 'Outcome', 'Discussion', 'Conclusion']
    answer = answer.split('[Question]')[-1]
    relevant = ''
    for section in allowed:
        if section in answer:
            # Split on brackets: the token after the tag name is its body.
            parts = re.split(r'\[|\]', answer)
            try:
                relevant += parts[parts.index(section) + 1].strip(':')
            except (ValueError, IndexError):
                # Tag text appeared without brackets, or nothing follows it.
                # (Was a bare `except:` plus a leftover debug print.)
                print('Failure processing answer')
    return relevant
def parse_conversation(chat):
    """Flatten an OpenAI-style message list into oobabooga's prompt format.

    Each message becomes a "[role]:content" line. Anything that looks like
    a written or spoken URL ("example.com/page", "example dot com slash x")
    is masked with "<url>".

    BUGFIX: the original pattern embedded mid-pattern global (?i) flags,
    which raise re.error on Python 3.11+; since (?i) applied globally
    anyway, the flag is now passed as re.IGNORECASE.
    """
    link_pattern = re.compile(
        r"[a-zA-Z0-9]( dot | dotcom|dotcom|dotcom |\.|\. | \.| \. |\,)"
        r"[a-zA-Z]*( slash | slash|slash |slash|\/|\/ | \/| \/ ).+[a-zA-Z0-9]",
        re.IGNORECASE,
    )
    oobabooga_chat_history = ""
    for message in chat:
        oobabooga_chat_history += f"[{str(message['role'])}]:{message['content']}\n"
    return link_pattern.sub("<url>", oobabooga_chat_history)
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2023 Gianmarco Guarnier
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# JARVIS-ChatGPT: A conversational assistant equipped with J.A.R.V.I.S's voice
**A voice-based interactive assistant equipped with a variety of synthetic voices (including J.A.R.V.I.S's voice from IronMan)**

<p align="center">
<img src="https://user-images.githubusercontent.com/49094051/227788148-a8ff8e06-86a4-41a6-aa53-8b7d6855360c.png"/>
<span style=color:grey> <i>image by MidJourney AI </i> </span>
</p>
Ever dreamed to ask hyper-intelligent system tips to improve your armor? Now you can! Well, maybe not the armor part... This project exploits OpenAI Whisper, OpenAI ChatGPT and IBM Watson.
<p align="center"> <strong> PROJECT MOTIVATION: </strong> </p>
*Many times ideas come in the worst moment and they fade away before you have the time to explore them better. The objective of this project is to develop a system capable of giving tips and opinions in quasi-real-time about anything you ask. The ultimate assistant will be able to be accessed from any authorized microphone inside your house or your phone, it should run constantly in the background and when summoned should be able to generate meaningful answers (with a badass voice) as well as interface with the pc or a server and save/read/write files that can be accessed later. It should be able to run research, gather material from the internet (extract content from HTML pages, transcribe Youtube videos, find scientific papers...) and provide summaries that can be used as context to make informed decisions. In addition, it might interface with some external gadgets (IoT) but that's extra.*
<br>
<br>
<br>
<p align="center"> <strong> DEMO: </strong> </p>
https://user-images.githubusercontent.com/49094051/231303323-9859e028-33e1-490d-9967-44852fd0efc5.mp4
<br>
---
## JULY 14th 2023 UPDATE: Research Mode
I can finally share the first draft of the Research Mode. This modality was designed for people who often deal with research papers.
- Switch to research mode by saying *'Switch to Research Mode'*
- :star: Initialize a new workspace like this: *'Initialize a new workspace about Carbon Fiber Applications in the Spacecraft industry'*. A workspace is a folder that collects and organize the results of the research. This protocol is subdivided into 3 sub-routines:
1. Core Paper identification: Use the **Semantic Scholar API** to identify some strongly relevant papers;
2. Core Expansion: for each paper, finds some suggestions, then keep only the suggestions that appear to be similar to at least 2 paper;
3. Refy Expansion: use the refy suggestion package to enlarge the results;
- Find suggestions like: *'find suggestions that are similar to the paper with title ...'*
- Download: *'download the paper with title ...'*
- :star: Query your database like: *'what is the author of the paper with title ...?'* *'what are the experimental conditions set for the paper with title ...?'*
PS: This mode is not super stable and needs to be worked on<br>
*PPS: This project will be discontinued for some time since I'll be working on my thesis until 2024. However there are already so many things that can be improved so I'll be back!*
## What you'll need:
<p align="center"><i>DISCLAIMER:<br> The project might consume your OpenAI credit resulting in undesired billing;<br> I don't take responsibility for any unwanted charges;<br>Consider setting limitations on credit consumption at your OpenAI account; </i> </p>
- An [OpenAI](https://openai.com) account and API key; (check FAQs below for the alternatives)
- <i>[PicoVoice](https://picovoice.ai/platform/porcupine/) account and a free AccessKey; (optional) </i>
- <i>[ElevenLabs](https://beta.elevenlabs.io/) account and free Api Key (optional)</i>;
- [langChain API keys](https://github.com/hwchase17/langchain/blob/master/docs/modules/agents/tools/getting_started.md) for web surfing (news, weather, serpapi, google-serp, google-search... they are all free)
- [ffmpeg](https://ffmpeg.org/) ;
- Python virtual environment (Python>=3.9 and <3.10);
- <i> Some credit to spend on ChatGPT (you can get three months of free usage by signing up to OpenAI) (suggested)</i>;
- CUDA version >= 11.2;
- <i> An IBM Cloud account to exploit their cloud-based text-to-speech models ([tutorial](https://www.youtube.com/watch?v=A9_0OgW1LZU))(optional)</i>;
- A (reasonably) fast internet connection (most of the code relies on API so a slower connection might result in a longer time to respond);
- mic and speaker;
- CUDA capable graphic engine (my Torch Version: 2.0 and CUDA v11.7 ```pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117```);
- Patience :sweat_smile:
> you can rely on the new ```setup.bat``` that will do most of the things for you.
## GitHub overview
**MAIN** script you should run: `openai_api_chatbot.py` if you want to use the latest version of the OpenAI API Inside the demos folder you'll find some guidance for the packages used in the project, if you have errors you might check these files first to target the problem. Mostly is stored in the Assistant folder: `get_audio.py` stores all the functions to handle mic interactions, `tools.py` implements some basic aspects of the Virtual Assistant, `voice.py` describes a (very) rough Voice class. ```Agents.py``` handle the LangChain part of the system (here you can add or remove tools from the toolkits of the agents)<br> The remaining scripts are supplementary to the voice generation and should not be edited.
# INSTALLATION TUTORIAL
## Automatic installation
You can run ```setup.bat``` if you are running on Windows/Linux. The script will perform every step of the manual installation in sequence. Refer to those in case the procedure should fail.<br>
The automatic installation will also run the Vicuna installation ([Vicuna Installation Guide](https://hub.tcno.co/ai/text-ai/vicuna/))
## Manual Installation
## Step 1: installation, accounts, APIs...
### Environment
1. Make a new, empty virtual environment with Python 3.8 and activate it (.\venv_name\Scripts\activate );
2. ```pip install -r venv_requirements.txt```; This might take some time; if you encounter conflicts on specific packages, install them manually without the ```==<version>```;
3. install manually PyTorch according to your CUDA VERSION;
4. Copy and paste the files you'll find in the folder ```whisper_edits``` to the ```whisper``` folder of your environment (.\venv\lib\site-packages\whisper\ ) <span style="color:grey"> these edits will add just an attribute to the whisper model to access its dimension more easily; </span>
5. install [TTS](https://github.com/coqui-ai/tts);
6. Run [their script](https://github.com/coqui-ai/TTS/blob/dev/README.md#-python-api) and check everything is working (it should download some models) (you can alternatively run ```demos/tts_demo.py```);
7. Rename or delete the TTS folder and download the Assistant and other scripts from this repo
9. Install Vicuna following the instructions on the Vicuna folder or by running:<br><p align='center'>
```cd Vicuna```<br>
```call vicuna.ps1```<br></p>
<span style="color:grey"> Manual instructions will instruct you to follow the [Vicuna Installation Guide](https://hub.tcno.co/ai/text-ai/vicuna/) </span>
10. paste all your keys in the ```env.txt``` file and rename it to ```.env``` (yes, remove the txt extension)
11. Check everything works *(following)*
<br>
### Checks
- Verify your graphic engine and CUDA version are compatible with PyTorch by running `torch.cuda.is_available()` and `torch.cuda.get_device_name(0)` inside Python;
- run ```tests.py```. This file attempts to perform basic operations that might raise errors;
- [WARNING] Check the FAQs below if you have errors;
- You can check the sources of error by running demos in the demos folder;
## Step 2: Language support
- To have answers spoken in your language you should first check if your language is supported by the speech generator at __https://cloud.ibm.com/docs/text-to-speech?topic=text-to-speech-voices__;
- If it's supported, add or change the languages inside ```VirtualAssistant.__init__()``` ;<br>
<p align="center">
<img src="https://user-images.githubusercontent.com/49094051/230505516-4dba0f29-f45a-4311-aa54-1d93fca25de5.PNG"/>
</p>
- Remember: The loaded Whisper is the medium one. If it performs badly in your language, upgrade to the larger one in the ```__main__()``` at `whisper_model = whisper.load_model("large")`; but I hope your GPU memory is large likewise.
## Step 3: Running (`openai_api_chatbot.py`):
When running, you'll see much information being displayed. I'm constantly striving to improve the readability of the execution, the whole project is a huge beta, forgive slight variations from the screens below. Anyway, this is what happens in general terms when you hit 'run':
- Preliminary initializations take place, you should hear a chime when the Assistant is ready;
- When *awaiting for triggering words* is displayed you'll need to say `Jarvis` to summon the assistant. At this point, a conversation will begin and you can speak in whatever language you want (if you followed step 2). The conversation will terminate when you 1) say a [stop word](https://github.com/gianmarcoguarnier/JARVIS-ChatGPT/tree/main#key-words) 2) say something with one word (like 'ok') 3) when you stop making questions for more than 30 seconds <br>
<p align="center">
<img src="https://user-images.githubusercontent.com/49094051/230505896-c8a2ff80-4265-41e4-a6d5-e9f56d156afa.PNG" /><br>
<img src="https://user-images.githubusercontent.com/49094051/230506756-287a1d6b-9652-4c66-bea8-cd75380ab45b.PNG" /><br>
</p>
- After the magic word is said, the word *listening...* should then appear. At this point, you can make your question. When you are done just wait (3 seconds) for the answer to be submitted;
- The script will convert the recorded audio to text using Whisper;
- The text will be analyzed and a decision will be made. If the Assistant believes it needs to take some action to respond (like looking for a past conversation) the langchain agents will make a plan and use their tool to answer.
- Elsewise, the script will then expand the `chat_history` with your question, it will send a request with the API and it will update the history as soon as it receives a full answer from ChatGPT (this may take up to 5-10 seconds, consider explicitly asking for a short answer if you are in a hurry);
- The `say()` function will perform voice duplication to speak with Jarvis/Someone's voice; if the argument is not in English, IBM Watson will send the response from one of their nice text-to-speech models. If everything fails, the functions will rely on pyttsx3 which is a fast yet not as cool alternative;
<p align="center">
</p>
- When any of the stop keywords are said, the script will ask ChatGPT to give a title to the conversation and will save the chat in a .txt file with the format 'CurrentDate_Title.txt';
- The assistant will then go back to sleep;
<p align="center">
<img src='https://user-images.githubusercontent.com/49094051/227788180-b9da0957-a58b-4c1c-bc34-4a4c8a0e0957.PNG'/><br>
<i><span style="color:grey">I made some prompts and closed the conversation</span> </i>
</p>
# Keywords:
- to stop or save the chat, just say 'THANKS' at some point;
- To summon JARVIS voice just say 'JARVIS' at some point;
<span style="color:grey">*not ideal I know but works for now*</span>
# History:
- [x] [11 - 2022] Deliver chat-like prompts from Python from a keyboard
- [x] [12 - 2022] Deliver chat-like prompts from Python with voice
- [x] [2 - 2023] International language support for prompt and answers
- [x] [3 - 2023] Jarvis voice set up
- [x] [3 - 2023] Save conversation
- [x] [3 - 2023] Background execution & Voice Summoning
- [x] [3 - 2023] Improve output displayed info
- [x] [3 - 2023] Improve JARVIS's voice performances through prompt preprocessing
- [x] [4 - 2023] Introducing: *Project memory* store chats, events, timelines and other relevant information for a given project to be accessed later by the user or the assistant itself
- [x] [4 - 2023] Create a full stack ```VirtualAssistant``` class with memory and local storage access
- [x] [4 - 2023] Add sound feedback at different stages (chimes, beeps...)
- [x] [4 - 2023] International language support for voice commands (beta)
- [x] [4 - 2023] Making a step-by-step tutorial
- [x] [4 - 2023] Move some processing locally to reduce credit consumption: [Vicuna: A new, powerful model based on LLaMa, and trained with GPT-4](https://www.youtube.com/watch?v=ByV5w1ES38A&ab_channel=TroubleChute);
- [x] [4 - 2023] Integrate with Eleven Labs Voices for super expressive voices and outstanding voice cloning;
- [x] [4 - 2023] Extending voice commands and *Actions* (make a better active assistant)
- [x] [4 - 2023] Connect the system to the internet
- [x] [6 - 2023] Connect with paper database
currently working on:
- [ ] Extend doc processing tools
- [ ] Find a free alternative for LangChain Agents
following:
- [ ] fixing chat length bug (when the chat is too long it can't be processed by ChatGPT 3.5 Turbo)
- [ ] expanding *Memory*
- [ ] crash reports
- [ ] Refine capabilities
<br>
<br>
### waiting for ChatGPT4 to:
- [ ] add multimodal input (i.e. "Do you think 'this' [holding a paper plane] could fly" -> camera -> ChatGPT4 -> "you should improve the tip of the wings" )
- [ ] Extend *project memory* to images, pdfs, papers...
<span style="color:grey">*Check the [UpdateHistory.md](https://github.com/gianmarcoguarnier/JARVIS-ChatGPT/blob/main/UpdateHistory.md) of the project for more insights.*</span>
Have fun!
# ERRORS and FAQs
categories: Install, General, Runtime
### INSTALL: I have conflicting packages while installing *venv_requirements.txt*, what should I do? <br>
1. Make sure you have the right Python version (3.7) on the .venv (>python --version with the virtual environment activated).
2. Try to edit the _venv_requirements.txt_ and remove the version requirements of the incriminated dependencies.
3. Straight remove the package from the txt file and install them manually afterward.<br>
### INSTALL: I meet an error when running openai_api_chatbot.py saying: TypeError: LoadLibrary( ) argument 1 must be str, not None what's wrong? <br>
The problem is concerning Whisper. You should re-install it manually with ```pip install whisper-openai``` <br>
### INSTALL: I can't import 'openai.embeddings_utils'<br>
1. Try to ```pip install --upgrade openai```.
2. This happens because openai elevated their minimum requirements. I had this problem and solved by manually downloading [embeddings_utils.py](https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py) inside ./<your_venv>/Lib/site-packages/openai/
<br>
3. If the problem persists with ```datalib``` raise an issue and I'll provide you the missing file
4. upgrade to Python 3.8 (create new env and re-install TTS, requirements)
### INSTALL: I encounter the error ModuleNotFoundError: No module named '\<some module\>' <br>
Requirements are not updated every commit. While this might generate errors you can quickly install the missing modules, at the same time it keeps the environment clean from conflicts when I try new packages (and I try LOTS of them) <br>
### RUN TIME: I encounter some OOM memory when loading the Whisper model, what does it mean?<br>
It means the model you selected is too big for your CUDA device memory. Unfortunately, there is not much you can do about it except load a smaller model. If the smaller model does not satisfy you, you might want to speak 'clearer' or make longer prompts to let the model predict more accurately what you are saying. This sounds inconvenient but, in my case, greatly improved my English-speaking :) <br>
### RUN TIME: Max length tokens for ChatGPT-3.5-Turbo is 4096 but received... tokens.<br>
This is a bug still present, don't expect to have ever long conversations with your assistant as it will simply have enough memory to remember the whole conversation at some point. A fix is in development, it might consist of adopting a 'sliding windows' approach even if it might cause repetition of some concepts. <br>
### GENERAL: I finished my OPENAI credit/demo, what can I do? <br>
1. Go online only. The price is not that bad and you might end up paying a few dollars a month since pricing depends on usage (with heavy testing I ended up consuming the equivalent of ~4 dollars a month during my free trial). You can set limits on your monthly tokens consumption.
2. Use a Hybrid mode where the most credit-intensive tasks are executed locally for free and the rest is done online.
3. Install Vicuna and run OFFLINE mode only with limited performance.
### GENERAL: For how long will this project be updated?
Right now (April 2023) I'm working almost non-stop on this. I will likely take a break in the summer because I'll be working on my thesis.
If you have questions you can contact me by raising an Issue and I'll do my best to help as soon as possible.
<p align="right"><i>Gianmarco Guarnier<i></p>
================================================
FILE: TTS/.models.json
================================================
{
"tts_models": {
"multilingual":{
"multi-dataset":{
"your_tts":{
"description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip",
"default_vocoder": null,
"commit": "e9a1953e",
"license": "CC BY-NC-ND 4.0",
"contact": "egolge@coqui.ai"
}
}
},
"bg": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"cs": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"da": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"et": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"ga": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"en": {
"ek1": {
"tacotron2": {
"description": "EK1 en-rp tacotron2 by NMStoker",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ek1--tacotron2.zip",
"default_vocoder": "vocoder_models/en/ek1/wavegrad",
"commit": "c802255",
"license": "apache 2.0"
}
},
"ljspeech": {
"tacotron2-DDC": {
"description": "Tacotron2 with Double Decoder Consistency.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.com"
},
"tacotron2-DDC_ph": {
"description": "Tacotron2 with Double Decoder Consistency with phonemes.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip",
"default_vocoder": "vocoder_models/en/ljspeech/univnet",
"commit": "3900448",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.com"
},
"glow-tts": {
"description": "",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip",
"stats_file": null,
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
},
"speedy-speech": {
"description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip",
"stats_file": null,
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "4581e3d",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.com"
},
"tacotron2-DCA": {
"description": "",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
},
"vits": {
"description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--vits.zip",
"default_vocoder": null,
"commit": "3900448",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.com"
},
"vits--neon": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--en--ljspeech--vits.zip",
"default_vocoder": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause",
"contact": null,
"commit": null
},
"fast_pitch": {
"description": "FastPitch model trained on LJSpeech using the Aligner Network",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "b27b3ba",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.com"
},
"overflow": {
"description": "Overflow model trained on LJSpeech",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.0_models/tts_models--en--ljspeech--overflow.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "3b1a28f",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.ai"
},
"neural_hmm": {
"description": "Neural HMM model trained on LJSpeech",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.11.0_models/tts_models--en--ljspeech--neural_hmm.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "3b1a28f",
"author": "Shivam Metha @shivammehta25",
"license": "apache 2.0",
"contact": "d83ee8fe45e3c0d776d4a865aca21d7c2ac324c4"
}
},
"vctk": {
"vits": {
"description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--vits.zip",
"default_vocoder": null,
"commit": "3900448",
"author": "Eren @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.ai"
},
"fast_pitch":{
"description": "FastPitch model trained on VCTK dataset.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip",
"default_vocoder": null,
"commit": "bdab788d",
"author": "Eren @erogol",
"license": "CC BY-NC-ND 4.0",
"contact": "egolge@coqui.ai"
}
},
"sam": {
"tacotron-DDC": {
"description": "Tacotron2 with Double Decoder Consistency trained with Accenture's Sam dataset.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip",
"default_vocoder": "vocoder_models/en/sam/hifigan_v2",
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.com"
}
},
"blizzard2013": {
"capacitron-t2-c50": {
"description": "Capacitron additions to Tacotron 2 with Capacity at 50 as in https://arxiv.org/pdf/1906.03402.pdf",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip",
"commit": "d6284e7",
"default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2",
"author": "Adam Froghyar @a-froghyar",
"license": "apache 2.0",
"contact": "adamfroghyar@gmail.com"
},
"capacitron-t2-c150_v2": {
"description": "Capacitron additions to Tacotron 2 with Capacity at 150 as in https://arxiv.org/pdf/1906.03402.pdf",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip",
"commit": "a67039d",
"default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2",
"author": "Adam Froghyar @a-froghyar",
"license": "apache 2.0",
"contact": "adamfroghyar@gmail.com"
}
}
},
"es": {
"mai": {
"tacotron2-DDC": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
}
},
"css10":{
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"fr": {
"mai": {
"tacotron2-DDC": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"commit": null,
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
}
},
"css10":{
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"uk":{
"mai": {
"glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip",
"author":"@robinhad",
"commit": "bdab788d",
"license": "MIT",
"contact": "",
"default_vocoder": "vocoder_models/uk/mai/multiband-melgan"
},
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"zh-CN": {
"baker": {
"tacotron2-DDC-GST": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
"commit": "unknown",
"author": "@kirianguiller",
"license": "apache 2.0",
"default_vocoder": null
}
}
},
"nl": {
"mai": {
"tacotron2-DDC": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip",
"author": "@r-dh",
"license": "apache 2.0",
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
"stats_file": null,
"commit": "540d811"
}
},
"css10":{
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"de": {
"thorsten": {
"tacotron2-DCA": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/de/thorsten/fullband-melgan",
"author": "@thorstenMueller",
"license": "apache 2.0",
"commit": "unknown"
},
"vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--de--thorsten--vits.zip",
"default_vocoder": null,
"author": "@thorstenMueller",
"license": "apache 2.0",
"commit": "unknown"
},
"tacotron2-DDC": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/de/thorsten/hifigan_v1",
"description": "Thorsten-Dec2021-22k-DDC",
"author": "@thorstenMueller",
"license": "apache 2.0",
"commit": "unknown"
}
},
"css10": {
"vits-neon":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip",
"default_vocoder": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause",
"commit": null
}
}
},
"ja": {
"kokoro": {
"tacotron2-DDC": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
"description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
"author": "@kaiidams",
"license": "apache 2.0",
"commit": "401fbd89"
}
}
},
"tr":{
"common-voice": {
"glow-tts":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip",
"default_vocoder": "vocoder_models/tr/common-voice/hifigan",
"license": "MIT",
"description": "Turkish GlowTTS model using an unknown speaker from the Common-Voice dataset.",
"author": "Fatih Akademi",
"commit": null
}
}
},
"it": {
"mai_female": {
"glow-tts":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip",
"default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
"author": "@nicolalandro",
"license": "apache 2.0",
"commit": null
},
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip",
"default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
"author": "@nicolalandro",
"license": "apache 2.0",
"commit": null
}
},
"mai_male": {
"glow-tts":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip",
"default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
"author": "@nicolalandro",
"license": "apache 2.0",
"commit": null
},
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip",
"default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
"author": "@nicolalandro",
"license": "apache 2.0",
"commit": null
}
}
},
"ewe": {
"openbible": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip",
"default_vocoder": null,
"license": "CC-BY-SA 4.0",
"description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
"author": "@coqui_ai",
"commit": "1b22f03"
}
}
},
"hau": {
"openbible": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip",
"default_vocoder": null,
"license": "CC-BY-SA 4.0",
"description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
"author": "@coqui_ai",
"commit": "1b22f03"
}
}
},
"lin": {
"openbible": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip",
"default_vocoder": null,
"license": "CC-BY-SA 4.0",
"description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
"author": "@coqui_ai",
"commit": "1b22f03"
}
}
},
"tw_akuapem": {
"openbible": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip",
"default_vocoder": null,
"license": "CC-BY-SA 4.0",
"description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
"author": "@coqui_ai",
"commit": "1b22f03"
}
}
},
"tw_asante": {
"openbible": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip",
"default_vocoder": null,
"license": "CC-BY-SA 4.0",
"description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
"author": "@coqui_ai",
"commit": "1b22f03"
}
}
},
"yor": {
"openbible": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip",
"default_vocoder": null,
"license": "CC-BY-SA 4.0",
"description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
"author": "@coqui_ai",
"commit": "1b22f03"
}
}
},
"hu": {
"css10": {
"vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hu--css10--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"el": {
"cv": {
"vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--el--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"fi": {
"css10": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"hr": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"lt": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"lv": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"mt": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"pl": {
"mai_female": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"pt": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"ro": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"sk": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"sl": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"sv": {
"cv": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip",
"default_vocoder": null,
"commit": null,
"author": "@NeonGeckoCom",
"license": "bsd-3-clause"
}
}
},
"ca": {
"custom": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip",
"default_vocoder": null,
"commit": null,
"description": " It is trained from zero with 101460 utterances consisting of 257 speakers, approx 138 hours of speech. We used three datasets;\nFestcat and Google Catalan TTS (both TTS datasets) and also a part of Common Voice 8. It is trained with TTS v0.8.0.\nhttps://github.com/coqui-ai/TTS/discussions/930#discussioncomment-4466345",
"author": "@gullabi",
"license": "CC-BY-4.0"
}
}
},
"fa":{
"custom":{
"glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip",
"default_vocoder": null,
"commit": null,
"description": "persian-tts-female-glow_tts model for text to speech purposes. Single-speaker female voice Trained on persian-tts-dataset-famale. \nThis model has no compatible vocoder thus the output quality is not very good. \nDataset: https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale.",
"author": "@karim23657",
"license": "CC-BY-4.0"
}
}
}
},
"vocoder_models": {
"universal": {
"libri-tts": {
"wavegrad": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip",
"commit": "ea976b0",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
},
"fullband-melgan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip",
"commit": "4132240",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
}
}
},
"en": {
"ek1": {
"wavegrad": {
"description": "EK1 en-rp wavegrad by NMStoker",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip",
"commit": "c802255",
"license": "apache 2.0"
}
},
"ljspeech": {
"multiband-melgan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip",
"commit": "ea976b0",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
},
"hifigan_v2": {
"description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip",
"commit": "bae2ad0f",
"author": "@erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.ai"
},
"univnet": {
"description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip",
"commit": "4581e3d",
"author": "Eren @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.ai"
}
},
"blizzard2013": {
"hifigan_v2": {
"description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip",
"commit": "d6284e7",
"author": "Adam Froghyar @a-froghyar",
"license": "apache 2.0",
"contact": "adamfroghyar@gmail.com"
}
},
"vctk": {
"hifigan_v2": {
"description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip",
"commit": "2f07160",
"author": "Edresson Casanova",
"license": "apache 2.0",
"contact": ""
}
},
"sam": {
"hifigan_v2": {
"description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip",
"commit": "2f07160",
"author": "Eren Gölge @erogol",
"license": "apache 2.0",
"contact": "egolge@coqui.ai"
}
}
},
"nl": {
"mai": {
"parallel-wavegan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip",
"author": "@r-dh",
"license": "apache 2.0",
"commit": "unknown"
}
}
},
"de": {
"thorsten": {
"wavegrad": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip",
"author": "@thorstenMueller",
"license": "apache 2.0",
"commit": "unknown"
},
"fullband-melgan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip",
"author": "@thorstenMueller",
"license": "apache 2.0",
"commit": "unknown"
},
"hifigan_v1": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip",
"description": "HifiGAN vocoder model for Thorsten Neutral Dec2021 22k Samplerate Tacotron2 DDC model",
"author": "@thorstenMueller",
"license": "apache 2.0",
"commit": "unknown"
}
}
},
"ja": {
"kokoro": {
"hifigan_v1": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip",
"description": "HifiGAN model trained for kokoro dataset by @kaiidams",
"author": "@kaiidams",
"license": "apache 2.0",
"commit": "3900448"
}
}
},
"uk": {
"mai": {
"multiband-melgan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip",
"author":"@robinhad",
"commit": "bdab788d",
"license": "MIT",
"contact": ""
}
}
},
"tr":{
"common-voice": {
"hifigan":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip",
"description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.",
"author": "Fatih Akademi",
"license": "MIT",
"commit": null
}
}
}
},
"voice_conversion_models":{
"multilingual":{
"vctk":{
"freevc24":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip",
"description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC",
"author": "Jing-Yi Li @OlaWod",
"license": "MIT",
"commit": null
}
}
}
}
}
================================================
FILE: TTS/VERSION
================================================
0.12.0
================================================
FILE: TTS/__init__.py
================================================
import os
# Read the package version string from the bundled VERSION file next to this module.
_version_path = os.path.join(os.path.dirname(__file__), "VERSION")
with open(_version_path, "r", encoding="utf-8") as version_file:
    version = version_file.read().strip()
__version__ = version
================================================
FILE: TTS/api.py
================================================
import tempfile
from pathlib import Path
from TTS.utils.audio.numpy_transforms import save_wav
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
class TTS:
    """🐸TTS python interface that allows to load and use the released models.

    TODO: Add voice conversion and Capacitron support.
    """

    def __init__(
        self,
        model_name: str = None,
        model_path: str = None,
        config_path: str = None,
        vocoder_path: str = None,
        vocoder_config_path: str = None,
        progress_bar: bool = True,
        gpu=False,
    ):
        """🐸TTS python interface that allows to load and use the released models.

        Example with a multi-speaker model:
            >>> from TTS.api import TTS
            >>> tts = TTS(TTS.list_models()[0])
            >>> wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
            >>> tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")

        Example with a single-speaker model:
            >>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
            >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav")

        Example loading a model from a path:
            >>> tts = TTS(model_path="/path/to/checkpoint_100000.pth", config_path="/path/to/config.json", progress_bar=False, gpu=False)
            >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav")

        Example voice cloning with YourTTS in English, French and Portuguese:
            >>> tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
            >>> tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="thisisit.wav")
            >>> tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="thisisit.wav")
            >>> tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="thisisit.wav")

        Args:
            model_name (str, optional): Model name to load. You can list models by ```tts.models```. Defaults to None.
            model_path (str, optional): Path to the model checkpoint. Defaults to None.
            config_path (str, optional): Path to the model config. Defaults to None.
            vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None.
            vocoder_config_path (str, optional): Path to the vocoder config. Defaults to None.
            progress_bar (bool, optional): Whether to print a progress bar while downloading a model. Defaults to True.
            gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
        """
        self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
        self.synthesizer = None
        self.voice_converter = None
        if model_name:
            self.load_tts_model_by_name(model_name, gpu)
        if model_path:
            self.load_tts_model_by_path(
                model_path, config_path, vocoder_path=vocoder_path, vocoder_config=vocoder_config_path, gpu=gpu
            )

    @property
    def models(self):
        """List of all released 🐸TTS model names."""
        return self.manager.list_tts_models()

    @property
    def is_multi_speaker(self):
        """True if the loaded model manages more than one speaker."""
        if hasattr(self.synthesizer.tts_model, "speaker_manager") and self.synthesizer.tts_model.speaker_manager:
            return self.synthesizer.tts_model.speaker_manager.num_speakers > 1
        return False

    @property
    def is_multi_lingual(self):
        """True if the loaded model manages more than one language."""
        if hasattr(self.synthesizer.tts_model, "language_manager") and self.synthesizer.tts_model.language_manager:
            return self.synthesizer.tts_model.language_manager.num_languages > 1
        return False

    @property
    def speakers(self):
        """Speaker names of the loaded model, or None for single-speaker models."""
        if not self.is_multi_speaker:
            return None
        return self.synthesizer.tts_model.speaker_manager.speaker_names

    @property
    def languages(self):
        """Language names of the loaded model, or None for mono-lingual models."""
        if not self.is_multi_lingual:
            return None
        return self.synthesizer.tts_model.language_manager.language_names

    @staticmethod
    def get_models_file_path():
        """Return the path of the bundled ``.models.json`` model registry."""
        return Path(__file__).parent / ".models.json"

    @staticmethod
    def list_models():
        """List all released 🐸TTS model names without instantiating the interface."""
        manager = ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False)
        return manager.list_tts_models()

    def download_model_by_name(self, model_name: str):
        """Download a model (and its default vocoder, if any) and return the local paths.

        Returns:
            Tuple of (model_path, config_path, vocoder_path, vocoder_config_path);
            the vocoder entries are None when the model has no default vocoder.
        """
        model_path, config_path, model_item = self.manager.download_model(model_name)
        if model_item.get("default_vocoder") is None:
            return model_path, config_path, None, None
        vocoder_path, vocoder_config_path, _ = self.manager.download_model(model_item["default_vocoder"])
        return model_path, config_path, vocoder_path, vocoder_config_path

    def load_vc_model_by_name(self, model_name: str, gpu: bool = False):
        """Load one of the voice conversion models by name.

        Args:
            model_name (str): Model name to load. You can list models by ```tts.models```.
            gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
        """
        model_path, config_path, _, _ = self.download_model_by_name(model_name)
        self.voice_converter = Synthesizer(vc_checkpoint=model_path, vc_config=config_path, use_cuda=gpu)

    def load_tts_model_by_name(self, model_name: str, gpu: bool = False):
        """Load one of 🐸TTS models by name.

        Args:
            model_name (str): Model name to load. You can list models by ```tts.models```.
            gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.

        TODO: Add tests
        """
        model_path, config_path, vocoder_path, vocoder_config_path = self.download_model_by_name(model_name)
        # init synthesizer
        # None values are fetch from the model
        self.synthesizer = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=config_path,
            tts_speakers_file=None,
            tts_languages_file=None,
            vocoder_checkpoint=vocoder_path,
            vocoder_config=vocoder_config_path,
            encoder_checkpoint=None,
            encoder_config=None,
            use_cuda=gpu,
        )

    def load_tts_model_by_path(
        self, model_path: str, config_path: str, vocoder_path: str = None, vocoder_config: str = None, gpu: bool = False
    ):
        """Load a model from a path.

        Args:
            model_path (str): Path to the model checkpoint.
            config_path (str): Path to the model config.
            vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None.
            vocoder_config (str, optional): Path to the vocoder config. Defaults to None.
            gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
        """
        self.synthesizer = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=config_path,
            tts_speakers_file=None,
            tts_languages_file=None,
            vocoder_checkpoint=vocoder_path,
            vocoder_config=vocoder_config,
            encoder_checkpoint=None,
            encoder_config=None,
            use_cuda=gpu,
        )

    def _check_arguments(self, speaker: str = None, language: str = None, speaker_wav: str = None):
        """Validate speaker/language arguments against the loaded model's capabilities.

        Raises:
            ValueError: If a required argument is missing or an argument is given
                that the loaded model cannot use.
        """
        if self.is_multi_speaker and (speaker is None and speaker_wav is None):
            raise ValueError("Model is multi-speaker but no speaker is provided.")
        if self.is_multi_lingual and language is None:
            raise ValueError("Model is multi-lingual but no language is provided.")
        if not self.is_multi_speaker and speaker is not None:
            raise ValueError("Model is not multi-speaker but speaker is provided.")
        if not self.is_multi_lingual and language is not None:
            raise ValueError("Model is not multi-lingual but language is provided.")

    def tts(self, text: str, speaker: str = None, language: str = None, speaker_wav: str = None):
        """Convert text to speech.

        Args:
            text (str):
                Input text to synthesize.
            speaker (str, optional):
                Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
                `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
            language (str, optional):
                Language code for multi-lingual models. You can check whether loaded model is multi-lingual
                `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
            speaker_wav (str, optional):
                Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
                Defaults to None.
        """
        self._check_arguments(speaker=speaker, language=language, speaker_wav=speaker_wav)
        wav = self.synthesizer.tts(
            text=text,
            speaker_name=speaker,
            language_name=language,
            speaker_wav=speaker_wav,
            reference_wav=None,
            style_wav=None,
            style_text=None,
            reference_speaker_name=None,
        )
        return wav

    def tts_to_file(
        self,
        text: str,
        speaker: str = None,
        language: str = None,
        speaker_wav: str = None,
        file_path: str = "output.wav",
    ):
        """Convert text to speech and save the result to a wav file.

        Args:
            text (str):
                Input text to synthesize.
            speaker (str, optional):
                Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
                `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
            language (str, optional):
                Language code for multi-lingual models. You can check whether loaded model is multi-lingual
                `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
            speaker_wav (str, optional):
                Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
                Defaults to None.
            file_path (str, optional):
                Output file path. Defaults to "output.wav".
        """
        wav = self.tts(text=text, speaker=speaker, language=language, speaker_wav=speaker_wav)
        self.synthesizer.save_wav(wav=wav, path=file_path)

    def voice_conversion(
        self,
        source_wav: str,
        target_wav: str,
    ):
        """Voice conversion with FreeVC. Convert source wav to target speaker.

        NOTE(review): the first parameter was previously misspelled ``sourve_wav``;
        it is renamed to match the documented name. Positional callers are unaffected.

        Args:
            source_wav (str):
                Path to the source wav file.
            target_wav (str):
                Path to the target wav file.
        """
        wav = self.synthesizer.voice_conversion(source_wav=source_wav, target_wav=target_wav)
        return wav

    def tts_with_vc(self, text: str, language: str = None, speaker_wav: str = None):
        """Convert text to speech with voice conversion.

        It combines tts with voice conversion to fake voice cloning.

        - Convert text to speech with tts.
        - Convert the output wav to target speaker with voice conversion.

        Args:
            text (str):
                Input text to synthesize.
            language (str, optional):
                Language code for multi-lingual models. You can check whether loaded model is multi-lingual
                `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
            speaker_wav (str, optional):
                Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
                Defaults to None.
        """
        # delete=False so the file can be re-opened by name on all platforms
        # (Windows cannot reopen an open NamedTemporaryFile); we remove it ourselves below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            tmp_wav_path = fp.name
        try:
            # Lazy code... save it to a temp file to resample it while reading it for VC
            self.tts_to_file(text=text, speaker=None, language=language, file_path=tmp_wav_path)
            if self.voice_converter is None:
                self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24")
            wav = self.voice_converter.voice_conversion(source_wav=tmp_wav_path, target_wav=speaker_wav)
        finally:
            # Clean up the intermediate file; previously it was leaked on disk.
            try:
                Path(tmp_wav_path).unlink()
            except OSError:
                pass
        return wav

    def tts_with_vc_to_file(
        self, text: str, language: str = None, speaker_wav: str = None, file_path: str = "output.wav"
    ):
        """Convert text to speech with voice conversion and save to file.

        Check `tts_with_vc` for more details.

        Args:
            text (str):
                Input text to synthesize.
            language (str, optional):
                Language code for multi-lingual models. You can check whether loaded model is multi-lingual
                `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
            speaker_wav (str, optional):
                Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
                Defaults to None.
            file_path (str, optional):
                Output file path. Defaults to "output.wav".
        """
        wav = self.tts_with_vc(text=text, language=language, speaker_wav=speaker_wav)
        save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate)
================================================
FILE: TTS/bin/__init__.py
================================================
================================================
FILE: TTS/bin/collect_env_info.py
================================================
"""Get detailed info about the working environment."""
import os
import platform
import sys
import numpy
import torch
sys.path += [os.path.abspath(".."), os.path.abspath(".")]
import json
import TTS
def system_info():
    """Collect basic facts about the host OS and the running Python interpreter."""
    info = {}
    info["OS"] = platform.system()
    info["architecture"] = platform.architecture()
    info["version"] = platform.version()
    info["processor"] = platform.processor()
    info["python"] = platform.python_version()
    return info
def cuda_info():
    """Report CUDA availability, the CUDA toolkit version, and visible GPU names."""
    gpu_names = [torch.cuda.get_device_name(idx) for idx in range(torch.cuda.device_count())]
    return {
        "GPU": gpu_names,
        "available": torch.cuda.is_available(),
        "version": torch.version.cuda,
    }
def package_info():
    """Collect version information for the key installed packages."""
    packages = {}
    packages["numpy"] = numpy.__version__
    packages["PyTorch_version"] = torch.__version__
    packages["PyTorch_debug"] = torch.version.debug
    packages["TTS"] = TTS.__version__
    return packages
def main():
    """Assemble the environment report and print it as pretty, sorted JSON."""
    report = {
        "System": system_info(),
        "CUDA": cuda_info(),
        "Packages": package_info(),
    }
    print(json.dumps(report, indent=4, sort_keys=True))


if __name__ == "__main__":
    main()
================================================
FILE: TTS/bin/compute_attention_masks.py
================================================
import argparse
import importlib
import os
from argparse import RawTextHelpFormatter
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from TTS.config import load_config
from TTS.tts.datasets.TTSDataset import TTSDataset
from TTS.tts.models import setup_model
from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols
from TTS.utils.audio import AudioProcessor
from TTS.utils.io import load_checkpoint
if __name__ == "__main__":
    # pylint: disable=bad-option-value
    parser = argparse.ArgumentParser(
        description="""Extract attention masks from trained Tacotron/Tacotron2 models.
These masks can be used for different purposes including training a TTS model with a Duration Predictor.\n\n"""
        """Each attention mask is written to the same path as the input wav file with ".npy" file extension.
(e.g. path/bla.wav (wav file) --> path/bla.npy (attention mask))\n"""
        """
Example run:
    CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py
        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
        --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
        --dataset_metafile metadata.csv
        --data_path /root/LJSpeech-1.1/
        --batch_size 32
        --dataset ljspeech
        --use_cuda True
""",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("--model_path", type=str, required=True, help="Path to Tacotron/Tacotron2 model file ")
    parser.add_argument(
        "--config_path",
        type=str,
        required=True,
        help="Path to Tacotron/Tacotron2 config file.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="",
        required=True,
        help="Target dataset processor name from TTS.tts.dataset.preprocess.",
    )
    parser.add_argument(
        "--dataset_metafile",
        type=str,
        default="",
        required=True,
        help="Dataset metafile inclusing file paths with transcripts.",
    )
    parser.add_argument("--data_path", type=str, default="", help="Defines the data path. It overwrites config.json.")
    # NOTE(review): argparse `type=bool` treats any non-empty string (including
    # "False") as True — confirm callers only ever pass `--use_cuda True`.
    parser.add_argument("--use_cuda", type=bool, default=False, help="enable/disable cuda.")
    parser.add_argument(
        "--batch_size", default=16, type=int, help="Batch size for the model. Use batch_size=1 if you have no CUDA."
    )
    args = parser.parse_args()

    C = load_config(args.config_path)
    ap = AudioProcessor(**C.audio)

    # if the vocabulary was passed, replace the default
    # (this rebinds the imported module-level `symbols`/`phonemes` names)
    if "characters" in C.keys():
        symbols, phonemes = make_symbols(**C.characters)

    # load the model
    num_chars = len(phonemes) if C.use_phonemes else len(symbols)
    # TODO: handle multi-speaker
    model = setup_model(C)
    model, _ = load_checkpoint(model, args.model_path, args.use_cuda, True)

    # data loader: build the dataset from the chosen formatter's metadata
    preprocessor = importlib.import_module("TTS.tts.datasets.formatters")
    preprocessor = getattr(preprocessor, args.dataset)
    meta_data = preprocessor(args.data_path, args.dataset_metafile)
    dataset = TTSDataset(
        model.decoder.r,
        C.text_cleaner,
        compute_linear_spec=False,
        ap=ap,
        meta_data=meta_data,
        characters=C.characters if "characters" in C.keys() else None,
        add_blank=C["add_blank"] if "add_blank" in C.keys() else False,
        use_phonemes=C.use_phonemes,
        phoneme_cache_path=C.phoneme_cache_path,
        phoneme_language=C.phoneme_language,
        enable_eos_bos=C.enable_eos_bos_chars,
    )

    dataset.sort_and_filter_items(C.get("sort_by_audio_len", default=False))
    loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        num_workers=4,
        collate_fn=dataset.collate_fn,
        shuffle=False,
        drop_last=False,
    )

    # compute attentions: run the model over every batch and save one
    # alignment matrix per input wav next to the wav file itself.
    file_paths = []
    with torch.no_grad():
        for data in tqdm(loader):
            # setup input data (positional layout from dataset.collate_fn;
            # index 2 is skipped — presumably speaker names, verify in collate_fn)
            text_input = data[0]
            text_lengths = data[1]
            linear_input = data[3]
            mel_input = data[4]
            mel_lengths = data[5]
            stop_targets = data[6]
            item_idxs = data[7]

            # dispatch data to GPU
            if args.use_cuda:
                text_input = text_input.cuda()
                text_lengths = text_lengths.cuda()
                mel_input = mel_input.cuda()
                mel_lengths = mel_lengths.cuda()

            model_outputs = model.forward(text_input, text_lengths, mel_input)

            alignments = model_outputs["alignments"].detach()
            for idx, alignment in enumerate(alignments):
                item_idx = item_idxs[idx]
                # interpolate if r > 1: expand the decoder-time axis by the
                # reduction factor so the mask aligns with mel frames
                # (align_corners/recompute_scale_factor are intentionally None
                # for "nearest" mode)
                alignment = (
                    torch.nn.functional.interpolate(
                        alignment.transpose(0, 1).unsqueeze(0),
                        size=None,
                        scale_factor=model.decoder.r,
                        mode="nearest",
                        align_corners=None,
                        recompute_scale_factor=None,
                    )
                    .squeeze(0)
                    .transpose(0, 1)
                )
                # remove paddings so the mask matches the true mel/text lengths
                alignment = alignment[: mel_lengths[idx], : text_lengths[idx]].cpu().numpy()

                # set file paths: "path/x.wav" -> "path/x_attn.npy"
                wav_file_name = os.path.basename(item_idx)
                align_file_name = os.path.splitext(wav_file_name)[0] + "_attn.npy"
                file_path = item_idx.replace(wav_file_name, align_file_name)

                # save output
                wav_file_abs_path = os.path.abspath(item_idx)
                file_abs_path = os.path.abspath(file_path)
                file_paths.append([wav_file_abs_path, file_abs_path])
                np.save(file_path, alignment)

        # output metafile: one "wav_path|mask_path" pair per line
        metafile = os.path.join(args.data_path, "metadata_attn_mask.txt")

        with open(metafile, "w", encoding="utf-8") as f:
            for p in file_paths:
                f.write(f"{p[0]}|{p[1]}\n")
        print(f" >> Metafile created: {metafile}")
================================================
FILE: TTS/bin/compute_embeddings.py
================================================
import argparse
import os
from argparse import RawTextHelpFormatter
import torch
from tqdm import tqdm
from TTS.config import load_config
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.managers import save_file
from TTS.tts.utils.speakers import SpeakerManager
def compute_embeddings(
    model_path,
    config_path,
    output_path,
    old_spakers_file=None,
    config_dataset_path=None,
    formatter_name=None,
    dataset_name=None,
    dataset_path=None,
    meta_file_train=None,
    meta_file_val=None,
    disable_cuda=False,
    no_eval=False,
):
    """Compute a speaker embedding for every sample of a dataset and save them to disk.

    Args:
        model_path: Speaker-encoder checkpoint path (or URL).
        config_path: Speaker-encoder config path (or URL).
        output_path: Destination `.pth` file, or a directory (then `speakers.pth` inside it).
        old_spakers_file: Previous embedding file; embeddings already present there are reused.
        config_dataset_path: Dataset config file. Alternative to the formatter/name/path args.
        formatter_name / dataset_name / dataset_path: Manual dataset definition,
            used only when `config_dataset_path` is not given.
        meta_file_train / meta_file_val: Optional metafile overrides for the manual definition.
        disable_cuda: Force CPU even when CUDA is available.
        no_eval: Skip the eval split when loading samples.
    """
    use_cuda = torch.cuda.is_available() and not disable_cuda

    # Collect the samples: either from a full dataset config, or from an
    # ad-hoc config assembled out of the individual arguments.
    if config_dataset_path is not None:
        dataset_conf = load_config(config_dataset_path)
        train_samples, eval_samples = load_tts_samples(dataset_conf.datasets, eval_split=not no_eval)
    else:
        dataset_conf = BaseDatasetConfig()
        dataset_conf.formatter = formatter_name
        dataset_conf.dataset_name = dataset_name
        dataset_conf.path = dataset_path
        if meta_file_train is not None:
            dataset_conf.meta_file_train = meta_file_train
        if meta_file_val is not None:
            dataset_conf.meta_file_val = meta_file_val
        train_samples, eval_samples = load_tts_samples(dataset_conf, eval_split=not no_eval)

    samples = train_samples if eval_samples is None else train_samples + eval_samples

    encoder_manager = SpeakerManager(
        encoder_model_path=model_path,
        encoder_config_path=config_path,
        d_vectors_file_path=old_spakers_file,
        use_cuda=use_cuda,
    )

    class_name_key = encoder_manager.encoder_config.class_name_key

    # compute speaker embeddings, keyed by each clip's unique audio name
    speaker_mapping = {}
    for sample in tqdm(samples):
        class_name = sample[class_name_key]
        audio_file = sample["audio_file"]
        embedding_key = sample["audio_unique_name"]

        reuse_old = old_spakers_file is not None and embedding_key in encoder_manager.clip_ids
        if reuse_old:
            # get the embedding from the old file instead of recomputing it
            embedd = encoder_manager.get_embedding_by_clip(embedding_key)
        else:
            # extract the embedding by running the encoder over the clip
            embedd = encoder_manager.compute_embedding_from_clip(audio_file)

        speaker_mapping[embedding_key] = {"name": class_name, "embedding": embedd}

    if speaker_mapping:
        # resolve the destination file and make sure its directory exists
        if os.path.isdir(output_path):
            mapping_file_path = os.path.join(output_path, "speakers.pth")
        else:
            mapping_file_path = output_path

        if os.path.dirname(mapping_file_path) != "":
            os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True)

        save_file(speaker_mapping, mapping_file_path)
        print("Speaker embeddings saved at:", mapping_file_path)
if __name__ == "__main__":

    def _str2bool(value):
        """Parse a boolean CLI token.

        argparse's `type=bool` is broken for flags: `bool("False")` is True
        because any non-empty string is truthy, so `--no_eval False` would
        silently ENABLE the flag. This converter accepts the same
        `--flag True/False` style but parses it correctly.
        """
        if isinstance(value, bool):
            return value
        lowered = value.lower()
        if lowered in ("true", "1", "yes", "y"):
            return True
        if lowered in ("false", "0", "no", "n"):
            return False
        raise argparse.ArgumentTypeError(f"Boolean value expected, got '{value}'.")

    parser = argparse.ArgumentParser(
        description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
        """
Example runs:
python TTS/bin/compute_embeddings.py --model_path speaker_encoder_model.pth --config_path speaker_encoder_config.json  --config_dataset_path dataset_config.json

python TTS/bin/compute_embeddings.py --model_path speaker_encoder_model.pth --config_path speaker_encoder_config.json  --formatter_name coqui --dataset_path /path/to/vctk/dataset --dataset_name my_vctk --meta_file_train /path/to/vctk/metafile_train.csv --meta_file_val /path/to/vctk/metafile_eval.csv
""",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument(
        "--model_path",
        type=str,
        help="Path to model checkpoint file. It defaults to the released speaker encoder.",
        default="https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
    )
    parser.add_argument(
        "--config_path",
        type=str,
        help="Path to model config file. It defaults to the released speaker encoder config.",
        default="https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
    )
    parser.add_argument(
        "--config_dataset_path",
        type=str,
        help="Path to dataset config file. You either need to provide this or `formatter_name`, `dataset_name` and `dataset_path` arguments.",
        default=None,
    )
    parser.add_argument("--output_path", type=str, help="Path for output `pth` or `json` file.", default="speakers.pth")
    parser.add_argument(
        "--old_file", type=str, help="Previous embedding file to only compute new audios.", default=None
    )
    # Fixed: these used `type=bool`, which parses "False" as True (non-empty string).
    parser.add_argument("--disable_cuda", type=_str2bool, help="Flag to disable cuda.", default=False)
    parser.add_argument("--no_eval", type=_str2bool, help="Do not compute eval?. Default False", default=False)
    parser.add_argument(
        "--formatter_name",
        type=str,
        help="Name of the formatter to use. You either need to provide this or `config_dataset_path`",
        default=None,
    )
    parser.add_argument(
        "--dataset_name",
        type=str,
        help="Name of the dataset to use. You either need to provide this or `config_dataset_path`",
        default=None,
    )
    parser.add_argument(
        "--dataset_path",
        type=str,
        help="Path to the dataset. You either need to provide this or `config_dataset_path`",
        default=None,
    )
    parser.add_argument(
        "--meta_file_train",
        type=str,
        help="Path to the train meta file. If not set, dataset formatter uses the default metafile if it is defined in the formatter. You either need to provide this or `config_dataset_path`",
        default=None,
    )
    parser.add_argument(
        "--meta_file_val",
        type=str,
        help="Path to the evaluation meta file. If not set, dataset formatter uses the default metafile if it is defined in the formatter. You either need to provide this or `config_dataset_path`",
        default=None,
    )
    args = parser.parse_args()

    compute_embeddings(
        args.model_path,
        args.config_path,
        args.output_path,
        old_spakers_file=args.old_file,
        config_dataset_path=args.config_dataset_path,
        formatter_name=args.formatter_name,
        dataset_name=args.dataset_name,
        dataset_path=args.dataset_path,
        meta_file_train=args.meta_file_train,
        meta_file_val=args.meta_file_val,
        disable_cuda=args.disable_cuda,
        no_eval=args.no_eval,
    )
================================================
FILE: TTS/bin/compute_statistics.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import glob
import os
import numpy as np
from tqdm import tqdm
# from TTS.utils.io import load_config
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
def main():
"""Run preprocessing process."""
parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
parser.add_argument("out_path", type=str, help="save path (directory and filename).")
parser.add_argument(
"--data_path",
type=str,
required=False,
help="folder including the target set of wavs overriding dataset config.",
)
args, overrides = parser.parse_known_args()
CONFIG = load_config(args.config_path)
CONFIG.parse_known_args(overrides, relaxed_parser=True)
# load config
CONFIG.audio.signal_norm = False # do not apply earlier normalization
CONFIG.audio.stats_path = None # discard pre-defined stats
# load audio processor
ap = AudioProcessor(**CONFIG.audio.to_dict())
# load the meta data of target dataset
if args.data_path:
dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True)
else:
dataset_items = load_tts_samples(CONFIG.datasets)[0] # take only train data
print(f" > There are {len(dataset_items)} files.")
mel_sum = 0
mel_square_sum = 0
linear_sum = 0
linear_square_sum = 0
N = 0
for item in tqdm(dataset_items):
# compute features
wav = ap.load_wav(item if isinstance(item, str) else item["audio_file"])
linear = ap.spectrogram(wav)
mel = ap.melspectrogram(wav)
# compute stats
N += mel.shape[1]
mel_sum += mel.sum(1)
linear_sum += linear.sum(1)
mel_square_sum += (mel**2).sum(axis=1)
linear_square_sum += (linear**2).sum(axis=1)
mel_mean = mel_su
gitextract_p75g_f1y/
├── .gitignore
├── Assistant/
│ ├── Agents.py
│ ├── Chat.py
│ ├── VirtualAssistant.py
│ ├── __init__.py
│ ├── get_audio.py
│ ├── research_mode.py
│ ├── semantic_scholar/
│ │ ├── S2_tools.py
│ │ ├── __init__.py
│ │ ├── agent_tools.py
│ │ └── simple.py
│ ├── tools.py
│ ├── voice.py
│ └── webui.py
├── LICENSE
├── README.md
├── TTS/
│ ├── .models.json
│ ├── VERSION
│ ├── __init__.py
│ ├── api.py
│ ├── bin/
│ │ ├── __init__.py
│ │ ├── collect_env_info.py
│ │ ├── compute_attention_masks.py
│ │ ├── compute_embeddings.py
│ │ ├── compute_statistics.py
│ │ ├── eval_encoder.py
│ │ ├── extract_tts_spectrograms.py
│ │ ├── find_unique_chars.py
│ │ ├── find_unique_phonemes.py
│ │ ├── remove_silence_using_vad.py
│ │ ├── resample.py
│ │ ├── synthesize.py
│ │ ├── train_encoder.py
│ │ ├── train_tts.py
│ │ ├── train_vocoder.py
│ │ └── tune_wavegrad.py
│ ├── config/
│ │ ├── __init__.py
│ │ └── shared_configs.py
│ ├── encoder/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ ├── base_encoder_config.py
│ │ │ ├── emotion_encoder_config.py
│ │ │ └── speaker_encoder_config.py
│ │ ├── dataset.py
│ │ ├── losses.py
│ │ ├── models/
│ │ │ ├── base_encoder.py
│ │ │ ├── lstm.py
│ │ │ └── resnet.py
│ │ ├── requirements.txt
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── generic_utils.py
│ │ ├── io.py
│ │ ├── prepare_voxceleb.py
│ │ ├── training.py
│ │ └── visual.py
│ ├── model.py
│ ├── server/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── conf.json
│ │ ├── server.py
│ │ └── templates/
│ │ ├── details.html
│ │ └── index.html
│ ├── tts/
│ │ ├── __init__.py
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ ├── align_tts_config.py
│ │ │ ├── fast_pitch_config.py
│ │ │ ├── fast_speech_config.py
│ │ │ ├── fastspeech2_config.py
│ │ │ ├── glow_tts_config.py
│ │ │ ├── neuralhmm_tts_config.py
│ │ │ ├── overflow_config.py
│ │ │ ├── shared_configs.py
│ │ │ ├── speedy_speech_config.py
│ │ │ ├── tacotron2_config.py
│ │ │ ├── tacotron_config.py
│ │ │ └── vits_config.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── dataset.py
│ │ │ └── formatters.py
│ │ ├── layers/
│ │ │ ├── __init__.py
│ │ │ ├── align_tts/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── duration_predictor.py
│ │ │ │ └── mdn.py
│ │ │ ├── feed_forward/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── duration_predictor.py
│ │ │ │ └── encoder.py
│ │ │ ├── generic/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aligner.py
│ │ │ │ ├── gated_conv.py
│ │ │ │ ├── normalization.py
│ │ │ │ ├── pos_encoding.py
│ │ │ │ ├── res_conv_bn.py
│ │ │ │ ├── time_depth_sep_conv.py
│ │ │ │ ├── transformer.py
│ │ │ │ └── wavenet.py
│ │ │ ├── glow_tts/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── duration_predictor.py
│ │ │ │ ├── encoder.py
│ │ │ │ ├── glow.py
│ │ │ │ └── transformer.py
│ │ │ ├── losses.py
│ │ │ ├── overflow/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── common_layers.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── neural_hmm.py
│ │ │ │ └── plotting_utils.py
│ │ │ ├── tacotron/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attentions.py
│ │ │ │ ├── capacitron_layers.py
│ │ │ │ ├── common_layers.py
│ │ │ │ ├── gst_layers.py
│ │ │ │ ├── tacotron.py
│ │ │ │ └── tacotron2.py
│ │ │ └── vits/
│ │ │ ├── discriminator.py
│ │ │ ├── networks.py
│ │ │ ├── stochastic_duration_predictor.py
│ │ │ └── transforms.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── align_tts.py
│ │ │ ├── base_tacotron.py
│ │ │ ├── base_tts.py
│ │ │ ├── forward_tts.py
│ │ │ ├── glow_tts.py
│ │ │ ├── neuralhmm_tts.py
│ │ │ ├── overflow.py
│ │ │ ├── tacotron.py
│ │ │ ├── tacotron2.py
│ │ │ └── vits.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── data.py
│ │ ├── helpers.py
│ │ ├── languages.py
│ │ ├── managers.py
│ │ ├── measures.py
│ │ ├── monotonic_align/
│ │ │ ├── __init__.py
│ │ │ ├── core.c
│ │ │ ├── core.pyx
│ │ │ └── setup.py
│ │ ├── speakers.py
│ │ ├── ssim.py
│ │ ├── synthesis.py
│ │ ├── text/
│ │ │ ├── __init__.py
│ │ │ ├── characters.py
│ │ │ ├── chinese_mandarin/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── numbers.py
│ │ │ │ ├── phonemizer.py
│ │ │ │ └── pinyinToPhonemes.py
│ │ │ ├── cleaners.py
│ │ │ ├── cmudict.py
│ │ │ ├── english/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── abbreviations.py
│ │ │ │ ├── number_norm.py
│ │ │ │ └── time_norm.py
│ │ │ ├── french/
│ │ │ │ ├── __init__.py
│ │ │ │ └── abbreviations.py
│ │ │ ├── japanese/
│ │ │ │ ├── __init__.py
│ │ │ │ └── phonemizer.py
│ │ │ ├── korean/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ko_dictionary.py
│ │ │ │ ├── korean.py
│ │ │ │ └── phonemizer.py
│ │ │ ├── phonemizers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── espeak_wrapper.py
│ │ │ │ ├── gruut_wrapper.py
│ │ │ │ ├── ja_jp_phonemizer.py
│ │ │ │ ├── ko_kr_phonemizer.py
│ │ │ │ ├── multi_phonemizer.py
│ │ │ │ └── zh_cn_phonemizer.py
│ │ │ ├── punctuation.py
│ │ │ └── tokenizer.py
│ │ └── visual.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── audio/
│ │ │ ├── __init__.py
│ │ │ ├── numpy_transforms.py
│ │ │ ├── processor.py
│ │ │ └── torch_transforms.py
│ │ ├── callbacks.py
│ │ ├── capacitron_optimizer.py
│ │ ├── distribute.py
│ │ ├── download.py
│ │ ├── downloaders.py
│ │ ├── generic_utils.py
│ │ ├── io.py
│ │ ├── manage.py
│ │ ├── radam.py
│ │ ├── samplers.py
│ │ ├── synthesizer.py
│ │ ├── training.py
│ │ └── vad.py
│ ├── vc/
│ │ ├── configs/
│ │ │ ├── __init__.py
│ │ │ ├── freevc_config.py
│ │ │ └── shared_configs.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── base_vc.py
│ │ │ └── freevc.py
│ │ └── modules/
│ │ ├── __init__.py
│ │ └── freevc/
│ │ ├── __init__.py
│ │ ├── commons.py
│ │ ├── mel_processing.py
│ │ ├── modules.py
│ │ ├── speaker_encoder/
│ │ │ ├── __init__.py
│ │ │ ├── audio.py
│ │ │ ├── hparams.py
│ │ │ └── speaker_encoder.py
│ │ └── wavlm/
│ │ ├── __init__.py
│ │ ├── config.json
│ │ ├── modules.py
│ │ └── wavlm.py
│ └── vocoder/
│ ├── README.md
│ ├── __init__.py
│ ├── configs/
│ │ ├── __init__.py
│ │ ├── fullband_melgan_config.py
│ │ ├── hifigan_config.py
│ │ ├── melgan_config.py
│ │ ├── multiband_melgan_config.py
│ │ ├── parallel_wavegan_config.py
│ │ ├── shared_configs.py
│ │ ├── univnet_config.py
│ │ ├── wavegrad_config.py
│ │ └── wavernn_config.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── gan_dataset.py
│ │ ├── preprocess.py
│ │ ├── wavegrad_dataset.py
│ │ └── wavernn_dataset.py
│ ├── layers/
│ │ ├── __init__.py
│ │ ├── hifigan.py
│ │ ├── losses.py
│ │ ├── lvc_block.py
│ │ ├── melgan.py
│ │ ├── parallel_wavegan.py
│ │ ├── pqmf.py
│ │ ├── upsample.py
│ │ └── wavegrad.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── base_vocoder.py
│ │ ├── fullband_melgan_generator.py
│ │ ├── gan.py
│ │ ├── hifigan_discriminator.py
│ │ ├── hifigan_generator.py
│ │ ├── melgan_discriminator.py
│ │ ├── melgan_generator.py
│ │ ├── melgan_multiscale_discriminator.py
│ │ ├── multiband_melgan_generator.py
│ │ ├── parallel_wavegan_discriminator.py
│ │ ├── parallel_wavegan_generator.py
│ │ ├── random_window_discriminator.py
│ │ ├── univnet_discriminator.py
│ │ ├── univnet_generator.py
│ │ ├── wavegrad.py
│ │ └── wavernn.py
│ └── utils/
│ ├── __init__.py
│ ├── distribution.py
│ └── generic_utils.py
├── TTS_additional_material/
│ ├── .gitignore
│ ├── .pre-commit-config.yaml
│ ├── .pylintrc
│ ├── .readthedocs.yml
│ ├── CODE_OF_CONDUCT.md
│ ├── README.md
│ ├── hubconf.py
│ └── requirements.txt
├── UpdateHistory.md
├── Vicuna/
│ ├── README.md
│ ├── start-webui-vicuna-gpu.bat
│ └── vicuna.ps1
├── demos/
│ ├── chat_with_keyboard.py
│ ├── demo_da_vinci.py
│ ├── demo_elevenlabs.py
│ ├── demo_google_search.py
│ ├── demo_local_search_engine.py
│ ├── demo_pyaudio.py
│ ├── demo_research_mode.py
│ └── demo_tts.py
├── env.txt
├── openai_api_chatbot.py
├── run.bat
├── saved_chats/
│ ├── 2023-03-13_ExploringtheConceptofDeepLearningandItsApplicationsintheAviationIndustry.txt
│ ├── 2023-03-25_ExploringtheAmbitiousWorldofStarCitizenIsitRightforCasualGamers.txt
│ ├── 2023-03-26_AnIntroductiontoArtificialIntelligence.txt
│ ├── 2023-03-26_ArtificialIntelligence.txt
│ ├── 2023-03-26_ExploringthePossibilityofHumanExtinction.txt
│ ├── 2023-03-26_ExploringtheUseofTransformersinImageSegmentation.txt
│ ├── 2023-03-26_FlutterDevelopment.txt
│ ├── 2023-03-26_FlutterDevelopmentLearningMethodsandManagingAppState.txt
│ ├── 2023-03-26_HumanExtinction.txt
│ ├── 2023-03-26_NuclearandElectricPropulsioninSpaceExplorationAdvantagesandChallenges.txt
│ ├── 2023-03-26_RevolutionizingHealthcare.txt
│ ├── 2023-03-26_RevolutionizingHealthcarewithArtificialIntelligenceBenefitsandOpportunities.txt
│ ├── 2023-03-26_SpacePropulsion.txt
│ ├── 2023-03-26_TagImageSegmentation.txt
│ ├── 2023-03-26_TipsforRunningScriptsatSystemStartuponWindowsandLinux.txt
│ ├── 2023-03-26_TipsforRunningStartup.txt
│ ├── 2023-03-28_AsteroidImpact.txt
│ ├── 2023-03-28_ChoosingAppleComputer.txt
│ ├── 2023-03-28_ChoosingtheRightAppleComputerforDeepLearningandMachineLearningM1vsM2ChipandOptimalMemorySize.txt
│ ├── 2023-03-28_SizeandImpactHowBigDoesanAsteroidNeedtoBetoCauseSignificantDamage.txt
│ ├── 2023-03-29_InteractiveSystem.txt
│ ├── 2023-03-29_TraininganInteractiveSystemtoRespondtoCommandsandQuestions.txt
│ ├── 2023-03-30_ExploringtheColdestTemperaturesonEarthRecordBreakingLowTemperaturesandHistoricalEstimates.txt
│ ├── 2023-03-30_ExtremeColdTemperatures.txt
│ ├── 2023-03-30_TagBeerBasics.txt
│ ├── 2023-03-30_TagSkyColors.txt
│ ├── 2023-03-30_TheBasicsofBeerUnderstandingthePopularAlcoholicBeverage.txt
│ ├── 2023-03-30_TheColorsoftheSkyExploringtheHuesandVariations.txt
│ ├── 2023-04-01_IslandTravel.txt
│ ├── 2023-04-01_RegalieattivitàperunviaggioinIslanda.txt
│ ├── 2023-04-02_SoundCardComponentsandADC.txt
│ ├── 2023-04-02_UnderstandingtheComponentsofaSoundCardandtheFunctionofADC.txt
│ ├── 2023-04-03_ExploringMultiHeadSelfAttentionforKeywordIdentificationinNaturalLanguageProcessing.txt
│ ├── 2023-04-03_TagNaturalLanguageProcessing.txt
│ ├── 2023-04-04_ExploringFeasibleOptionsforPoweringanIronManSuitSolarCellsBatteriesandNuclearReactors.txt
│ ├── 2023-04-04_PoweringIronManSuit.txt
│ ├── 2023-04-05_DockerContainerization.txt
│ ├── 2023-04-07_ApproachesforTextto.txt
│ ├── 2023-04-09_FitbitDataandSleep.txt
│ ├── 2023-04-11_Fitness.txt
│ ├── 2023-04-19_ChineseLanguages.txt
│ ├── 2023-04-19_SyntheticMeat.txt
│ ├── 2023-04-20_LabGrownMeat.txt
│ ├── 2023-04-20_SyntheticMeatCont.txt
│ ├── 2023-04-20_andTextto.txt
│ ├── 2023-05-03_AIConversations.txt
│ ├── 2023-05-03_MeaninginLife.txt
│ ├── 2023-06-07_ImageProcessing.txt
│ └── DATAFRAME.csv
├── setup.bat
├── test_TTS.py
├── tests.py
├── venv_requirements.txt
├── whisper_edits/
│ ├── __init__.py
│ └── model.py
└── workspaces/
└── Vision_09df18b156814c80a3e1c1ab544423fc/
├── refy_suggestions/
│ ├── test.csv
│ └── test.html
└── results/
├── papers.bib
└── papers.csv
Showing preview only (221K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (2609 symbols across 205 files)
FILE: Assistant/Agents.py
function generateReactAgent (line 12) | def generateReactAgent(VA:VirtualAssistant, k:int):
function build_memory (line 63) | def build_memory(chat_history, k):
function generateGoogleAgent (line 83) | def generateGoogleAgent(VA:VirtualAssistant, k:int):
function locate_me (line 143) | def locate_me(p):
function today (line 147) | def today(p):
function time_between_dates (line 150) | def time_between_dates(date1, date2):
function news (line 159) | def news(keyword):
FILE: Assistant/VirtualAssistant.py
class VirtualAssistant (line 41) | class VirtualAssistant:
method __init__ (line 62) | def __init__(self,
method switch_mode (line 150) | def switch_mode(self):
method identify_explicit_command (line 164) | def identify_explicit_command(self, prompt):
method use_tools (line 196) | def use_tools(self, prompt, debug = DEBUG):
method secondary_agent (line 208) | def secondary_agent(self, prompt, debug = DEBUG):
method set_directories (line 218) | def set_directories(self, **kwargs):
method go_to_sleep (line 230) | def go_to_sleep(self):
method analyze_prompt (line 239) | def analyze_prompt(self, prompt, debug = DEBUG):
method start_new_conversation (line 300) | def start_new_conversation(self):
method expand_conversation (line 305) | def expand_conversation(self, role, content): self.current_conversatio...
method get_answer (line 307) | def get_answer(self, question, optimize_cuda = False, debug=DEBUG):
method save_chat (line 353) | def save_chat(self, debug = DEBUG):
method init_research_mode (line 394) | def init_research_mode(self, workspace=None):
method deallocate_whisper (line 418) | def deallocate_whisper(self):
method allocate_whisper (line 430) | def allocate_whisper(self):
method switch_whisper_device (line 448) | def switch_whisper_device(self):
method open_file (line 470) | def open_file(self, filename, debug=DEBUG):
method find_file (line 486) | def find_file(self, keywords, n=3, debug=DEBUG):
method play (line 501) | def play(self, fname, PlayAndWait=False, loop=False, debug = DEBUG):
method say (line 519) | def say(self, text, VoiceIdx='jarvis', elevenlabs=False, IBM=False):
method block_until_wakeword (line 563) | def block_until_wakeword(self, verbosity=False):
method listen_passively (line 615) | def listen_passively(self, verbosity=False):
method record_to_file (line 633) | def record_to_file(self, file_path):
method record (line 644) | def record(self):
function suppress_stdout (line 719) | def suppress_stdout():
FILE: Assistant/get_audio.py
function whisper_wav_to_text (line 11) | def whisper_wav_to_text(audio_name, model=[], model_name=False, prior=No...
function get_device_channels (line 49) | def get_device_channels():
function detect_microphones (line 57) | def detect_microphones():
function get_devices (line 67) | def get_devices():
FILE: Assistant/research_mode.py
class ResearchAssistant (line 13) | class ResearchAssistant:
method __init__ (line 14) | def __init__(self, current_conversation, workspace = None, index_name ...
method boot_workspace (line 32) | def boot_workspace(self, workspace):
method wrapper_find_papers_from_query (line 53) | def wrapper_find_papers_from_query(self, query):
method load_pdf_to_pinecone (line 67) | def load_pdf_to_pinecone(self, paths):
method PROTOCOL_begin_new_workspace (line 87) | def PROTOCOL_begin_new_workspace(self, query):
method wrapper_download_paper (line 119) | def wrapper_download_paper(self, id):
method wrapper_find_reccomendation (line 127) | def wrapper_find_reccomendation(self, paperId):
method find_in_papers (line 130) | def find_in_papers(self, query):
function generateResearchAgent (line 144) | def generateResearchAgent(RA:ResearchAssistant, k:int):
function build_memory (line 197) | def build_memory(chat_history, k):
FILE: Assistant/semantic_scholar/S2_tools.py
function get_paper (line 33) | def get_paper(session: Session, paper_id: str, fields: str = 'paperId,ti...
function find_paper_from_query (line 48) | def find_paper_from_query(query, result_limit=RESULT_LIMIT):
function find_recommendations (line 85) | def find_recommendations(paper, result_limit = RESULT_LIMIT):
function extract_title (line 96) | def extract_title(path):
function find_paper_online (line 133) | def find_paper_online(path):
function print_papers (line 177) | def print_papers(papers):
function chunks (line 184) | def chunks(items, chunk_size):
function fetch_paper_batch (line 188) | def fetch_paper_batch(paperid: list):
function download_pdf_from_id (line 201) | def download_pdf_from_id(paperid, path=os.getcwd()):
function update_dataframe (line 210) | def update_dataframe(incomplete, dest):
function write_bib_file (line 306) | def write_bib_file(csv_file, bib_file=None):
function create_bib_entry (line 317) | def create_bib_entry(row):
function replace_non_alphanumeric (line 347) | def replace_non_alphanumeric(string, replacement=' '):
function refy_reccomend (line 353) | def refy_reccomend(bib_path, number=20):
function write_to_pdf (line 362) | def write_to_pdf(text, dest):
FILE: Assistant/semantic_scholar/agent_tools.py
function PaperSearchAndDownload (line 22) | def PaperSearchAndDownload(query):
function download_paper (line 93) | def download_paper(url, save_path=f"{uuid.uuid4().hex}.pdf"):
function download_bibtex_library (line 137) | def download_bibtex_library(csv_path):
function generate_chunks (line 153) | def generate_chunks(text, CHUNK_LENGTH = 4000):
function get_result_path (line 205) | def get_result_path(path, exclude = []):
function get_workspace_titles (line 214) | def get_workspace_titles(workspace_name):
function same_title (line 224) | def same_title(title1, title2):
function glimpse_pdf (line 235) | def glimpse_pdf(title):
function count_tokens (line 249) | def count_tokens(text):
function readPDF (line 254) | def readPDF(pdf_path):
function get_pdf_path (line 265) | def get_pdf_path(dir, exclude=[]):
function delete_duplicates_from_csv (line 275) | def delete_duplicates_from_csv(csv_file):
function update_workspace_dataframe (line 325) | def update_workspace_dataframe(workspace, verbose = True):
function load_workspace (line 423) | def load_workspace(folderdir):
function list_workspace_elements (line 445) | def list_workspace_elements(folderdir):
function llama_query_engine (line 458) | def llama_query_engine(docs:list, pinecone_index_name:str):
function suppress_stdout (line 504) | def suppress_stdout():
FILE: Assistant/semantic_scholar/simple.py
function get_paper (line 16) | def get_paper(session: Session, paper_id: str, fields: str = 'paperId,ti...
function download_pdf (line 30) | def download_pdf(session: Session, url: str, path: str, user_agent: str ...
function download_paper (line 50) | def download_paper(session: Session, paper_id: str, directory: str = 'pa...
function download_papers (line 78) | def download_papers(paper_ids: list[str], directory: str = 'papers', use...
function main (line 88) | def main(args: argparse.Namespace) -> None:
function Main (line 106) | def Main(paper_ids=[], dir='papers', user_agent = 'requests/2.0.0', ):
FILE: Assistant/tools.py
class AssistantChat (line 28) | class AssistantChat(collections.MutableSequence):
method __init__ (line 29) | def __init__(self, begin:list, *args):
method is_saved (line 34) | def is_saved(self):
method insert (line 37) | def insert(self, i, v):
method append (line 40) | def append(self, item):
method __call__ (line 43) | def __call__(self):
method __len__ (line 46) | def __len__(self): return len(self.body)
method __getitem__ (line 48) | def __getitem__(self, i): return self.body[i]
method __delitem__ (line 50) | def __delitem__(self, i): del self.body[i]
method __setitem__ (line 52) | def __setitem__(self, i, v):
method __str__ (line 55) | def __str__(self):
class Translator (line 72) | class Translator:
method __init__ (line 73) | def __init__(self, model="argostranslator", **kwargs):
method translate (line 110) | def translate(self, input, to_language, from_language=None):
class LocalSearchEngine (line 164) | class LocalSearchEngine:
method __init__ (line 165) | def __init__(self,
method compute_similarity (line 179) | def compute_similarity(self, key, text):
method accurate_search (line 190) | def accurate_search(self, key, path=None, n=-1, from_csv=False):
method produce_folder_tags (line 260) | def produce_folder_tags(self, path=None):
method extract_tags (line 336) | def extract_tags(self, text):
method compute_embeds (line 355) | def compute_embeds(self, words):
method DaVinci_tldr (line 358) | def DaVinci_tldr(self, text):
method tldr (line 370) | def tldr(self, text, to_language=None, with_model = ''):
class OnlineSearchEngine (line 413) | class OnlineSearchEngine:
function count_tokens (line 420) | def count_tokens(vCountTokenStr):
function parse_conversation (line 430) | def parse_conversation(string_chat):
function take_last_k_interactions (line 447) | def take_last_k_interactions(chat, max_tokens=4000):
FILE: Assistant/voice.py
class Voice (line 15) | class Voice:
method __init__ (line 16) | def __init__(self, languages, **kwargs):
method speak (line 65) | def speak(self, text, VoiceIdx, mode, elevenlabs=False, IBM=False):
method change_offline_lang (line 124) | def change_offline_lang(self, lang_id):
function get_ibm_voice_id (line 137) | def get_ibm_voice_id(VoiceIdx):
function suppress_stdout (line 194) | def suppress_stdout():
FILE: Assistant/webui.py
function set_text_gen_params (line 29) | def set_text_gen_params(**kwargs):
function oobabooga_textgen (line 35) | def oobabooga_textgen(prompt, params=TEXT_GEN_PARAMS, server=SERVER):
function post_process (line 60) | def post_process(answer):
function parse_conversation (line 77) | def parse_conversation(chat):
FILE: TTS/api.py
class TTS (line 9) | class TTS:
method __init__ (line 12) | def __init__(
method models (line 66) | def models(self):
method is_multi_speaker (line 70) | def is_multi_speaker(self):
method is_multi_lingual (line 76) | def is_multi_lingual(self):
method speakers (line 82) | def speakers(self):
method languages (line 88) | def languages(self):
method get_models_file_path (line 94) | def get_models_file_path():
method list_models (line 98) | def list_models():
method download_model_by_name (line 102) | def download_model_by_name(self, model_name: str):
method load_vc_model_by_name (line 109) | def load_vc_model_by_name(self, model_name: str, gpu: bool = False):
method load_tts_model_by_name (line 119) | def load_tts_model_by_name(self, model_name: str, gpu: bool = False):
method load_tts_model_by_path (line 145) | def load_tts_model_by_path(
method _check_arguments (line 170) | def _check_arguments(self, speaker: str = None, language: str = None, ...
method tts (line 180) | def tts(self, text: str, speaker: str = None, language: str = None, sp...
method tts_to_file (line 210) | def tts_to_file(
method voice_conversion (line 238) | def voice_conversion(
method tts_with_vc (line 254) | def tts_with_vc(self, text: str, language: str = None, speaker_wav: st...
method tts_with_vc_to_file (line 280) | def tts_with_vc_to_file(
FILE: TTS/bin/collect_env_info.py
function system_info (line 15) | def system_info():
function cuda_info (line 25) | def cuda_info():
function package_info (line 33) | def package_info():
function main (line 42) | def main():
FILE: TTS/bin/compute_embeddings.py
function compute_embeddings (line 15) | def compute_embeddings(
FILE: TTS/bin/compute_statistics.py
function main (line 17) | def main():
FILE: TTS/bin/eval_encoder.py
function compute_encoder_accuracy (line 12) | def compute_encoder_accuracy(dataset_items, encoder_manager):
FILE: TTS/bin/extract_tts_spectrograms.py
function setup_loader (line 23) | def setup_loader(ap, r, verbose=False):
function set_filename (line 62) | def set_filename(wav_path, out_path):
function format_data (line 76) | def format_data(data):
function inference (line 116) | def inference(
function extract_spectrograms (line 161) | def extract_spectrograms(
function main (line 223) | def main(args): # pylint: disable=redefined-outer-name
FILE: TTS/bin/find_unique_chars.py
function main (line 9) | def main():
FILE: TTS/bin/find_unique_phonemes.py
function compute_phonemes (line 13) | def compute_phonemes(item):
function main (line 19) | def main():
FILE: TTS/bin/remove_silence_using_vad.py
function adjust_path_and_remove_silence (line 11) | def adjust_path_and_remove_silence(audio_path):
function preprocess_audios (line 31) | def preprocess_audios():
FILE: TTS/bin/resample.py
function resample_file (line 13) | def resample_file(func_args):
function resample_files (line 19) | def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav"...
FILE: TTS/bin/synthesize.py
function str2bool (line 15) | def str2bool(v):
function main (line 25) | def main():
FILE: TTS/bin/train_encoder.py
function setup_loader (line 34) | def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool...
function evaluation (line 85) | def evaluation(model, criterion, data_loader, global_step):
function train (line 126) | def train(model, optimizer, scheduler, criterion, data_loader, eval_data...
function main (line 254) | def main(args): # pylint: disable=redefined-outer-name
FILE: TTS/bin/train_tts.py
class TrainTTSArgs (line 12) | class TrainTTSArgs(TrainerArgs):
function main (line 16) | def main():
FILE: TTS/bin/train_vocoder.py
class TrainVocoderArgs (line 13) | class TrainVocoderArgs(TrainerArgs):
function main (line 17) | def main():
FILE: TTS/config/__init__.py
function read_json_with_comments (line 14) | def read_json_with_comments(json_path):
function register_config (line 26) | def register_config(model_name: str) -> Coqpit:
function _process_model_name (line 51) | def _process_model_name(config_dict: Dict) -> str:
function load_config (line 65) | def load_config(config_path: str) -> Coqpit:
function check_config_and_model_args (line 100) | def check_config_and_model_args(config, arg_name, value):
function get_from_config_or_model_args (line 117) | def get_from_config_or_model_args(config, arg_name):
function get_from_config_or_model_args_with_default (line 125) | def get_from_config_or_model_args_with_default(config, arg_name, def_val):
FILE: TTS/config/shared_configs.py
class BaseAudioConfig (line 9) | class BaseAudioConfig(Coqpit):
method check_values (line 156) | def check_values(
class BaseDatasetConfig (line 192) | class BaseDatasetConfig(Coqpit):
method check_values (line 236) | def check_values(
class BaseTrainingConfig (line 249) | class BaseTrainingConfig(TrainerConfig):
FILE: TTS/encoder/configs/base_encoder_config.py
class BaseEncoderConfig (line 10) | class BaseEncoderConfig(BaseTrainingConfig):
method check_values (line 56) | def check_values(self):
FILE: TTS/encoder/configs/emotion_encoder_config.py
class EmotionEncoderConfig (line 7) | class EmotionEncoderConfig(BaseEncoderConfig):
FILE: TTS/encoder/configs/speaker_encoder_config.py
class SpeakerEncoderConfig (line 7) | class SpeakerEncoderConfig(BaseEncoderConfig):
FILE: TTS/encoder/dataset.py
class EncoderDataset (line 9) | class EncoderDataset(Dataset):
method __init__ (line 10) | def __init__(
method load_wav (line 61) | def load_wav(self, filename):
method __parse_items (line 65) | def __parse_items(self):
method __len__ (line 98) | def __len__(self):
method get_num_classes (line 101) | def get_num_classes(self):
method get_class_list (line 104) | def get_class_list(self):
method set_classes (line 107) | def set_classes(self, classes):
method get_map_classid_to_classname (line 111) | def get_map_classid_to_classname(self):
method __getitem__ (line 114) | def __getitem__(self, idx):
method collate_fn (line 117) | def collate_fn(self, batch):
FILE: TTS/encoder/losses.py
class GE2ELoss (line 7) | class GE2ELoss(nn.Module):
method __init__ (line 8) | def __init__(self, init_w=10.0, init_b=-5.0, loss_method="softmax"):
method calc_new_centroids (line 36) | def calc_new_centroids(self, dvecs, centroids, spkr, utt):
method calc_cosine_sim (line 50) | def calc_cosine_sim(self, dvecs, centroids):
method embed_loss_softmax (line 75) | def embed_loss_softmax(self, dvecs, cos_sim_matrix):
method embed_loss_contrast (line 90) | def embed_loss_contrast(self, dvecs, cos_sim_matrix):
method forward (line 106) | def forward(self, x, _label=None):
class AngleProtoLoss (line 122) | class AngleProtoLoss(nn.Module):
method __init__ (line 134) | def __init__(self, init_w=10.0, init_b=-5.0):
method forward (line 144) | def forward(self, x, _label=None):
class SoftmaxLoss (line 166) | class SoftmaxLoss(nn.Module):
method __init__ (line 174) | def __init__(self, embedding_dim, n_speakers):
method forward (line 182) | def forward(self, x, label=None):
method inference (line 192) | def inference(self, embedding):
class SoftmaxAngleProtoLoss (line 199) | class SoftmaxAngleProtoLoss(nn.Module):
method __init__ (line 209) | def __init__(self, embedding_dim, n_speakers, init_w=10.0, init_b=-5.0):
method forward (line 217) | def forward(self, x, label=None):
FILE: TTS/encoder/models/base_encoder.py
class PreEmphasis (line 12) | class PreEmphasis(nn.Module):
method __init__ (line 13) | def __init__(self, coefficient=0.97):
method forward (line 18) | def forward(self, x):
class BaseEncoder (line 25) | class BaseEncoder(nn.Module):
method __init__ (line 32) | def __init__(self):
method get_torch_mel_spectrogram_class (line 35) | def get_torch_mel_spectrogram_class(self, audio_config):
method inference (line 64) | def inference(self, x, l2_norm=True):
method compute_embedding (line 68) | def compute_embedding(self, x, num_frames=250, num_eval=10, return_mea...
method get_criterion (line 98) | def get_criterion(self, c: Coqpit, num_classes=None):
method load_checkpoint (line 109) | def load_checkpoint(
FILE: TTS/encoder/models/lstm.py
class LSTMWithProjection (line 7) | class LSTMWithProjection(nn.Module):
method __init__ (line 8) | def __init__(self, input_size, hidden_size, proj_size):
method forward (line 16) | def forward(self, x):
class LSTMWithoutProjection (line 22) | class LSTMWithoutProjection(nn.Module):
method __init__ (line 23) | def __init__(self, input_dim, lstm_dim, proj_dim, num_lstm_layers):
method forward (line 29) | def forward(self, x):
class LSTMSpeakerEncoder (line 34) | class LSTMSpeakerEncoder(BaseEncoder):
method __init__ (line 35) | def __init__(
method _init_layers (line 70) | def _init_layers(self):
method forward (line 77) | def forward(self, x, l2_norm=True):
FILE: TTS/encoder/models/resnet.py
class SELayer (line 8) | class SELayer(nn.Module):
method __init__ (line 9) | def __init__(self, channel, reduction=8):
method forward (line 19) | def forward(self, x):
class SEBasicBlock (line 26) | class SEBasicBlock(nn.Module):
method __init__ (line 29) | def __init__(self, inplanes, planes, stride=1, downsample=None, reduct...
method forward (line 40) | def forward(self, x):
class ResNetSpeakerEncoder (line 59) | class ResNetSpeakerEncoder(BaseEncoder):
method __init__ (line 65) | def __init__(
method _init_layers (line 123) | def _init_layers(self):
method create_layer (line 131) | def create_layer(self, block, planes, blocks, stride=1):
method new_parameter (line 148) | def new_parameter(self, *size):
method forward (line 153) | def forward(self, x, l2_norm=False):
FILE: TTS/encoder/utils/generic_utils.py
class AugmentWAV (line 15) | class AugmentWAV(object):
method __init__ (line 16) | def __init__(self, ap, augmentation_config):
method create_augmentation_global_list (line 60) | def create_augmentation_global_list(self):
method additive_noise (line 68) | def additive_noise(self, noise_type, audio):
method reverberate (line 105) | def reverberate(self, audio):
method apply_one (line 113) | def apply_one(self, audio):
function to_camel (line 121) | def to_camel(text):
function setup_encoder_model (line 126) | def setup_encoder_model(config: "Coqpit"):
function save_checkpoint (line 147) | def save_checkpoint(model, optimizer, criterion, model_loss, out_path, c...
function save_best_model (line 165) | def save_best_model(model, optimizer, criterion, model_loss, best_loss, ...
FILE: TTS/encoder/utils/io.py
function save_checkpoint (line 7) | def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
function save_best_model (line 23) | def save_best_model(model, optimizer, model_loss, best_loss, out_path, c...
FILE: TTS/encoder/utils/prepare_voxceleb.py
function download_and_extract (line 65) | def download_and_extract(directory, subset, urls):
function exec_cmd (line 111) | def exec_cmd(cmd):
function decode_aac_with_ffmpeg (line 128) | def decode_aac_with_ffmpeg(aac_file, wav_file):
function convert_audio_and_make_label (line 146) | def convert_audio_and_make_label(input_dir, subset, output_dir, output_f...
function processor (line 193) | def processor(directory, subset, force_process):
FILE: TTS/encoder/utils/training.py
class TrainArgs (line 16) | class TrainArgs(TrainerArgs):
function getarguments (line 20) | def getarguments():
function process_args (line 26) | def process_args(args, config=None):
function init_arguments (line 88) | def init_arguments():
function init_training (line 94) | def init_training(config: Coqpit = None):
FILE: TTS/encoder/utils/visual.py
function plot_embeddings (line 32) | def plot_embeddings(embeddings, num_classes_in_batch):
FILE: TTS/model.py
class BaseTrainerModel (line 11) | class BaseTrainerModel(TrainerModel):
method init_from_config (line 19) | def init_from_config(config: Coqpit):
method inference (line 27) | def inference(self, input: torch.Tensor, aux_input={}) -> Dict:
method load_checkpoint (line 47) | def load_checkpoint(
FILE: TTS/server/server.py
function create_argparser (line 19) | def create_argparser():
function style_wav_uri_to_dict (line 131) | def style_wav_uri_to_dict(style_wav: str) -> Union[str, dict]:
function index (line 151) | def index():
function details (line 164) | def details():
function tts (line 184) | def tts():
function mary_tts_api_locales (line 204) | def mary_tts_api_locales():
function mary_tts_api_voices (line 215) | def mary_tts_api_voices():
function mary_tts_api_process (line 228) | def mary_tts_api_process():
function main (line 244) | def main():
FILE: TTS/tts/configs/align_tts_config.py
class AlignTTSConfig (line 9) | class AlignTTSConfig(BaseTTSConfig):
FILE: TTS/tts/configs/fast_pitch_config.py
class FastPitchConfig (line 9) | class FastPitchConfig(BaseTTSConfig):
method __post_init__ (line 166) | def __post_init__(self):
FILE: TTS/tts/configs/fast_speech_config.py
class FastSpeechConfig (line 9) | class FastSpeechConfig(BaseTTSConfig):
method __post_init__ (line 160) | def __post_init__(self):
FILE: TTS/tts/configs/fastspeech2_config.py
class Fastspeech2Config (line 9) | class Fastspeech2Config(BaseTTSConfig):
method __post_init__ (line 181) | def __post_init__(self):
FILE: TTS/tts/configs/glow_tts_config.py
class GlowTTSConfig (line 8) | class GlowTTSConfig(BaseTTSConfig):
FILE: TTS/tts/configs/neuralhmm_tts_config.py
class NeuralhmmTTSConfig (line 8) | class NeuralhmmTTSConfig(BaseTTSConfig):
method check_values (line 157) | def check_values(self):
FILE: TTS/tts/configs/overflow_config.py
class OverflowConfig (line 8) | class OverflowConfig(BaseTTSConfig): # The classname has to be camel case
method check_values (line 188) | def check_values(self):
FILE: TTS/tts/configs/shared_configs.py
class GSTConfig (line 10) | class GSTConfig(Coqpit):
method check_values (line 37) | def check_values(
class CapacitronVAEConfig (line 52) | class CapacitronVAEConfig(Coqpit):
method check_values (line 81) | def check_values(
class CharactersConfig (line 96) | class CharactersConfig(Coqpit):
class BaseTTSConfig (line 156) | class BaseTTSConfig(BaseTrainingConfig):
FILE: TTS/tts/configs/speedy_speech_config.py
class SpeedySpeechConfig (line 9) | class SpeedySpeechConfig(BaseTTSConfig):
method __post_init__ (line 175) | def __post_init__(self):
FILE: TTS/tts/configs/tacotron2_config.py
class Tacotron2Config (line 7) | class Tacotron2Config(TacotronConfig):
FILE: TTS/tts/configs/tacotron_config.py
class TacotronConfig (line 8) | class TacotronConfig(BaseTTSConfig):
method check_values (line 225) | def check_values(self):
FILE: TTS/tts/configs/vits_config.py
class VitsConfig (line 9) | class VitsConfig(BaseTTSConfig):
method __post_init__ (line 173) | def __post_init__(self):
FILE: TTS/tts/datasets/__init__.py
function split_dataset (line 13) | def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01):
function add_extra_keys (line 59) | def add_extra_keys(metadata, language, dataset_name):
function load_tts_samples (line 70) | def load_tts_samples(
function load_attention_mask_meta_data (line 151) | def load_attention_mask_meta_data(metafile_path):
function _get_formatter_by_name (line 163) | def _get_formatter_by_name(name):
function find_unique_chars (line 169) | def find_unique_chars(data_samples, verbose=True):
FILE: TTS/tts/datasets/dataset.py
function _parse_sample (line 21) | def _parse_sample(item):
function noise_augment_audio (line 35) | def noise_augment_audio(wav):
function string2filename (line 39) | def string2filename(string):
class TTSDataset (line 45) | class TTSDataset(Dataset):
method __init__ (line 46) | def __init__(
method lengths (line 175) | def lengths(self):
method samples (line 184) | def samples(self):
method samples (line 188) | def samples(self, new_samples):
method __len__ (line 197) | def __len__(self):
method __getitem__ (line 200) | def __getitem__(self, idx):
method print_logs (line 203) | def print_logs(self, level: int = 0) -> None:
method load_wav (line 211) | def load_wav(self, filename):
method get_phonemes (line 216) | def get_phonemes(self, idx, text):
method get_f0 (line 222) | def get_f0(self, idx):
method get_energy (line 228) | def get_energy(self, idx):
method get_attn_mask (line 235) | def get_attn_mask(attn_file):
method get_token_ids (line 238) | def get_token_ids(self, idx, text):
method load_data (line 245) | def load_data(self, idx):
method _compute_lengths (line 295) | def _compute_lengths(samples):
method filter_by_length (line 306) | def filter_by_length(lengths: List[int], min_len: int, max_len: int):
method sort_by_length (line 319) | def sort_by_length(samples: List[List]):
method create_buckets (line 325) | def create_buckets(samples, batch_group_size: int):
method _select_samples_by_idx (line 336) | def _select_samples_by_idx(idxs, samples):
method preprocess_samples (line 342) | def preprocess_samples(self):
method _sort_batch (line 394) | def _sort_batch(batch, text_lengths):
method collate_fn (line 405) | def collate_fn(self, batch):
class PhonemeDataset (line 566) | class PhonemeDataset(Dataset):
method __init__ (line 586) | def __init__(
method __getitem__ (line 600) | def __getitem__(self, index):
method __len__ (line 606) | def __len__(self):
method compute_or_load (line 609) | def compute_or_load(self, file_name, text, language):
method get_pad_id (line 623) | def get_pad_id(self):
method precompute (line 627) | def precompute(self, num_workers=1):
method collate_fn (line 641) | def collate_fn(self, batch):
method print_logs (line 652) | def print_logs(self, level: int = 0) -> None:
class F0Dataset (line 661) | class F0Dataset:
method __init__ (line 685) | def __init__(
method __getitem__ (line 708) | def __getitem__(self, idx):
method __len__ (line 716) | def __len__(self):
method precompute (line 719) | def precompute(self, num_workers=0):
method get_pad_id (line 742) | def get_pad_id(self):
method create_pitch_file_path (line 746) | def create_pitch_file_path(file_name, cache_path):
method _compute_and_save_pitch (line 751) | def _compute_and_save_pitch(ap, wav_file, pitch_file=None):
method compute_pitch_stats (line 759) | def compute_pitch_stats(pitch_vecs):
method load_stats (line 764) | def load_stats(self, cache_path):
method normalize (line 770) | def normalize(self, pitch):
method denormalize (line 777) | def denormalize(self, pitch):
method compute_or_load (line 784) | def compute_or_load(self, wav_file, audio_unique_name):
method collate_fn (line 795) | def collate_fn(self, batch):
method print_logs (line 805) | def print_logs(self, level: int = 0) -> None:
class EnergyDataset (line 812) | class EnergyDataset:
method __init__ (line 836) | def __init__(
method __getitem__ (line 859) | def __getitem__(self, idx):
method __len__ (line 867) | def __len__(self):
method precompute (line 870) | def precompute(self, num_workers=0):
method get_pad_id (line 893) | def get_pad_id(self):
method create_energy_file_path (line 897) | def create_energy_file_path(wav_file, cache_path):
method _compute_and_save_energy (line 903) | def _compute_and_save_energy(ap, wav_file, energy_file=None):
method compute_energy_stats (line 911) | def compute_energy_stats(energy_vecs):
method load_stats (line 916) | def load_stats(self, cache_path):
method normalize (line 922) | def normalize(self, energy):
method denormalize (line 929) | def denormalize(self, energy):
method compute_or_load (line 936) | def compute_or_load(self, wav_file, audio_unique_name):
method collate_fn (line 947) | def collate_fn(self, batch):
method print_logs (line 957) | def print_logs(self, level: int = 0) -> None:
FILE: TTS/tts/datasets/formatters.py
function coqui (line 16) | def coqui(root_path, meta_file, ignored_speakers=None):
function tweb (line 54) | def tweb(root_path, meta_file, **kwargs): # pylint: disable=unused-argu...
function mozilla (line 70) | def mozilla(root_path, meta_file, **kwargs): # pylint: disable=unused-a...
function mozilla_de (line 85) | def mozilla_de(root_path, meta_file, **kwargs): # pylint: disable=unuse...
function mailabs (line 101) | def mailabs(root_path, meta_files=None, ignored_speakers=None):
function ljspeech (line 152) | def ljspeech(root_path, meta_file, **kwargs): # pylint: disable=unused-...
function ljspeech_test (line 167) | def ljspeech_test(root_path, meta_file, **kwargs): # pylint: disable=un...
function thorsten (line 187) | def thorsten(root_path, meta_file, **kwargs): # pylint: disable=unused-...
function sam_accenture (line 202) | def sam_accenture(root_path, meta_file, **kwargs): # pylint: disable=un...
function ruslan (line 219) | def ruslan(root_path, meta_file, **kwargs): # pylint: disable=unused-ar...
function css10 (line 234) | def css10(root_path, meta_file, **kwargs): # pylint: disable=unused-arg...
function nancy (line 248) | def nancy(root_path, meta_file, **kwargs): # pylint: disable=unused-arg...
function common_voice (line 262) | def common_voice(root_path, meta_file, ignored_speakers=None):
function libri_tts (line 284) | def libri_tts(root_path, meta_files=None, ignored_speakers=None):
function custom_turkish (line 320) | def custom_turkish(root_path, meta_file, **kwargs): # pylint: disable=u...
function brspeech (line 339) | def brspeech(root_path, meta_file, ignored_speakers=None):
function vctk (line 359) | def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", ...
function vctk_old (line 406) | def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_spea...
function synpaflex (line 426) | def synpaflex(root_path, metafiles=None, **kwargs): # pylint: disable=u...
function open_bible (line 445) | def open_bible(root_path, meta_files="train", ignore_digits_sentences=Tr...
function mls (line 467) | def mls(root_path, meta_files=None, ignored_speakers=None):
function voxceleb2 (line 487) | def voxceleb2(root_path, meta_file=None, **kwargs): # pylint: disable=u...
function voxceleb1 (line 494) | def voxceleb1(root_path, meta_file=None, **kwargs): # pylint: disable=u...
function _voxcel_x (line 501) | def _voxcel_x(root_path, meta_file, voxcel_idx):
function emotion (line 536) | def emotion(root_path, meta_file, ignored_speakers=None):
function baker (line 558) | def baker(root_path: str, meta_file: str, **kwargs) -> List[List[str]]: ...
function kokoro (line 578) | def kokoro(root_path, meta_file, **kwargs): # pylint: disable=unused-ar...
function kss (line 592) | def kss(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
FILE: TTS/tts/layers/align_tts/duration_predictor.py
class DurationPredictor (line 7) | class DurationPredictor(nn.Module):
method __init__ (line 8) | def __init__(self, num_chars, hidden_channels, hidden_channels_ffn, nu...
method forward (line 15) | def forward(self, text, text_lengths):
FILE: TTS/tts/layers/align_tts/mdn.py
class MDNBlock (line 4) | class MDNBlock(nn.Module):
method __init__ (line 9) | def __init__(self, in_channels, out_channels):
method forward (line 18) | def forward(self, x):
FILE: TTS/tts/layers/feed_forward/decoder.py
class WaveNetDecoder (line 10) | class WaveNetDecoder(nn.Module):
method __init__ (line 36) | def __init__(self, in_channels, out_channels, hidden_channels, c_in_ch...
method forward (line 54) | def forward(self, x, x_mask=None, g=None):
class RelativePositionTransformerDecoder (line 61) | class RelativePositionTransformerDecoder(nn.Module):
method __init__ (line 83) | def __init__(self, in_channels, out_channels, hidden_channels, params):
method forward (line 88) | def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-a...
class FFTransformerDecoder (line 94) | class FFTransformerDecoder(nn.Module):
method __init__ (line 112) | def __init__(self, in_channels, out_channels, params):
method forward (line 117) | def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-a...
class ResidualConv1dBNDecoder (line 125) | class ResidualConv1dBNDecoder(nn.Module):
method __init__ (line 146) | def __init__(self, in_channels, out_channels, hidden_channels, params):
method forward (line 157) | def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-a...
class Decoder (line 163) | class Decoder(nn.Module):
method __init__ (line 177) | def __init__(
method forward (line 219) | def forward(self, x, x_mask, g=None): # pylint: disable=unused-argument
FILE: TTS/tts/layers/feed_forward/duration_predictor.py
class DurationPredictor (line 6) | class DurationPredictor(nn.Module):
method __init__ (line 20) | def __init__(self, hidden_channels):
method forward (line 32) | def forward(self, x, x_mask):
FILE: TTS/tts/layers/feed_forward/encoder.py
class RelativePositionTransformerEncoder (line 8) | class RelativePositionTransformerEncoder(nn.Module):
method __init__ (line 20) | def __init__(self, in_channels, out_channels, hidden_channels, params):
method forward (line 33) | def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-a...
class ResidualConv1dBNEncoder (line 41) | class ResidualConv1dBNEncoder(nn.Module):
method __init__ (line 53) | def __init__(self, in_channels, out_channels, hidden_channels, params):
method forward (line 67) | def forward(self, x, x_mask=None, g=None): # pylint: disable=unused-a...
class Encoder (line 76) | class Encoder(nn.Module):
method __init__ (line 121) | def __init__(
method forward (line 154) | def forward(self, x, x_mask, g=None): # pylint: disable=unused-argument
FILE: TTS/tts/layers/generic/aligner.py
class AlignmentNetwork (line 7) | class AlignmentNetwork(torch.nn.Module):
method __init__ (line 22) | def __init__(
method forward (line 60) | def forward(
FILE: TTS/tts/layers/generic/gated_conv.py
class GatedConvBlock (line 6) | class GatedConvBlock(nn.Module):
method __init__ (line 14) | def __init__(self, in_out_channels, kernel_size, dropout_p, num_layers):
method forward (line 27) | def forward(self, x, x_mask):
FILE: TTS/tts/layers/generic/normalization.py
class LayerNorm (line 5) | class LayerNorm(nn.Module):
method __init__ (line 6) | def __init__(self, channels, eps=1e-4):
method forward (line 23) | def forward(self, x):
class LayerNorm2 (line 31) | class LayerNorm2(nn.Module):
method __init__ (line 42) | def __init__(self, channels, eps=1e-5):
method forward (line 50) | def forward(self, x):
class TemporalBatchNorm1d (line 56) | class TemporalBatchNorm1d(nn.BatchNorm1d):
method __init__ (line 59) | def __init__(self, channels, affine=True, track_running_stats=True, mo...
method forward (line 62) | def forward(self, x):
class ActNorm (line 66) | class ActNorm(nn.Module):
method __init__ (line 80) | def __init__(self, channels, ddi=False, **kwargs): # pylint: disable=...
method forward (line 88) | def forward(self, x, x_mask=None, reverse=False, **kwargs): # pylint:...
method store_inverse (line 105) | def store_inverse(self):
method set_ddi (line 108) | def set_ddi(self, ddi):
method initialize (line 111) | def initialize(self, x, x_mask):
FILE: TTS/tts/layers/generic/pos_encoding.py
class PositionalEncoding (line 7) | class PositionalEncoding(nn.Module):
method __init__ (line 18) | def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=Fa...
method forward (line 38) | def forward(self, x, mask=None, first_idx=None, last_idx=None):
FILE: TTS/tts/layers/generic/res_conv_bn.py
class ZeroTemporalPad (line 4) | class ZeroTemporalPad(nn.Module):
method __init__ (line 7) | def __init__(self, kernel_size, dilation):
method forward (line 14) | def forward(self, x):
class Conv1dBN (line 18) | class Conv1dBN(nn.Module):
method __init__ (line 32) | def __init__(self, in_channels, out_channels, kernel_size, dilation):
method forward (line 41) | def forward(self, x):
class Conv1dBNBlock (line 49) | class Conv1dBNBlock(nn.Module):
method __init__ (line 61) | def __init__(self, in_channels, out_channels, hidden_channels, kernel_...
method forward (line 74) | def forward(self, x):
class ResidualConv1dBNBlock (line 82) | class ResidualConv1dBNBlock(nn.Module):
method __init__ (line 100) | def __init__(
method forward (line 117) | def forward(self, x, x_mask=None):
FILE: TTS/tts/layers/generic/time_depth_sep_conv.py
class TimeDepthSeparableConv (line 5) | class TimeDepthSeparableConv(nn.Module):
method __init__ (line 9) | def __init__(self, in_channels, hid_channels, out_channels, kernel_siz...
method forward (line 46) | def forward(self, x):
class TimeDepthSeparableConvBlock (line 60) | class TimeDepthSeparableConvBlock(nn.Module):
method __init__ (line 61) | def __init__(self, in_channels, hid_channels, out_channels, num_layers...
method forward (line 81) | def forward(self, x, mask):
FILE: TTS/tts/layers/generic/transformer.py
class FFTransformer (line 6) | class FFTransformer(nn.Module):
method __init__ (line 7) | def __init__(self, in_out_channels, num_heads, hidden_channels_ffn=102...
method forward (line 21) | def forward(self, src, src_mask=None, src_key_padding_mask=None):
class FFTransformerBlock (line 38) | class FFTransformerBlock(nn.Module):
method __init__ (line 39) | def __init__(self, in_out_channels, num_heads, hidden_channels_ffn, nu...
method forward (line 53) | def forward(self, x, mask=None, g=None): # pylint: disable=unused-arg...
class FFTDurationPredictor (line 72) | class FFTDurationPredictor:
method __init__ (line 73) | def __init__(
method forward (line 79) | def forward(self, x, mask=None, g=None): # pylint: disable=unused-arg...
FILE: TTS/tts/layers/generic/wavenet.py
function fused_add_tanh_sigmoid_multiply (line 6) | def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
class WN (line 15) | class WN(torch.nn.Module):
method __init__ (line 36) | def __init__(
method forward (line 93) | def forward(self, x, x_mask=None, g=None, **kwargs): # pylint: disabl...
method remove_weight_norm (line 116) | def remove_weight_norm(self):
class WNBlocks (line 125) | class WNBlocks(nn.Module):
method __init__ (line 144) | def __init__(
method forward (line 171) | def forward(self, x, x_mask=None, g=None):
FILE: TTS/tts/layers/glow_tts/decoder.py
function squeeze (line 8) | def squeeze(x, x_mask=None, num_sqz=2):
function unsqueeze (line 31) | def unsqueeze(x, x_mask=None, num_sqz=2):
class Decoder (line 50) | class Decoder(nn.Module):
method __init__ (line 68) | def __init__(
method forward (line 113) | def forward(self, x, x_mask, g=None, reverse=False):
method store_inverse (line 139) | def store_inverse(self):
FILE: TTS/tts/layers/glow_tts/duration_predictor.py
class DurationPredictor (line 7) | class DurationPredictor(nn.Module):
method __init__ (line 21) | def __init__(self, in_channels, hidden_channels, kernel_size, dropout_...
method forward (line 47) | def forward(self, x, x_mask, g=None, lang_emb=None):
FILE: TTS/tts/layers/glow_tts/encoder.py
class Encoder (line 15) | class Encoder(nn.Module):
method __init__ (line 78) | def __init__(
method forward (line 143) | def forward(self, x, x_lengths, g=None):
FILE: TTS/tts/layers/glow_tts/glow.py
class ResidualConv1dLayerNormBlock (line 11) | class ResidualConv1dLayerNormBlock(nn.Module):
method __init__ (line 29) | def __init__(self, in_channels, hidden_channels, out_channels, kernel_...
method forward (line 55) | def forward(self, x, x_mask):
class InvConvNear (line 70) | class InvConvNear(nn.Module):
method __init__ (line 85) | def __init__(self, channels, num_splits=4, no_jacobian=False, **kwargs...
method forward (line 102) | def forward(self, x, x_mask=None, reverse=False, **kwargs): # pylint:...
method store_inverse (line 139) | def store_inverse(self):
class CouplingBlock (line 144) | class CouplingBlock(nn.Module):
method __init__ (line 167) | def __init__(
method forward (line 201) | def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs): #...
method store_inverse (line 232) | def store_inverse(self):
FILE: TTS/tts/layers/glow_tts/transformer.py
class RelativePositionMultiHeadAttention (line 10) | class RelativePositionMultiHeadAttention(nn.Module):
method __init__ (line 55) | def __init__(
method forward (line 109) | def forward(self, x, c, attn_mask=None):
method attention (line 123) | def attention(self, query, key, value, mask=None):
method _matmul_with_relative_values (line 166) | def _matmul_with_relative_values(p_attn, re):
method _matmul_with_relative_keys (line 181) | def _matmul_with_relative_keys(query, re):
method _get_relative_embeddings (line 196) | def _get_relative_embeddings(self, relative_embeddings, length):
method _relative_position_to_absolute_position (line 210) | def _relative_position_to_absolute_position(x):
method _absolute_position_to_relative_position (line 228) | def _absolute_position_to_relative_position(x):
method _attn_proximity_bias (line 244) | def _attn_proximity_bias(length):
class FeedForwardNetwork (line 262) | class FeedForwardNetwork(nn.Module):
method __init__ (line 273) | def __init__(self, in_channels, out_channels, hidden_channels, kernel_...
method forward (line 290) | def forward(self, x, x_mask):
method _causal_padding (line 297) | def _causal_padding(self, x):
method _same_padding (line 306) | def _same_padding(self, x):
method _pad_shape (line 316) | def _pad_shape(padding):
class RelativePositionTransformer (line 322) | class RelativePositionTransformer(nn.Module):
method __init__ (line 344) | def __init__(
method forward (line 411) | def forward(self, x, x_mask):
FILE: TTS/tts/layers/losses.py
class L1LossMasked (line 16) | class L1LossMasked(nn.Module):
method __init__ (line 17) | def __init__(self, seq_len_norm):
method forward (line 21) | def forward(self, x, target, length):
class MSELossMasked (line 55) | class MSELossMasked(nn.Module):
method __init__ (line 56) | def __init__(self, seq_len_norm):
method forward (line 60) | def forward(self, x, target, length):
function sample_wise_min_max (line 94) | def sample_wise_min_max(x: torch.Tensor, mask: torch.Tensor) -> torch.Te...
class SSIMLoss (line 105) | class SSIMLoss(torch.nn.Module):
method __init__ (line 110) | def __init__(self):
method forward (line 114) | def forward(self, y_hat, y, length):
class AttentionEntropyLoss (line 145) | class AttentionEntropyLoss(nn.Module):
method forward (line 147) | def forward(self, align):
class BCELossMasked (line 157) | class BCELossMasked(nn.Module):
method __init__ (line 166) | def __init__(self, pos_weight: float = None):
method forward (line 170) | def forward(self, x, target, length):
class DifferentialSpectralLoss (line 203) | class DifferentialSpectralLoss(nn.Module):
method __init__ (line 207) | def __init__(self, loss_func):
method forward (line 211) | def forward(self, x, target, length=None):
class GuidedAttentionLoss (line 227) | class GuidedAttentionLoss(torch.nn.Module):
method __init__ (line 228) | def __init__(self, sigma=0.4):
method _make_ga_masks (line 232) | def _make_ga_masks(self, ilens, olens):
method forward (line 241) | def forward(self, att_ws, ilens, olens):
method _make_ga_mask (line 249) | def _make_ga_mask(ilen, olen, sigma):
method _make_masks (line 255) | def _make_masks(ilens, olens):
class Huber (line 261) | class Huber(nn.Module):
method forward (line 263) | def forward(self, x, y, length=None):
class ForwardSumLoss (line 274) | class ForwardSumLoss(nn.Module):
method __init__ (line 275) | def __init__(self, blank_logprob=-1):
method forward (line 281) | def forward(self, attn_logprob, in_lens, out_lens):
class TacotronLoss (line 309) | class TacotronLoss(torch.nn.Module):
method __init__ (line 312) | def __init__(self, c, ga_sigma=0.4):
method forward (line 349) | def forward(
class GlowTTSLoss (line 503) | class GlowTTSLoss(torch.nn.Module):
method __init__ (line 504) | def __init__(self):
method forward (line 508) | def forward(self, z, means, scales, log_det, y_lengths, o_dur_log, o_a...
function mse_loss_custom (line 528) | def mse_loss_custom(x, y):
class MDNLoss (line 535) | class MDNLoss(nn.Module):
method forward (line 538) | def forward(self, logp, text_lengths, mel_lengths): # pylint: disable...
class AlignTTSLoss (line 559) | class AlignTTSLoss(nn.Module):
method __init__ (line 572) | def __init__(self, c):
method forward (line 584) | def forward(
class VitsGeneratorLoss (line 614) | class VitsGeneratorLoss(nn.Module):
method __init__ (line 615) | def __init__(self, c: Coqpit):
method feature_loss (line 636) | def feature_loss(feats_real, feats_generated):
method generator_loss (line 646) | def generator_loss(scores_fake):
method kl_loss (line 658) | def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
method cosine_similarity_loss (line 676) | def cosine_similarity_loss(gt_spk_emb, syn_spk_emb):
method forward (line 679) | def forward(
class VitsDiscriminatorLoss (line 739) | class VitsDiscriminatorLoss(nn.Module):
method __init__ (line 740) | def __init__(self, c: Coqpit):
method discriminator_loss (line 745) | def discriminator_loss(scores_real, scores_fake):
method forward (line 759) | def forward(self, scores_disc_real, scores_disc_fake):
class ForwardTTSLoss (line 774) | class ForwardTTSLoss(nn.Module):
method __init__ (line 777) | def __init__(self, c):
method _binary_alignment_loss (line 816) | def _binary_alignment_loss(alignment_hard, alignment_soft):
method forward (line 823) | def forward(
FILE: TTS/tts/layers/overflow/common_layers.py
class Encoder (line 12) | class Encoder(nn.Module):
method __init__ (line 24) | def __init__(self, num_chars, state_per_phone, in_out_channels=512, n_...
method forward (line 44) | def forward(self, x: torch.FloatTensor, x_len: torch.LongTensor) -> Tu...
method inference (line 70) | def inference(self, x, x_len):
class ParameterModel (line 95) | class ParameterModel(nn.Module):
method __init__ (line 108) | def __init__(
method flat_start_output_layer (line 127) | def flat_start_output_layer(self, mean, std, transition_p):
method forward (line 133) | def forward(self, x):
class Outputnet (line 140) | class Outputnet(nn.Module):
method __init__ (line 147) | def __init__(
method forward (line 173) | def forward(self, ar_mels, inputs):
method _floor_std (line 204) | def _floor_std(self, std):
class OverflowUtils (line 222) | class OverflowUtils:
method get_data_parameters_for_flat_start (line 224) | def get_data_parameters_for_flat_start(
method update_flat_start_transition (line 264) | def update_flat_start_transition(model, transition_p):
method log_clamped (line 268) | def log_clamped(x, eps=1e-04):
method inverse_sigmod (line 283) | def inverse_sigmod(x):
method inverse_softplus (line 292) | def inverse_softplus(x):
method logsumexp (line 301) | def logsumexp(x, dim):
method double_pad (line 316) | def double_pad(list_of_different_shape_tensors):
FILE: TTS/tts/layers/overflow/decoder.py
class Decoder (line 8) | class Decoder(nn.Module):
method __init__ (line 25) | def __init__(
method forward (line 56) | def forward(self, x, x_len, g=None, reverse=False):
method preprocess (line 73) | def preprocess(self, y, y_lengths, y_max_length):
method store_inverse (line 80) | def store_inverse(self):
FILE: TTS/tts/layers/overflow/neural_hmm.py
class NeuralHMM (line 14) | class NeuralHMM(nn.Module):
method __init__ (line 48) | def __init__(
method forward (line 94) | def forward(self, inputs, inputs_len, mels, mel_lens):
method _mask_lengths (line 159) | def _mask_lengths(mel_lens, log_c, log_alpha_scaled):
method _process_ar_timestep (line 177) | def _process_ar_timestep(
method _add_go_token (line 209) | def _add_go_token(self, mel_inputs):
method _initialize_forward_algorithm_variables (line 222) | def _initialize_forward_algorithm_variables(mel_inputs, N):
method _init_lstm_states (line 242) | def _init_lstm_states(batch_size, hidden_state_dim, device_tensor):
method get_absorption_state_scaling_factor (line 262) | def get_absorption_state_scaling_factor(self, mels_len, log_alpha_scal...
method get_mask_for_last_item (line 319) | def get_mask_for_last_item(lengths, device, out_tensor=None):
method inference (line 339) | def inference(
method sample (line 386) | def sample(self, inputs, input_lens, sampling_temp, max_sampling_time,...
method _initialize_log_state_priors (line 467) | def _initialize_log_state_priors(text_embeddings):
class TransitionModel (line 483) | class TransitionModel(nn.Module):
method forward (line 487) | def forward(self, log_alpha_scaled, transition_vector, inputs_len): #...
class EmissionModel (line 519) | class EmissionModel(nn.Module):
method __init__ (line 523) | def __init__(self) -> None:
method sample (line 527) | def sample(self, means, stds, sampling_temp):
method forward (line 530) | def forward(self, x_t, means, stds, state_lengths):
FILE: TTS/tts/layers/overflow/plotting_utils.py
function validate_numpy_array (line 8) | def validate_numpy_array(value: Any):
function get_spec_from_most_probable_state (line 33) | def get_spec_from_most_probable_state(log_alpha_scaled, means, decoder=N...
function plot_transition_probabilities_to_numpy (line 59) | def plot_transition_probabilities_to_numpy(states, transition_probabilit...
FILE: TTS/tts/layers/tacotron/attentions.py
class LocationLayer (line 9) | class LocationLayer(nn.Module):
method __init__ (line 18) | def __init__(self, attention_dim, attention_n_filters=32, attention_ke...
method forward (line 30) | def forward(self, attention_cat):
class GravesAttention (line 40) | class GravesAttention(nn.Module):
method __init__ (line 52) | def __init__(self, query_dim, K):
method init_layers (line 66) | def init_layers(self):
method init_states (line 70) | def init_states(self, inputs):
method preprocess_inputs (line 78) | def preprocess_inputs(self, inputs):
method forward (line 81) | def forward(self, query, inputs, processed_inputs, mask):
class OriginalAttention (line 127) | class OriginalAttention(nn.Module):
method __init__ (line 163) | def __init__(
method init_win_idx (line 198) | def init_win_idx(self):
method init_forward_attn (line 203) | def init_forward_attn(self, inputs):
method init_location_attention (line 209) | def init_location_attention(self, inputs):
method init_states (line 214) | def init_states(self, inputs):
method preprocess_inputs (line 225) | def preprocess_inputs(self, inputs):
method update_location_attention (line 228) | def update_location_attention(self, alignments):
method get_location_attention (line 231) | def get_location_attention(self, query, processed_inputs):
method get_attention (line 239) | def get_attention(self, query, processed_inputs):
method apply_windowing (line 245) | def apply_windowing(self, attention, inputs):
method apply_forward_attention (line 260) | def apply_forward_attention(self, alignment):
method forward (line 277) | def forward(self, query, inputs, processed_inputs, mask):
class MonotonicDynamicConvolutionAttention (line 323) | class MonotonicDynamicConvolutionAttention(nn.Module):
method __init__ (line 355) | def __init__(
method forward (line 392) | def forward(self, query, inputs, processed_inputs, mask):
method preprocess_inputs (line 431) | def preprocess_inputs(self, inputs): # pylint: disable=no-self-use
method init_states (line 434) | def init_states(self, inputs):
function init_attn (line 441) | def init_attn(
FILE: TTS/tts/layers/tacotron/capacitron_layers.py
class CapacitronVAE (line 7) | class CapacitronVAE(nn.Module):
method __init__ (line 12) | def __init__(
method forward (line 42) | def forward(self, reference_mel_info=None, text_info=None, speaker_emb...
class ReferenceEncoder (line 78) | class ReferenceEncoder(nn.Module):
method __init__ (line 85) | def __init__(self, num_mel, out_dim):
method forward (line 105) | def forward(self, inputs, input_lengths):
method calculate_post_conv_height (line 159) | def calculate_post_conv_height(height, kernel_size, stride, pad, n_con...
class TextSummary (line 166) | class TextSummary(nn.Module):
method __init__ (line 167) | def __init__(self, embedding_dim, encoder_output_dim):
method forward (line 176) | def forward(self, inputs, input_lengths):
class PostEncoderMLP (line 187) | class PostEncoderMLP(nn.Module):
method __init__ (line 188) | def __init__(self, input_size, hidden_size):
method forward (line 199) | def forward(self, _input):
FILE: TTS/tts/layers/tacotron/common_layers.py
class Linear (line 6) | class Linear(nn.Module):
method __init__ (line 16) | def __init__(self, in_features, out_features, bias=True, init_gain="li...
method _init_w (line 21) | def _init_w(self, init_gain):
method forward (line 24) | def forward(self, x):
class LinearBN (line 28) | class LinearBN(nn.Module):
method __init__ (line 40) | def __init__(self, in_features, out_features, bias=True, init_gain="li...
method _init_w (line 46) | def _init_w(self, init_gain):
method forward (line 49) | def forward(self, x):
class Prenet (line 63) | class Prenet(nn.Module):
method __init__ (line 90) | def __init__(
method forward (line 113) | def forward(self, x):
FILE: TTS/tts/layers/tacotron/gst_layers.py
class GST (line 6) | class GST(nn.Module):
method __init__ (line 11) | def __init__(self, num_mel, num_heads, num_style_tokens, gst_embedding...
method forward (line 16) | def forward(self, inputs, speaker_embedding=None):
class ReferenceEncoder (line 26) | class ReferenceEncoder(nn.Module):
method __init__ (line 33) | def __init__(self, num_mel, embedding_dim):
method forward (line 52) | def forward(self, inputs):
method calculate_post_conv_height (line 75) | def calculate_post_conv_height(height, kernel_size, stride, pad, n_con...
class StyleTokenLayer (line 82) | class StyleTokenLayer(nn.Module):
method __init__ (line 85) | def __init__(self, num_heads, num_style_tokens, gst_embedding_dim, d_v...
method forward (line 100) | def forward(self, inputs):
class MultiHeadAttention (line 111) | class MultiHeadAttention(nn.Module):
method __init__ (line 120) | def __init__(self, query_dim, key_dim, num_units, num_heads):
method forward (line 130) | def forward(self, query, key):
FILE: TTS/tts/layers/tacotron/tacotron.py
class BatchNormConv1d (line 11) | class BatchNormConv1d(nn.Module):
method __init__ (line 29) | def __init__(self, in_channels, out_channels, kernel_size, stride, pad...
method init_layers (line 41) | def init_layers(self):
method forward (line 52) | def forward(self, x):
class Highway (line 61) | class Highway(nn.Module):
method __init__ (line 74) | def __init__(self, in_features, out_feature):
method init_layers (line 84) | def init_layers(self):
method forward (line 88) | def forward(self, inputs):
class CBHG (line 94) | class CBHG(nn.Module):
method __init__ (line 112) | def __init__(
method forward (line 162) | def forward(self, inputs):
class EncoderCBHG (line 191) | class EncoderCBHG(nn.Module):
method __init__ (line 194) | def __init__(self):
method forward (line 206) | def forward(self, x):
class Encoder (line 210) | class Encoder(nn.Module):
method __init__ (line 220) | def __init__(self, in_features):
method forward (line 225) | def forward(self, inputs):
class PostCBHG (line 232) | class PostCBHG(nn.Module):
method __init__ (line 233) | def __init__(self, mel_dim):
method forward (line 245) | def forward(self, x):
class Decoder (line 249) | class Decoder(nn.Module):
method __init__ (line 277) | def __init__(
method set_r (line 336) | def set_r(self, new_r):
method _reshape_memory (line 339) | def _reshape_memory(self, memory):
method _init_states (line 350) | def _init_states(self, inputs):
method _parse_outputs (line 369) | def _parse_outputs(self, outputs, attentions, stop_tokens):
method decode (line 378) | def decode(self, inputs, mask=None):
method _update_memory_input (line 408) | def _update_memory_input(self, new_memory):
method forward (line 424) | def forward(self, inputs, memory, mask):
method inference (line 457) | def inference(self, inputs):
class StopNet (line 488) | class StopNet(nn.Module):
method __init__ (line 494) | def __init__(self, in_features):
method forward (line 500) | def forward(self, inputs):
FILE: TTS/tts/layers/tacotron/tacotron2.py
class ConvBNBlock (line 11) | class ConvBNBlock(nn.Module):
method __init__ (line 25) | def __init__(self, in_channels, out_channels, kernel_size, activation=...
method forward (line 39) | def forward(self, x):
class Postnet (line 47) | class Postnet(nn.Module):
method __init__ (line 58) | def __init__(self, in_out_channels, num_convs=5):
method forward (line 66) | def forward(self, x):
class Encoder (line 73) | class Encoder(nn.Module):
method __init__ (line 84) | def __init__(self, in_out_channels=512):
method forward (line 94) | def forward(self, x, input_lengths):
method inference (line 105) | def inference(self, x):
class Decoder (line 116) | class Decoder(nn.Module):
method __init__ (line 142) | def __init__(
method set_r (line 211) | def set_r(self, new_r):
method get_go_frame (line 214) | def get_go_frame(self, inputs):
method _init_states (line 219) | def _init_states(self, inputs, mask, keep_states=False):
method _reshape_memory (line 232) | def _reshape_memory(self, memory):
method _parse_outputs (line 243) | def _parse_outputs(self, outputs, stop_tokens, alignments):
method _update_memory (line 251) | def _update_memory(self, memory):
method decode (line 256) | def decode(self, memory):
method forward (line 295) | def forward(self, inputs, memories, mask):
method inference (line 329) | def inference(self, inputs):
method inference_truncated (line 369) | def inference_truncated(self, inputs):
method inference_step (line 402) | def inference_step(self, inputs, t, memory=None):
FILE: TTS/tts/layers/vits/discriminator.py
class DiscriminatorS (line 8) | class DiscriminatorS(torch.nn.Module):
method __init__ (line 15) | def __init__(self, use_spectral_norm=False):
method forward (line 30) | def forward(self, x):
class VitsDiscriminator (line 50) | class VitsDiscriminator(nn.Module):
method __init__ (line 61) | def __init__(self, periods=(2, 3, 5, 7, 11), use_spectral_norm=False):
method forward (line 67) | def forward(self, x, x_hat=None):
FILE: TTS/tts/layers/vits/networks.py
function convert_pad_shape (line 13) | def convert_pad_shape(pad_shape):
function init_weights (line 19) | def init_weights(m, mean=0.0, std=0.01):
function get_padding (line 25) | def get_padding(kernel_size, dilation=1):
class TextEncoder (line 29) | class TextEncoder(nn.Module):
method __init__ (line 30) | def __init__(
method forward (line 80) | def forward(self, x, x_lengths, lang_emb=None):
class ResidualCouplingBlock (line 103) | class ResidualCouplingBlock(nn.Module):
method __init__ (line 104) | def __init__(
method forward (line 138) | def forward(self, x, x_mask, g=None, reverse=False):
class ResidualCouplingBlocks (line 169) | class ResidualCouplingBlocks(nn.Module):
method __init__ (line 170) | def __init__(
method forward (line 214) | def forward(self, x, x_mask, g=None, reverse=False):
class PosteriorEncoder (line 235) | class PosteriorEncoder(nn.Module):
method __init__ (line 236) | def __init__(
method forward (line 275) | def forward(self, x, x_lengths, g=None):
FILE: TTS/tts/layers/vits/stochastic_duration_predictor.py
class DilatedDepthSeparableConv (line 11) | class DilatedDepthSeparableConv(nn.Module):
method __init__ (line 12) | def __init__(self, channels, kernel_size, num_layers, dropout_p=0.0) -...
method forward (line 46) | def forward(self, x, x_mask, g=None):
class ElementwiseAffine (line 66) | class ElementwiseAffine(nn.Module):
method __init__ (line 73) | def __init__(self, channels):
method forward (line 78) | def forward(self, x, x_mask, reverse=False, **kwargs): # pylint: disa...
class ConvFlow (line 87) | class ConvFlow(nn.Module):
method __init__ (line 99) | def __init__(
method forward (line 120) | def forward(self, x, x_mask, g=None, reverse=False):
class StochasticDurationPredictor (line 150) | class StochasticDurationPredictor(nn.Module):
method __init__ (line 180) | def __init__(
method forward (line 222) | def forward(self, x, x_mask, dr=None, g=None, lang_emb=None, reverse=F...
FILE: TTS/tts/layers/vits/transforms.py
function piecewise_rational_quadratic_transform (line 12) | def piecewise_rational_quadratic_transform(
function searchsorted (line 45) | def searchsorted(bin_locations, inputs, eps=1e-6):
function unconstrained_rational_quadratic_spline (line 50) | def unconstrained_rational_quadratic_spline(
function rational_quadratic_spline (line 97) | def rational_quadratic_spline(
FILE: TTS/tts/models/__init__.py
function setup_model (line 6) | def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]]...
FILE: TTS/tts/models/align_tts.py
class AlignTTSArgs (line 22) | class AlignTTSArgs(Coqpit):
class AlignTTS (line 70) | class AlignTTS(BaseTTS):
method __init__ (line 105) | def __init__(
method compute_log_probs (line 150) | def compute_log_probs(mu, log_sigma, y):
method compute_align_path (line 162) | def compute_align_path(self, mu, log_sigma, y, x_mask, y_mask):
method generate_attn (line 172) | def generate_attn(dr, x_mask, y_mask=None):
method expand_encoder_outputs (line 182) | def expand_encoder_outputs(self, en, dr, x_mask, y_mask):
method format_durations (line 200) | def format_durations(self, o_dr_log, x_mask):
method _concat_speaker_embedding (line 207) | def _concat_speaker_embedding(o_en, g):
method _sum_speaker_embedding (line 212) | def _sum_speaker_embedding(self, x, g):
method _forward_encoder (line 219) | def _forward_encoder(self, x, x_lengths, g=None):
method _forward_decoder (line 244) | def _forward_decoder(self, o_en, o_en_dp, dr, x_mask, y_lengths, g):
method _forward_mdn (line 258) | def _forward_mdn(self, o_en, y, y_lengths, x_mask):
method forward (line 265) | def forward(
method inference (line 321) | def inference(self, x, aux_input={"d_vectors": None}): # pylint: disa...
method train_step (line 342) | def train_step(self, batch: dict, criterion: nn.Module):
method _create_logs (line 365) | def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self...
method train_log (line 384) | def train_log(
method eval_step (line 391) | def eval_step(self, batch: dict, criterion: nn.Module):
method eval_log (line 394) | def eval_log(self, batch: dict, outputs: dict, logger: "Logger", asset...
method load_checkpoint (line 399) | def load_checkpoint(
method get_criterion (line 408) | def get_criterion(self):
method _set_phase (line 414) | def _set_phase(config, global_step):
method on_epoch_start (line 430) | def on_epoch_start(self, trainer):
method init_from_config (line 435) | def init_from_config(config: "AlignTTSConfig", samples: Union[List[Lis...
FILE: TTS/tts/models/base_tacotron.py
class BaseTacotron (line 21) | class BaseTacotron(BaseTTS):
method __init__ (line 24) | def __init__(
method _format_aux_input (line 62) | def _format_aux_input(aux_input: Dict) -> Dict:
method _init_backward_decoder (line 72) | def _init_backward_decoder(self):
method _init_coarse_decoder (line 76) | def _init_coarse_decoder(self):
method forward (line 87) | def forward(self):
method inference (line 91) | def inference(self):
method load_checkpoint (line 94) | def load_checkpoint(
method get_criterion (line 122) | def get_criterion(self) -> nn.Module:
method init_from_config (line 127) | def init_from_config(config: Coqpit):
method test_run (line 140) | def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
method test_log (line 177) | def test_log(
method compute_masks (line 187) | def compute_masks(self, text_lengths, mel_lengths):
method _backward_pass (line 199) | def _backward_pass(self, mel_specs, encoder_outputs, mask):
method _coarse_decoder_pass (line 207) | def _coarse_decoder_pass(self, mel_specs, encoder_outputs, alignments,...
method compute_gst (line 230) | def compute_gst(self, inputs, style_input, speaker_embedding=None):
method compute_capacitron_VAE_embedding (line 253) | def compute_capacitron_VAE_embedding(self, inputs, reference_mel_info,...
method _add_speaker_embedding (line 278) | def _add_speaker_embedding(outputs, embedded_speakers):
method _concat_speaker_embedding (line 284) | def _concat_speaker_embedding(outputs, embedded_speakers):
method on_epoch_start (line 293) | def on_epoch_start(self, trainer):
FILE: TTS/tts/models/base_tts.py
class BaseTTS (line 24) | class BaseTTS(BaseTrainerModel):
method __init__ (line 32) | def __init__(
method _set_model_args (line 48) | def _set_model_args(self, config: Coqpit):
method init_multispeaker (line 79) | def init_multispeaker(self, config: Coqpit, data: List = None):
method get_aux_input (line 112) | def get_aux_input(self, **kwargs) -> Dict:
method get_aux_input_from_test_sentences (line 116) | def get_aux_input_from_test_sentences(self, sentence_info):
method format_batch (line 163) | def format_batch(self, batch: Dict) -> Dict:
method get_sampler (line 242) | def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1):
method get_data_loader (line 280) | def get_data_loader(
method _get_test_aux_input (line 363) | def _get_test_aux_input(
method test_run (line 380) | def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
method on_init_start (line 420) | def on_init_start(self, trainer):
FILE: TTS/tts/models/forward_tts.py
class ForwardTTSArgs (line 23) | class ForwardTTSArgs(Coqpit):
class ForwardTTS (line 172) | class ForwardTTS(BaseTTS):
method __init__ (line 201) | def __init__(
method init_multispeaker (line 283) | def init_multispeaker(self, config: Coqpit):
method generate_attn (line 310) | def generate_attn(dr, x_mask, y_mask=None):
method expand_encoder_outputs (line 327) | def expand_encoder_outputs(self, en, dr, x_mask, y_mask):
method format_durations (line 352) | def format_durations(self, o_dr_log, x_mask):
method _forward_encoder (line 373) | def _forward_encoder(
method _forward_decoder (line 412) | def _forward_decoder(
method _forward_pitch_predictor (line 448) | def _forward_pitch_predictor(
method _forward_energy_predictor (line 484) | def _forward_energy_predictor(
method _forward_aligner (line 520) | def _forward_aligner(
method _set_speaker_input (line 561) | def _set_speaker_input(self, aux_input: Dict):
method forward (line 574) | def forward(
method inference (line 668) | def inference(self, x, aux_input={"d_vectors": None, "speaker_ids": No...
method train_step (line 710) | def train_step(self, batch: dict, criterion: nn.Module):
method _create_logs (line 762) | def _create_logs(self, batch, outputs, ap):
method train_log (line 809) | def train_log(
method eval_step (line 816) | def eval_step(self, batch: dict, criterion: nn.Module):
method eval_log (line 819) | def eval_log(self, batch: dict, outputs: dict, logger: "Logger", asset...
method load_checkpoint (line 824) | def load_checkpoint(
method get_criterion (line 833) | def get_criterion(self):
method on_train_step_start (line 838) | def on_train_step_start(self, trainer):
method init_from_config (line 843) | def init_from_config(config: "ForwardTTSConfig", samples: Union[List[L...
FILE: TTS/tts/models/glow_tts.py
class GlowTTS (line 22) | class GlowTTS(BaseTTS):
method __init__ (line 59) | def __init__(
method init_multispeaker (line 107) | def init_multispeaker(self, config: Coqpit):
method compute_outputs (line 138) | def compute_outputs(attn, o_mean, o_log_scale, x_mask):
method unlock_act_norm_layers (line 150) | def unlock_act_norm_layers(self):
method lock_act_norm_layers (line 156) | def lock_act_norm_layers(self):
method _set_speaker_input (line 162) | def _set_speaker_input(self, aux_input: Dict):
method _speaker_embedding (line 179) | def _speaker_embedding(self, aux_input: Dict) -> Union[torch.tensor, N...
method forward (line 193) | def forward(
method inference_with_MAS (line 263) | def inference_with_MAS(
method decoder_inference (line 319) | def decoder_inference(
method inference (line 342) | def inference(
method train_step (line 376) | def train_step(self, batch: dict, criterion: nn.Module):
method _create_logs (line 428) | def _create_logs(self, batch, outputs, ap):
method train_log (line 457) | def train_log(
method eval_step (line 465) | def eval_step(self, batch: dict, criterion: nn.Module):
method eval_log (line 468) | def eval_log(self, batch: dict, outputs: dict, logger: "Logger", asset...
method test_run (line 474) | def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
method preprocess (line 510) | def preprocess(self, y, y_lengths, y_max_length, attn=None):
method store_inverse (line 519) | def store_inverse(self):
method load_checkpoint (line 522) | def load_checkpoint(
method get_criterion (line 533) | def get_criterion():
method on_train_step_start (line 538) | def on_train_step_start(self, trainer):
method init_from_config (line 543) | def init_from_config(config: "GlowTTSConfig", samples: Union[List[List...
FILE: TTS/tts/models/neuralhmm_tts.py
class NeuralhmmTTS (line 23) | class NeuralhmmTTS(BaseTTS):
method __init__ (line 65) | def __init__(
method update_mean_std (line 101) | def update_mean_std(self, statistics_dict: Dict):
method preprocess_batch (line 105) | def preprocess_batch(self, text, text_len, mels, mel_len):
method normalize (line 113) | def normalize(self, x):
method inverse_normalize (line 116) | def inverse_normalize(self, x):
method forward (line 119) | def forward(self, text, text_len, mels, mel_len):
method _training_stats (line 147) | def _training_stats(batch):
method train_step (line 155) | def train_step(self, batch: dict, criterion: nn.Module):
method eval_step (line 173) | def eval_step(self, batch: Dict, criterion: nn.Module):
method _format_aux_input (line 176) | def _format_aux_input(self, aux_input: Dict, default_input_dict):
method inference (line 195) | def inference(
method get_criterion (line 234) | def get_criterion():
method init_from_config (line 238) | def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List...
method load_checkpoint (line 254) | def load_checkpoint(
method on_init_start (line 263) | def on_init_start(self, trainer):
method _create_logs (line 304) | def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self...
method train_log (line 340) | def train_log(
method eval_log (line 348) | def eval_log(
method test_log (line 363) | def test_log(
class NLLLoss (line 370) | class NLLLoss(nn.Module):
method forward (line 373) | def forward(self, log_prob: torch.Tensor) -> dict: # pylint: disable=...
FILE: TTS/tts/models/overflow.py
class Overflow (line 24) | class Overflow(BaseTTS):
method __init__ (line 62) | def __init__(
method update_mean_std (line 114) | def update_mean_std(self, statistics_dict: Dict):
method preprocess_batch (line 118) | def preprocess_batch(self, text, text_len, mels, mel_len):
method normalize (line 126) | def normalize(self, x):
method inverse_normalize (line 129) | def inverse_normalize(self, x):
method forward (line 132) | def forward(self, text, text_len, mels, mel_len):
method _training_stats (line 160) | def _training_stats(batch):
method train_step (line 168) | def train_step(self, batch: dict, criterion: nn.Module):
method eval_step (line 186) | def eval_step(self, batch: Dict, criterion: nn.Module):
method _format_aux_input (line 189) | def _format_aux_input(self, aux_input: Dict, default_input_dict):
method inference (line 208) | def inference(
method get_criterion (line 249) | def get_criterion():
method init_from_config (line 253) | def init_from_config(config: "OverFlowConfig", samples: Union[List[Lis...
method load_checkpoint (line 269) | def load_checkpoint(
method on_init_start (line 279) | def on_init_start(self, trainer):
method _create_logs (line 320) | def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self...
method train_log (line 356) | def train_log(
method eval_log (line 364) | def eval_log(
method test_log (line 379) | def test_log(
class NLLLoss (line 386) | class NLLLoss(nn.Module):
method forward (line 389) | def forward(self, log_prob: torch.Tensor) -> dict: # pylint: disable=...
FILE: TTS/tts/models/tacotron.py
class Tacotron (line 21) | class Tacotron(BaseTacotron):
method __init__ (line 32) | def __init__(
method forward (line 136) | def forward( # pylint: disable=dangerous-default-value
method inference (line 219) | def inference(self, text_input, aux_input=None):
method before_backward_pass (line 273) | def before_backward_pass(self, loss_dict, optimizer) -> None:
method train_step (line 280) | def train_step(self, batch: Dict, criterion: torch.nn.Module) -> Tuple...
method get_optimizer (line 332) | def get_optimizer(self) -> List:
method get_scheduler (line 337) | def get_scheduler(self, optimizer: object):
method before_gradient_clipping (line 341) | def before_gradient_clipping(self):
method _create_logs (line 351) | def _create_logs(self, batch, outputs, ap):
method train_log (line 380) | def train_log(
method eval_step (line 387) | def eval_step(self, batch: dict, criterion: nn.Module):
method eval_log (line 390) | def eval_log(self, batch: dict, outputs: dict, logger: "Logger", asset...
method init_from_config (line 396) | def init_from_config(config: "TacotronConfig", samples: Union[List[Lis...
FILE: TTS/tts/models/tacotron2.py
class Tacotron2 (line 21) | class Tacotron2(BaseTacotron):
method __init__ (line 46) | def __init__(
method shape_outputs (line 148) | def shape_outputs(mel_outputs, mel_outputs_postnet, alignments):
method forward (line 154) | def forward( # pylint: disable=dangerous-default-value
method inference (line 239) | def inference(self, text, aux_input=None):
method before_backward_pass (line 302) | def before_backward_pass(self, loss_dict, optimizer) -> None:
method train_step (line 309) | def train_step(self, batch: Dict, criterion: torch.nn.Module):
method get_optimizer (line 360) | def get_optimizer(self) -> List:
method get_scheduler (line 365) | def get_scheduler(self, optimizer: object):
method before_gradient_clipping (line 369) | def before_gradient_clipping(self):
method _create_logs (line 379) | def _create_logs(self, batch, outputs, ap):
method train_log (line 403) | def train_log(
method eval_step (line 411) | def eval_step(self, batch: dict, criterion: nn.Module):
method eval_log (line 414) | def eval_log(self, batch: dict, outputs: dict, logger: "Logger", asset...
method init_from_config (line 420) | def init_from_config(config: "Tacotron2Config", samples: Union[List[Li...
FILE: TTS/tts/models/vits.py
function weights_reset (line 50) | def weights_reset(m: nn.Module):
function get_module_weights_sum (line 57) | def get_module_weights_sum(mdl: nn.Module):
function load_audio (line 66) | def load_audio(file_path):
function _amp_to_db (line 77) | def _amp_to_db(x, C=1, clip_val=1e-5):
function _db_to_amp (line 81) | def _db_to_amp(x, C=1):
function amp_to_db (line 85) | def amp_to_db(magnitudes):
function db_to_amp (line 90) | def db_to_amp(magnitudes):
function wav_to_spec (line 95) | def wav_to_spec(y, n_fft, hop_length, win_length, center=False):
function spec_to_mel (line 140) | def spec_to_mel(spec, n_fft, num_mels, sample_rate, fmin, fmax):
function wav_to_mel (line 159) | def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, ...
class VitsAudioConfig (line 216) | class VitsAudioConfig(Coqpit):
function get_attribute_balancer_weights (line 231) | def get_attribute_balancer_weights(items: list, attr_name: str, multi_di...
class VitsDataset (line 255) | class VitsDataset(TTSDataset):
method __init__ (line 256) | def __init__(self, model_args, *args, **kwargs):
method __getitem__ (line 261) | def __getitem__(self, idx):
method lengths (line 293) | def lengths(self):
method collate_fn (line 301) | def collate_fn(self, batch):
class VitsArgs (line 365) | class VitsArgs(Coqpit):
class Vits (line 602) | class Vits(BaseTTS):
method __init__ (line 630) | def __init__(
method device (line 726) | def device(self):
method init_multispeaker (line 729) | def init_multispeaker(self, config: Coqpit):
method _init_speaker_embedding (line 773) | def _init_speaker_embedding(self):
method _init_d_vector (line 780) | def _init_d_vector(self):
method init_multilingual (line 786) | def init_multilingual(self, config: Coqpit):
method init_upsampling (line 804) | def init_upsampling(self):
method on_epoch_start (line 814) | def on_epoch_start(self, trainer): # pylint: disable=W0613
method on_init_end (line 821) | def on_init_end(self, trainer): # pylint: disable=W0613
method get_aux_input (line 843) | def get_aux_input(self, aux_input: Dict):
method _freeze_layers (line 847) | def _freeze_layers(self):
method _set_cond_input (line 873) | def _set_cond_input(aux_input: Dict):
method _set_speaker_input (line 895) | def _set_speaker_input(self, aux_input: Dict):
method forward_mas (line 908) | def forward_mas(self, outputs, z_p, m_p, logs_p, x, x_mask, y_mask, g,...
method upsampling_z (line 943) | def upsampling_z(self, z, slice_ids=None, y_lengths=None, y_mask=None):
method forward (line 960) | def forward( # pylint: disable=dangerous-default-value
method _set_x_lengths (line 1082) | def _set_x_lengths(x, aux_input):
method inference (line 1088) | def inference(
method inference_voice_conversion (line 1175) | def inference_voice_conversion(
method voice_conversion (line 1201) | def voice_conversion(self, y, y_lengths, speaker_cond_src, speaker_con...
method train_step (line 1229) | def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx:...
method _log (line 1332) | def _log(self, ap, batch, outputs, name_prefix="train"): # pylint: di...
method train_log (line 1349) | def train_log(
method eval_step (line 1370) | def eval_step(self, batch: dict, criterion: nn.Module, optimizer_idx: ...
method eval_log (line 1373) | def eval_log(self, batch: dict, outputs: dict, logger: "Logger", asset...
method get_aux_input_from_test_sentences (line 1378) | def get_aux_input_from_test_sentences(self, sentence_info):
method test_run (line 1427) | def test_run(self, assets) -> Tuple[Dict, Dict]:
method test_log (line 1457) | def test_log(
method format_batch (line 1463) | def format_batch(self, batch: Dict) -> Dict:
method format_batch_on_device (line 1494) | def format_batch_on_device(self, batch):
method get_sampler (line 1547) | def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1,...
method get_data_loader (line 1583) | def get_data_loader(
method get_optimizer (line 1651) | def get_optimizer(self) -> List:
method get_lr (line 1666) | def get_lr(self) -> List:
method get_scheduler (line 1674) | def get_scheduler(self, optimizer) -> List:
method get_criterion (line 1687) | def get_criterion(self):
method load_checkpoint (line 1697) | def load_checkpoint(
method init_from_config (line 1727) | def init_from_config(config: "VitsConfig", samples: Union[List[List], ...
class VitsCharacters (line 1767) | class VitsCharacters(BaseCharacters):
method __init__ (line 1770) | def __init__(
method _create_vocab (line 1781) | def _create_vocab(self):
method init_from_config (line 1788) | def init_from_config(config: Coqpit):
method to_config (line 1802) | def to_config(self) -> "CharactersConfig":
FILE: TTS/tts/utils/data.py
function _pad_data (line 7) | def _pad_data(x, length):
function prepare_data (line 13) | def prepare_data(inputs):
function _pad_tensor (line 18) | def _pad_tensor(x, length):
function prepare_tensor (line 25) | def prepare_tensor(inputs, out_steps):
function _pad_stop_target (line 32) | def _pad_stop_target(x: np.ndarray, length: int, pad_val=1) -> np.ndarray:
function prepare_stop_target (line 47) | def prepare_stop_target(inputs, out_steps):
function pad_per_step (line 55) | def pad_per_step(inputs, pad_len):
function get_length_balancer_weights (line 59) | def get_length_balancer_weights(items: list, num_buckets=10):
FILE: TTS/tts/utils/helpers.py
class StandardScaler (line 13) | class StandardScaler:
method __init__ (line 16) | def __init__(self, mean: np.ndarray = None, scale: np.ndarray = None) ...
method set_stats (line 20) | def set_stats(self, mean, scale):
method reset_stats (line 24) | def reset_stats(self):
method transform (line 28) | def transform(self, X):
method inverse_transform (line 34) | def inverse_transform(self, X):
function sequence_mask (line 42) | def sequence_mask(sequence_length, max_len=None):
function segment (line 60) | def segment(x: torch.tensor, segment_indices: torch.tensor, segment_size...
function rand_segments (line 86) | def rand_segments(
function average_over_durations (line 123) | def average_over_durations(values, durs):
function convert_pad_shape (line 148) | def convert_pad_shape(pad_shape):
function generate_path (line 154) | def generate_path(duration, mask):
function maximum_path (line 174) | def maximum_path(value, mask):
function maximum_path_cython (line 180) | def maximum_path_cython(value, mask):
function maximum_path_numpy (line 199) | def maximum_path_numpy(value, mask, max_neg_val=None):
FILE: TTS/tts/utils/languages.py
class LanguageManager (line 13) | class LanguageManager(BaseIDManager):
method __init__ (line 28) | def __init__(
method num_languages (line 39) | def num_languages(self) -> int:
method language_names (line 43) | def language_names(self) -> List:
method parse_language_ids_from_config (line 47) | def parse_language_ids_from_config(c: Coqpit) -> Dict:
method set_language_ids_from_config (line 64) | def set_language_ids_from_config(self, c: Coqpit) -> None:
method parse_ids_from_data (line 73) | def parse_ids_from_data(items: List, parse_key: str) -> Any:
method set_ids_from_data (line 76) | def set_ids_from_data(self, items: List, parse_key: str) -> Any:
method save_ids_to_file (line 79) | def save_ids_to_file(self, file_path: str) -> None:
method init_from_config (line 88) | def init_from_config(config: Coqpit) -> "LanguageManager":
function _set_file_path (line 102) | def _set_file_path(path):
function get_language_balancer_weights (line 115) | def get_language_balancer_weights(items: list):
FILE: TTS/tts/utils/managers.py
function load_file (line 14) | def load_file(path: str):
function save_file (line 25) | def save_file(obj: Any, path: str):
class BaseIDManager (line 36) | class BaseIDManager:
method __init__ (line 41) | def __init__(self, id_file_path: str = ""):
method _load_json (line 48) | def _load_json(json_file_path: str) -> Dict:
method _save_json (line 53) | def _save_json(json_file_path: str, data: dict) -> None:
method set_ids_from_data (line 57) | def set_ids_from_data(self, items: List, parse_key: str) -> None:
method load_ids_from_file (line 65) | def load_ids_from_file(self, file_path: str) -> None:
method save_ids_to_file (line 73) | def save_ids_to_file(self, file_path: str) -> None:
method get_random_id (line 81) | def get_random_id(self) -> Any:
method parse_ids_from_data (line 95) | def parse_ids_from_data(items: List, parse_key: str) -> Tuple[Dict]:
class EmbeddingManager (line 109) | class EmbeddingManager(BaseIDManager):
method __init__ (line 130) | def __init__(
method num_embeddings (line 157) | def num_embeddings(self):
method num_names (line 162) | def num_names(self):
method embedding_dim (line 167) | def embedding_dim(self):
method embedding_names (line 174) | def embedding_names(self):
method save_embeddings_to_file (line 178) | def save_embeddings_to_file(self, file_path: str) -> None:
method read_embeddings_from_file (line 187) | def read_embeddings_from_file(file_path: str):
method load_embeddings_from_file (line 206) | def load_embeddings_from_file(self, file_path: str) -> None:
method load_embeddings_from_list_of_files (line 216) | def load_embeddings_from_list_of_files(self, file_paths: List[str]) ->...
method get_embedding_by_clip (line 241) | def get_embedding_by_clip(self, clip_idx: str) -> List:
method get_embeddings_by_name (line 252) | def get_embeddings_by_name(self, idx: str) -> List[List]:
method get_embeddings_by_names (line 263) | def get_embeddings_by_names(self) -> Dict:
method get_mean_embedding (line 277) | def get_mean_embedding(self, idx: str, num_samples: int = None, random...
method get_random_embedding (line 299) | def get_random_embedding(self) -> Any:
method get_clips (line 312) | def get_clips(self) -> List:
method init_encoder (line 315) | def init_encoder(self, model_path: str, config_path: str, use_cuda=Fal...
method compute_embedding_from_clip (line 331) | def compute_embedding_from_clip(self, wav_file: Union[str, List[str]])...
method compute_embeddings (line 368) | def compute_embeddings(self, feats: Union[torch.Tensor, np.ndarray]) -...
FILE: TTS/tts/utils/measures.py
function alignment_diagonal_score (line 1) | def alignment_diagonal_score(alignments, binary=False):
FILE: TTS/tts/utils/monotonic_align/core.c
function CYTHON_INLINE (line 338) | static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int k, int l,...
type PyObject (line 423) | typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *co...
type PyObject (line 424) | typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, ...
type Py_tss_t (line 465) | typedef int Py_tss_t;
function CYTHON_INLINE (line 466) | static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
function CYTHON_INLINE (line 470) | static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
function CYTHON_INLINE (line 475) | static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
function CYTHON_INLINE (line 478) | static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
function CYTHON_INLINE (line 481) | static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
function CYTHON_INLINE (line 485) | static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
function CYTHON_INLINE (line 488) | static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
type Py_hash_t (line 633) | typedef long Py_hash_t;
type __Pyx_PyAsyncMethodsStruct (line 656) | typedef struct {
function CYTHON_INLINE (line 670) | static CYTHON_INLINE float __PYX_NAN() {
type __Pyx_StringTabEntry (line 719) | typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const c...
function CYTHON_INLINE (line 740) | static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t l...
function CYTHON_INLINE (line 789) | static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
function __Pyx_init_sys_getdefaultencoding_params (line 822) | static int __Pyx_init_sys_getdefaultencoding_params(void) {
function __Pyx_init_sys_getdefaultencoding_params (line 872) | static int __Pyx_init_sys_getdefaultencoding_params(void) {
function CYTHON_INLINE (line 904) | static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void...
type __pyx_memoryview_obj (line 955) | struct __pyx_memoryview_obj
type __Pyx_memviewslice (line 956) | typedef struct {
type __pyx_atomic_int_type (line 1001) | typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
type __Pyx_StructField_ (line 1021) | struct __Pyx_StructField_
type __Pyx_TypeInfo (line 1023) | typedef struct {
type __Pyx_StructField (line 1033) | typedef struct __Pyx_StructField_ {
type __Pyx_BufFmt_StackElem (line 1038) | typedef struct {
type __Pyx_BufFmt_Context (line 1042) | typedef struct {
type npy_int8 (line 1063) | typedef npy_int8 __pyx_t_5numpy_int8_t;
type npy_int16 (line 1072) | typedef npy_int16 __pyx_t_5numpy_int16_t;
type npy_int32 (line 1081) | typedef npy_int32 __pyx_t_5numpy_int32_t;
type npy_int64 (line 1090) | typedef npy_int64 __pyx_t_5numpy_int64_t;
type npy_uint8 (line 1099) | typedef npy_uint8 __pyx_t_5numpy_uint8_t;
type npy_uint16 (line 1108) | typedef npy_uint16 __pyx_t_5numpy_uint16_t;
type npy_uint32 (line 1117) | typedef npy_uint32 __pyx_t_5numpy_uint32_t;
type npy_uint64 (line 1126) | typedef npy_uint64 __pyx_t_5numpy_uint64_t;
type npy_float32 (line 1135) | typedef npy_float32 __pyx_t_5numpy_float32_t;
type npy_float64 (line 1144) | typedef npy_float64 __pyx_t_5numpy_float64_t;
type npy_long (line 1153) | typedef npy_long __pyx_t_5numpy_int_t;
type npy_longlong (line 1162) | typedef npy_longlong __pyx_t_5numpy_long_t;
type npy_longlong (line 1171) | typedef npy_longlong __pyx_t_5numpy_longlong_t;
type npy_ulong (line 1180) | typedef npy_ulong __pyx_t_5numpy_uint_t;
type npy_ulonglong (line 1189) | typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
type npy_ulonglong (line 1198) | typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
type npy_intp (line 1207) | typedef npy_intp __pyx_t_5numpy_intp_t;
type npy_uintp (line 1216) | typedef npy_uintp __pyx_t_5numpy_uintp_t;
type npy_double (line 1225) | typedef npy_double __pyx_t_5numpy_float_t;
type npy_double (line 1234) | typedef npy_double __pyx_t_5numpy_double_t;
type npy_longdouble (line 1243) | typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
type std (line 1247) | typedef ::std::complex< float > __pyx_t_float_complex;
type __pyx_t_float_complex (line 1249) | typedef float _Complex __pyx_t_float_complex;
type __pyx_t_float_complex (line 1252) | typedef struct { float real, imag; } __pyx_t_float_complex;
type std (line 1259) | typedef ::std::complex< double > __pyx_t_double_complex;
type __pyx_t_double_complex (line 1261) | typedef double _Complex __pyx_t_double_complex;
type __pyx_t_double_complex (line 1264) | typedef struct { double real, imag; } __pyx_t_double_complex;
type __pyx_array_obj (line 1270) | struct __pyx_array_obj
type __pyx_MemviewEnum_obj (line 1271) | struct __pyx_MemviewEnum_obj
type __pyx_memoryview_obj (line 1272) | struct __pyx_memoryview_obj
type __pyx_memoryviewslice_obj (line 1273) | struct __pyx_memoryviewslice_obj
type npy_cfloat (line 1282) | typedef npy_cfloat __pyx_t_5numpy_cfloat_t;
type npy_cdouble (line 1291) | typedef npy_cdouble __pyx_t_5numpy_cdouble_t;
type npy_clongdouble (line 1300) | typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;
type npy_cdouble (line 1309) | typedef npy_cdouble __pyx_t_5numpy_complex_t;
type __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c (line 1310) | struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_p...
type __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c (line 1319) | struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_p...
type __pyx_array_obj (line 1331) | struct __pyx_array_obj {
type __pyx_MemviewEnum_obj (line 1356) | struct __pyx_MemviewEnum_obj {
type __pyx_memoryview_obj (line 1369) | struct __pyx_memoryview_obj {
type __pyx_memoryviewslice_obj (line 1392) | struct __pyx_memoryviewslice_obj {
type __pyx_vtabstruct_array (line 1410) | struct __pyx_vtabstruct_array {
type __pyx_vtabstruct_array (line 1413) | struct __pyx_vtabstruct_array
type __pyx_vtabstruct_memoryview (line 1424) | struct __pyx_vtabstruct_memoryview {
type __pyx_vtabstruct_memoryview (line 1433) | struct __pyx_vtabstruct_memoryview
type __pyx_vtabstruct__memoryviewslice (line 1444) | struct __pyx_vtabstruct__memoryviewslice {
type __pyx_vtabstruct__memoryviewslice (line 1447) | struct __pyx_vtabstruct__memoryviewslice
type __Pyx_RefNannyAPIStruct (line 1455) | typedef struct {
type __pyx_memoryview_obj (line 1534) | struct __pyx_memoryview_obj
type __pyx_array_obj (line 1715) | struct __pyx_array_obj
function CYTHON_INLINE (line 1749) | static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s...
function CYTHON_INLINE (line 1753) | static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char ...
function CYTHON_INLINE (line 1757) | static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char ...
function CYTHON_INLINE (line 1857) | static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject*...
function CYTHON_INLINE (line 1881) | static CYTHON_INLINE int __Pyx_PyList_Extend(PyObject* L, PyObject* v) {
function CYTHON_INLINE (line 1895) | static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
type __Pyx_ImportType_CheckSize (line 1945) | enum __Pyx_ImportType_CheckSize {
type __Pyx_ImportType_CheckSize (line 1950) | enum __Pyx_ImportType_CheckSize
type __Pyx_CodeObjectCacheEntry (line 1961) | typedef struct {
type __Pyx_CodeObjectCache (line 1965) | struct __Pyx_CodeObjectCache {
type __Pyx_CodeObjectCache (line 1970) | struct __Pyx_CodeObjectCache
type __Pyx_Buf_DimInfo (line 1989) | typedef struct {
type __Pyx_Buffer (line 1992) | typedef struct {
type __Pyx_LocalBuf_ND (line 1996) | typedef struct {
type __pyx_array_obj (line 2176) | struct __pyx_array_obj
type __pyx_memoryview_obj (line 2177) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2178) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2179) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2180) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2180) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2181) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2182) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2183) | struct __pyx_memoryview_obj
type __pyx_memoryviewslice_obj (line 2184) | struct __pyx_memoryviewslice_obj
type __pyx_memoryviewslice_obj (line 2185) | struct __pyx_memoryviewslice_obj
type __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c (line 2242) | struct __pyx_opt_args_3TTS_3tts_5utils_15monotonic_align_4core_maximum_p...
type __pyx_array_obj (line 2243) | struct __pyx_array_obj
type __pyx_memoryview_obj (line 2249) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2249) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2254) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2255) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2256) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2257) | struct __pyx_memoryview_obj
type __pyx_MemviewEnum_obj (line 2275) | struct __pyx_MemviewEnum_obj
type __pyx_array_obj (line 2483) | struct __pyx_array_obj
type __pyx_array_obj (line 2484) | struct __pyx_array_obj
type __pyx_array_obj (line 2485) | struct __pyx_array_obj
type __pyx_array_obj (line 2486) | struct __pyx_array_obj
type __pyx_array_obj (line 2487) | struct __pyx_array_obj
type __pyx_array_obj (line 2488) | struct __pyx_array_obj
type __pyx_array_obj (line 2489) | struct __pyx_array_obj
type __pyx_array_obj (line 2490) | struct __pyx_array_obj
type __pyx_MemviewEnum_obj (line 2493) | struct __pyx_MemviewEnum_obj
type __pyx_MemviewEnum_obj (line 2494) | struct __pyx_MemviewEnum_obj
type __pyx_MemviewEnum_obj (line 2495) | struct __pyx_MemviewEnum_obj
type __pyx_MemviewEnum_obj (line 2496) | struct __pyx_MemviewEnum_obj
type __pyx_memoryview_obj (line 2497) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2498) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2499) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2500) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2501) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2502) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2503) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2504) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2505) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2506) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2507) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2508) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2509) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2510) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2511) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2512) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2513) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2514) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2515) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2516) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 2517) | struct __pyx_memoryview_obj
type __pyx_memoryviewslice_obj (line 2520) | struct __pyx_memoryviewslice_obj
type __pyx_memoryviewslice_obj (line 2521) | struct __pyx_memoryviewslice_obj
function __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_each (line 2571) | static void __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_pat...
function __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_path_c (line 2869) | static void __pyx_f_3TTS_3tts_5utils_15monotonic_align_4core_maximum_pat...
function PyObject (line 3023) | static PyObject *__pyx_pw_3TTS_3tts_5utils_15monotonic_align_4core_1maxi...
function PyObject (line 3125) | static PyObject *__pyx_pf_3TTS_3tts_5utils_15monotonic_align_4core_maxim...
function CYTHON_INLINE (line 3171) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyOb...
function CYTHON_INLINE (line 3221) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyOb...
function CYTHON_INLINE (line 3271) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyOb...
function CYTHON_INLINE (line 3321) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyOb...
function CYTHON_INLINE (line 3371) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyOb...
function CYTHON_INLINE (line 3421) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_D...
function CYTHON_INLINE (line 3495) | static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *_...
function CYTHON_INLINE (line 3537) | static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObje...
function CYTHON_INLINE (line 3618) | static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) {
function CYTHON_INLINE (line 3750) | static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) {
function CYTHON_INLINE (line 3882) | static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) {
function CYTHON_INLINE (line 4014) | static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *...
function CYTHON_INLINE (line 4051) | static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *_...
function CYTHON_INLINE (line 4088) | static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(Py...
function CYTHON_INLINE (line 4122) | static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(...
function CYTHON_INLINE (line 4156) | static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit...
function __pyx_array___cinit__ (line 4190) | static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx...
function __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__ (line 4318) | static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(s...
function CYTHON_UNUSED (line 4941) | static CYTHON_UNUSED int __pyx_array_getbuffer(PyObject *__pyx_v_self, P...
function __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__ (line 4952) | static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffe...
function __pyx_array___dealloc__ (line 5248) | static void __pyx_array___dealloc__(PyObject *__pyx_v_self) {
function __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__ (line 5257) | static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc...
function PyObject (line 5379) | static PyObject *__pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__...
function PyObject (line 5390) | static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(...
function PyObject (line 5440) | static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v...
function Py_ssize_t (line 5522) | static Py_ssize_t __pyx_array___len__(PyObject *__pyx_v_self) {
function Py_ssize_t (line 5533) | static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__l...
function PyObject (line 5572) | static PyObject *__pyx_array___getattr__(PyObject *__pyx_v_self, PyObjec...
function PyObject (line 5583) | static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__ge...
function PyObject (line 5640) | static PyObject *__pyx_array___getitem__(PyObject *__pyx_v_self, PyObjec...
function PyObject (line 5651) | static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__g...
function __pyx_array___setitem__ (line 5708) | static int __pyx_array___setitem__(PyObject *__pyx_v_self, PyObject *__p...
function __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__ (line 5719) | static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem...
function PyObject (line 5768) | static PyObject *__pyx_pw___pyx_array_1__reduce_cython__(PyObject *__pyx...
function PyObject (line 5779) | static PyObject *__pyx_pf___pyx_array___reduce_cython__(CYTHON_UNUSED st...
function PyObject (line 5825) | static PyObject *__pyx_pw___pyx_array_3__setstate_cython__(PyObject *__p...
function PyObject (line 5836) | static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED...
type __pyx_array_obj (line 5881) | struct __pyx_array_obj
type __pyx_array_obj (line 5882) | struct __pyx_array_obj
type __pyx_array_obj (line 5883) | struct __pyx_array_obj
type __pyx_array_obj (line 5935) | struct __pyx_array_obj
type __pyx_array_obj (line 5999) | struct __pyx_array_obj
function __pyx_MemviewEnum___init__ (line 6058) | static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *...
function __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__ (line 6109) | static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init...
function PyObject (line 6151) | static PyObject *__pyx_MemviewEnum___repr__(PyObject *__pyx_v_self) {
function PyObject (line 6162) | static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_...
function PyObject (line 6202) | static PyObject *__pyx_pw___pyx_MemviewEnum_1__reduce_cython__(PyObject ...
function PyObject (line 6213) | static PyObject *__pyx_pf___pyx_MemviewEnum___reduce_cython__(struct __p...
function PyObject (line 6437) | static PyObject *__pyx_pw___pyx_MemviewEnum_3__setstate_cython__(PyObjec...
function PyObject (line 6448) | static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct ...
function __pyx_memoryview___cinit__ (line 6580) | static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *...
function __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__ (line 6660) | static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_...
function __pyx_memoryview___dealloc__ (line 6959) | static void __pyx_memoryview___dealloc__(PyObject *__pyx_v_self) {
function __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__ (line 6968) | static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview...
type __pyx_memoryview_obj (line 7188) | struct __pyx_memoryview_obj
function PyObject (line 7328) | static PyObject *__pyx_memoryview___getitem__(PyObject *__pyx_v_self, Py...
function PyObject (line 7339) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function __pyx_memoryview___setitem__ (line 7517) | static int __pyx_memoryview___setitem__(PyObject *__pyx_v_self, PyObject...
function __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__ (line 7528) | static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_...
function PyObject (line 7743) | static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *...
function PyObject (line 7953) | static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_...
function PyObject (line 8043) | static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __p...
function PyObject (line 8333) | static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryvie...
function PyObject (line 8394) | static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_me...
function PyObject (line 8671) | static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_m...
function CYTHON_UNUSED (line 8912) | static CYTHON_UNUSED int __pyx_memoryview_getbuffer(PyObject *__pyx_v_se...
function __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__ (line 8923) | static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_...
function PyObject (line 9256) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__...
function PyObject (line 9267) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(...
function PyObject (line 9342) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__ge...
function PyObject (line 9353) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get...
function PyObject (line 9395) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__g...
function PyObject (line 9406) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___ge...
function PyObject (line 9476) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1_...
function PyObject (line 9487) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___...
function PyObject (line 9590) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_10suboffset...
function PyObject (line 9601) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffset...
function PyObject (line 9708) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__ge...
function PyObject (line 9719) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get...
function PyObject (line 9771) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1...
function PyObject (line 9782) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize__...
function PyObject (line 9834) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__...
function PyObject (line 9845) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___g...
function PyObject (line 9907) | static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__ge...
function PyObject (line 9918) | static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get...
function Py_ssize_t (line 10048) | static Py_ssize_t __pyx_memoryview___len__(PyObject *__pyx_v_self) {
function Py_ssize_t (line 10059) | static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memo...
function PyObject (line 10128) | static PyObject *__pyx_memoryview___repr__(PyObject *__pyx_v_self) {
function PyObject (line 10139) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function PyObject (line 10230) | static PyObject *__pyx_memoryview___str__(PyObject *__pyx_v_self) {
function PyObject (line 10241) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function PyObject (line 10309) | static PyObject *__pyx_memoryview_is_c_contig(PyObject *__pyx_v_self, CY...
function PyObject (line 10320) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function PyObject (line 10385) | static PyObject *__pyx_memoryview_is_f_contig(PyObject *__pyx_v_self, CY...
function PyObject (line 10396) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function PyObject (line 10461) | static PyObject *__pyx_memoryview_copy(PyObject *__pyx_v_self, CYTHON_UN...
function PyObject (line 10472) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function PyObject (line 10555) | static PyObject *__pyx_memoryview_copy_fortran(PyObject *__pyx_v_self, C...
function PyObject (line 10566) | static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memor...
function PyObject (line 10648) | static PyObject *__pyx_pw___pyx_memoryview_1__reduce_cython__(PyObject *...
function PyObject (line 10659) | static PyObject *__pyx_pf___pyx_memoryview___reduce_cython__(CYTHON_UNUS...
function PyObject (line 10705) | static PyObject *__pyx_pw___pyx_memoryview_3__setstate_cython__(PyObject...
function PyObject (line 10716) | static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_U...
function PyObject (line 10761) | static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_f...
function CYTHON_INLINE (line 10852) | static CYTHON_INLINE int __pyx_memoryview_check(PyObject *__pyx_v_o) {
function PyObject (line 10891) | static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) {
function PyObject (line 11348) | static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets...
type __pyx_memoryview_obj (line 11436) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 11436) | struct __pyx_memoryview_obj
type __pyx_memoryviewslice_obj (line 11443) | struct __pyx_memoryviewslice_obj
type __pyx_memoryview_obj (line 11453) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 11458) | struct __pyx_memoryview_obj
type __pyx_memoryviewslice_obj (line 11528) | struct __pyx_memoryviewslice_obj
type __pyx_memoryview_obj (line 11940) | struct __pyx_memoryview_obj
type __pyx_memoryview_obj (line 11981) | struct __pyx_memoryview_obj
function __pyx_memoryview_slice_memviewslice (line 12016) | static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx...
function __pyx_memslice_transpose (line 13109) | static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) {
function __pyx_memoryviewslice___dealloc__ (line 13285) | static void __pyx_memoryviewslice___dealloc__(PyObject *__pyx_v_self) {
function __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__ (line 13294) | static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memo...
function PyObject (line 13327) | static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __p...
function PyObject (line 13413) | static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __...
function PyObject (line 13498) | static PyObject *__pyx_pw_15View_dot_MemoryView_16_memoryviewslice_4base...
function PyObject (line 13509) | static PyObject *__pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base...
function PyObject (line 13549) | static PyObject *__pyx_pw___pyx_memoryviewslice_1__reduce_cython__(PyObj...
function PyObject (line 13560) | static PyObject *__pyx_pf___pyx_memoryviewslice___reduce_cython__(CYTHON...
function PyObject (line 13606) | static PyObject *__pyx_pw___pyx_memoryviewslice_3__setstate_cython__(PyO...
function PyObject (line 13617) | static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYT...
function PyObject (line 13662) | static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_m...
function __Pyx_memviewslice (line 14048) | static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(st...
function __pyx_memoryview_slice_copy (line 14151) | static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__p...
function PyObject (line 14277) | static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_ob...
function PyObject (line 14337) | static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_me...
function Py_ssize_t (line 14463) | static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) {
function __pyx_get_best_slice_order (line 14529) | static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslic...
function _copy_strided_to_strided (line 14719) | static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t ...
function copy_strided_to_strided (line 14956) | static void copy_strided_to_strided(__Pyx_memviewslice *__pyx_v_src, __P...
function Py_ssize_t (line 14986) | static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__...
function Py_ssize_t (line 15058) | static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_sh...
type __pyx_memoryview_obj (line 15189) | struct __pyx_memoryview_obj
function __pyx_memoryview_err_extents (line 15435) | static int __pyx_memoryview_err_extents(int __pyx_v_i, Py_ssize_t __pyx_...
function __pyx_memoryview_err_dim (line 15523) | static int __pyx_memoryview_err_dim(PyObject *__pyx_v_error, char *__pyx...
function __pyx_memoryview_err (line 15607) | static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_m...
function __pyx_memoryview_copy_contents (line 15717) | static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src...
function __pyx_memoryview_broadcast_leading (line 16296) | static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx...
function __pyx_memoryview_refcount_copying (line 16409) | static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_...
function __pyx_memoryview_refcount_objects_in_slice_with_gil (line 16459) | static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *__...
function __pyx_memoryview_refcount_objects_in_slice (line 16498) | static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_dat...
function __pyx_memoryview_slice_assign_scalar (line 16630) | static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__p...
function __pyx_memoryview__slice_assign_scalar (line 16678) | static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py...
function PyObject (line 16810) | static PyObject *__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum(PyO...
function PyObject (line 16883) | static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTH...
function PyObject (line 17074) | static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum...
type __pyx_vtabstruct_array (line 17197) | struct __pyx_vtabstruct_array
function PyObject (line 17199) | static PyObject *__pyx_tp_new_array(PyTypeObject *t, PyObject *a, PyObje...
function __pyx_tp_dealloc_array (line 17219) | static void __pyx_tp_dealloc_array(PyObject *o) {
function PyObject (line 17238) | static PyObject *__pyx_sq_item_array(PyObject *o, Py_ssize_t i) {
function __pyx_mp_ass_subscript_array (line 17246) | static int __pyx_mp_ass_subscript_array(PyObject *o, PyObject *i, PyObje...
function PyObject (line 17257) | static PyObject *__pyx_tp_getattro_array(PyObject *o, PyObject *n) {
function PyObject (line 17266) | static PyObject *__pyx_getprop___pyx_array_memview(PyObject *o, CYTHON_U...
type PyGetSetDef (line 17277) | struct PyGetSetDef
type __pyx_array_obj (line 17321) | struct __pyx_array_obj
function PyObject (line 17390) | static PyObject *__pyx_tp_new_Enum(PyTypeObject *t, CYTHON_UNUSED PyObje...
function __pyx_tp_dealloc_Enum (line 17404) | static void __pyx_tp_dealloc_Enum(PyObject *o) {
function __pyx_tp_traverse_Enum (line 17416) | static int __pyx_tp_traverse_Enum(PyObject *o, visitproc v, void *a) {
function __pyx_tp_clear_Enum (line 17425) | static int __pyx_tp_clear_Enum(PyObject *o) {
type __pyx_MemviewEnum_obj (line 17443) | struct __pyx_MemviewEnum_obj
type __pyx_vtabstruct_memoryview (line 17511) | struct __pyx_vtabstruct_memoryview
function PyObject (line 17513) | static PyObject *__pyx_tp_new_memoryview(PyTypeObject *t, PyObject *a, P...
function __pyx_tp_dealloc_memoryview (line 17535) | static void __pyx_tp_dealloc_memoryview(PyObject *o) {
function __pyx_tp_traverse_memoryview (line 17557) | static int __pyx_tp_traverse_memoryview(PyObject *o, visitproc v, void *...
function __pyx_tp_clear_memoryview (line 17575) | static int __pyx_tp_clear_memoryview(PyObject *o) {
function PyObject (line 17590) | static PyObject *__pyx_sq_item_memoryview(PyObject *o, Py_ssize_t i) {
function __pyx_mp_ass_subscript_memoryview (line 17598) | static int __pyx_mp_ass_subscript_memoryview(PyObject *o, PyObject *i, P...
function PyObject (line 17609) | static PyObject *__pyx_getprop___pyx_memoryview_T(PyObject *o, CYTHON_UN...
function PyObject (line 17613) | static PyObject *__pyx_getprop___pyx_memoryview_base(PyObject *o, CYTHON...
function PyObject (line 17617) | static PyObject *__pyx_getprop___pyx_memoryview_shape(PyObject *o, CYTHO...
function PyObject (line 17621) | static PyObject *__pyx_getprop___pyx_memoryview_strides(PyObject *o, CYT...
function PyObject (line 17625) | static PyObject *__pyx_getprop___pyx_memoryview_suboffsets(PyObject *o, ...
function PyObject (line 17629) | static PyObject *__pyx_getprop___pyx_memoryview_ndim(PyObject *o, CYTHON...
function PyObject (line 17633) | static PyObject *__pyx_getprop___pyx_memoryview_itemsize(PyObject *o, CY...
function PyObject (line 17637) | static PyObject *__pyx_getprop___pyx_memoryview_nbytes(PyObject *o, CYTH...
function PyObject (line 17641) | static PyObject *__pyx_getprop___pyx_memoryview_size(PyObject *o, CYTHON...
type PyGetSetDef (line 17655) | struct PyGetSetDef
type __pyx_memoryview_obj (line 17707) | struct __pyx_memoryview_obj
type __pyx_vtabstruct__memoryviewslice (line 17775) | struct __pyx_vtabstruct__memoryviewslice
function PyObject (line 17777) | static PyObject *__pyx_tp_new__memoryviewslice(PyTypeObject *t, PyObject...
function __pyx_tp_dealloc__memoryviewslice (line 17788) | static void __pyx_tp_dealloc__memoryviewslice(PyObject *o) {
function __pyx_tp_traverse__memoryviewslice (line 17809) | static int __pyx_tp_traverse__memoryviewslice(PyObject *o, visitproc v, ...
function __pyx_tp_clear__memoryviewslice (line 17819) | static int __pyx_tp_clear__memoryviewslice(PyObject *o) {
function PyObject (line 17830) | static PyObject *__pyx_getprop___pyx_memoryviewslice_base(PyObject *o, C...
type PyGetSetDef (line 17840) | struct PyGetSetDef
type __pyx_memoryviewslice_obj (line 17848) | struct __pyx_memoryviewslice_obj
type PyModuleDef (line 17941) | struct PyModuleDef
function CYTHON_SMALL_CODE (line 18070) | static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
function CYTHON_SMALL_CODE (line 18085) | static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {
function CYTHON_SMALL_CODE (line 18374) | static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) {
function __Pyx_modinit_global_init_code (line 18400) | static int __Pyx_modinit_global_init_code(void) {
function __Pyx_modinit_variable_export_code (line 18413) | static int __Pyx_modinit_variable_export_code(void) {
function __Pyx_modinit_function_export_code (line 18421) | static int __Pyx_modinit_function_export_code(void) {
function __Pyx_modinit_type_init_code (line 18429) | static int __Pyx_modinit_type_init_code(void) {
function __Pyx_modinit_type_import_code (line 18494) | static int __Pyx_modinit_type_import_code(void) {
function __Pyx_modinit_variable_import_code (line 18554) | static int __Pyx_modinit_variable_import_code(void) {
function __Pyx_modinit_function_import_code (line 18562) | static int __Pyx_modinit_function_import_code(void) {
function __Pyx_PyMODINIT_FUNC (line 18593) | __Pyx_PyMODINIT_FUNC PyInit_core(void)
function CYTHON_SMALL_CODE (line 18598) | static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) {
function CYTHON_SMALL_CODE (line 18621) | static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, P...
function CYTHON_SMALL_CODE (line 18636) | static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, CY...
function __Pyx_RefNannyAPIStruct (line 18981) | static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modn...
function CYTHON_INLINE (line 18998) | static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, ...
function PyObject (line 19011) | static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
function __Pyx_init_memviewslice (line 19025) | static int
function __pyx_fatalerror (line 19077) | static void __pyx_fatalerror(const char *fmt, ...) Py_NO_RETURN {
function CYTHON_INLINE (line 19089) | static CYTHON_INLINE int
function CYTHON_INLINE (line 19099) | static CYTHON_INLINE int
function CYTHON_INLINE (line 19109) | static CYTHON_INLINE void
function CYTHON_INLINE (line 19130) | static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW(__Pyx_memviewslice *memslice,
function __Pyx_RaiseArgtupleInvalid (line 19157) | static void __Pyx_RaiseArgtupleInvalid(
function __Pyx_RaiseDoubleKeywordsError (line 19183) | static void __Pyx_RaiseDoubleKeywordsError(
function __Pyx_ParseOptionalKeywords (line 19197) | static int __Pyx_ParseOptionalKeywords(
function CYTHON_INLINE (line 19299) | static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varna...
function _PyErr_StackItem (line 19305) | static _PyErr_StackItem *
function CYTHON_INLINE (line 19320) | static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, Py...
function CYTHON_INLINE (line 19335) | static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, P...
function __Pyx_PyErr_ExceptionMatchesTuple (line 19361) | static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObjec...
function CYTHON_INLINE (line 19374) | static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadSta...
function __Pyx_GetException (line 19388) | static int __Pyx_GetException(PyObject **type, PyObject **value, PyObjec...
function CYTHON_INLINE (line 19460) | static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObj...
function CYTHON_INLINE (line 19480) | static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate,...
function CYTHON_INLINE (line 19492) | static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, P...
function __Pyx_Raise (line 19504) | static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
function __Pyx_Raise (line 19555) | static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, P...
function __Pyx__ArgTypeTest (line 19662) | static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const c...
function CYTHON_INLINE (line 19684) | static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *fun...
function PyObject (line 19707) | static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObjec...
function CYTHON_UNUSED (line 19825) | static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* functi...
function CYTHON_INLINE (line 19855) | static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, ...
function PyObject (line 19875) | static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *ar...
function CYTHON_INLINE (line 19885) | static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func,...
function CYTHON_INLINE (line 19903) | static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func,...
function CYTHON_INLINE (line 19914) | static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2...
function CYTHON_INLINE (line 19961) | static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* ...
function CYTHON_INLINE (line 20063) | static CYTHON_INLINE Py_ssize_t __Pyx_div_Py_ssize_t(Py_ssize_t a, Py_ss...
function CYTHON_INLINE (line 20071) | static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) {
function PyObject (line 20084) | static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
function CYTHON_INLINE (line 20091) | static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, P...
function CYTHON_INLINE (line 20109) | static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, ...
function CYTHON_INLINE (line 20127) | static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssi...
function PyObject (line 20172) | static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) {
function PyObject (line 20190) | static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key) {
function CYTHON_INLINE (line 20200) | static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
function PyObject (line 20233) | static PyObject *__Pyx_GetAttr3Default(PyObject *d) {
function CYTHON_INLINE (line 20242) | static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *o, PyObject *n, ...
function CYTHON_INLINE (line 20249) | static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) {
function CYTHON_INLINE (line 20253) | static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject ...
function CYTHON_INLINE (line 20265) | static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj...
function CYTHON_INLINE (line 20277) | static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
function CYTHON_INLINE (line 20309) | static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expec...
function CYTHON_INLINE (line 20315) | static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t inde...
function CYTHON_INLINE (line 20322) | static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) {
function CYTHON_INLINE (line 20327) | static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *typ...
function CYTHON_INLINE (line 20341) | static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, Py...
function CYTHON_INLINE (line 20364) | static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject ...
function PyObject (line 20375) | static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int l...
function __Pyx_InBases (line 20441) | static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) {
function CYTHON_INLINE (line 20449) | static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *...
function __Pyx_inner_PyErr_GivenExceptionMatches2 (line 20465) | static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObj...
function CYTHON_INLINE (line 20487) | static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObje...
function __Pyx_PyErr_GivenExceptionMatchesTuple (line 20495) | static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, Py...
function CYTHON_INLINE (line 20516) | static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err...
function CYTHON_INLINE (line 20528) | static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *er...
function PyObject (line 20541) | static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHO...
function __Pyx_div_long (line 20664) | static CYTHON_INLINE long __Pyx_div_long(long a, long b) {
function PyObject (line 20672) | static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
function CYTHON_INLINE (line 20686) | static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) {
function PyObject (line 20705) | static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, P...
function CYTHON_INLINE (line 20716) | static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObj...
function PyObject (line 20745) | static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* ...
function __Pyx_SetVtable (line 20754) | static int __Pyx_SetVtable(PyObject *dict, void *vtable) {
function __Pyx_PyObject_GetAttrStr_ClearAttributeError (line 20772) | static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) {
function CYTHON_INLINE (line 20778) | static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject...
function __Pyx_setup_reduce_is_named (line 20794) | static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) {
function __Pyx_setup_reduce (line 20810) | static int __Pyx_setup_reduce(PyObject* type_obj) {
function PyTypeObject (line 20880) | static PyTypeObject *__Pyx_ImportType(PyObject *module, const char *modu...
function __Pyx_CLineForTraceback (line 20940) | static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tsta...
function __pyx_bisect_code_objects (line 20981) | static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries...
function PyCodeObject (line 21002) | static PyCodeObject *__pyx_find_code_object(int code_line) {
function __pyx_insert_code_object (line 21016) | static void __pyx_insert_code_object(int code_line, PyCodeObject* code_o...
function PyCodeObject (line 21064) | static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
function __Pyx_AddTraceback (line 21122) | static void __Pyx_AddTraceback(const char *funcname, int c_line,
function __Pyx_GetBuffer (line 21152) | static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) {
function __Pyx_ReleaseBuffer (line 21159) | static void __Pyx_ReleaseBuffer(Py_buffer *view) {
function __pyx_memviewslice_is_contig (line 21174) | static int
function __pyx_get_array_memory_extents (line 21196) | static void
function __pyx_slices_overlap (line 21220) | static int
function CYTHON_INLINE (line 21232) | static CYTHON_INLINE PyObject *
function CYTHON_INLINE (line 21245) | static CYTHON_INLINE int __Pyx_Is_Little_Endian(void)
function __Pyx_BufFmt_Init (line 21256) | static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
function __Pyx_BufFmt_ParseNumber (line 21283) | static int __Pyx_BufFmt_ParseNumber(const char** ts) {
function __Pyx_BufFmt_ExpectNumber (line 21298) | static int __Pyx_BufFmt_ExpectNumber(const char **ts) {
function __Pyx_BufFmt_RaiseUnexpectedChar (line 21305) | static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) {
function __Pyx_BufFmt_TypeCharToStandardSize (line 21334) | static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_comple...
function __Pyx_BufFmt_TypeCharToNativeSize (line 21352) | static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) {
type __Pyx_st_short (line 21371) | typedef struct { char c; short x; } __Pyx_st_short;
type __Pyx_st_int (line 21372) | typedef struct { char c; int x; } __Pyx_st_int;
type __Pyx_st_long (line 21373) | typedef struct { char c; long x; } __Pyx_st_long;
type __Pyx_st_float (line 21374) | typedef struct { char c; float x; } __Pyx_st_float;
type __Pyx_st_double (line 21375) | typedef struct { char c; double x; } __Pyx_st_double;
type __Pyx_st_longdouble (line 21376) | typedef struct { char c; long double x; } __Pyx_st_longdouble;
type __Pyx_st_void_p (line 21377) | typedef struct { char c; void *x; } __Pyx_st_void_p;
type __Pyx_st_longlong (line 21379) | typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong;
function __Pyx_BufFmt_TypeCharToAlignment (line 21381) | static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED in...
type __Pyx_pad_short (line 21403) | typedef struct { short x; char c; } __Pyx_pad_short;
type __Pyx_pad_int (line 21404) | typedef struct { int x; char c; } __Pyx_pad_int;
type __Pyx_pad_long (line 21405) | typedef struct { long x; char c; } __Pyx_pad_long;
type __Pyx_pad_float (line 21406) | typedef struct { float x; char c; } __Pyx_pad_float;
type __Pyx_pad_double (line 21407) | typedef struct { double x; char c; } __Pyx_pad_double;
type __Pyx_pad_longdouble (line 21408) | typedef struct { long double x; char c; } __Pyx_pad_longdouble;
type __Pyx_pad_void_p (line 21409) | typedef struct { void *x; char c; } __Pyx_pad_void_p;
type __Pyx_pad_longlong (line 21411) | typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong;
function __Pyx_BufFmt_TypeCharToPadding (line 21413) | static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int ...
function __Pyx_BufFmt_TypeCharToGroup (line 21431) | static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) {
function __Pyx_BufFmt_RaiseExpected (line 21452) | static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) {
function __Pyx_BufFmt_ProcessTypeChunk (line 21476) | static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
function PyObject (line 21578) | static PyObject *
function __pyx_typeinfo_cmp (line 21758) | static int
function __pyx_check_strides (line 21799) | static int
function __pyx_check_suboffsets (line 21852) | static int
function __pyx_verify_contig (line 21875) | static int
function __Pyx_ValidateAndInit_memviewslice (line 21904) | static int __Pyx_ValidateAndInit_memviewslice(
function CYTHON_INLINE (line 21980) | static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlic...
function CYTHON_INLINE (line 22003) | static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlic...
function CYTHON_INLINE (line 22026) | static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlic...
function CYTHON_INLINE (line 22073) | static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_pa...
function CYTHON_INLINE (line 22077) | static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_pa...
function CYTHON_INLINE (line 22082) | static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_pa...
function CYTHON_INLINE (line 22093) | static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx...
function CYTHON_INLINE (line 22096) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_flo...
function CYTHON_INLINE (line 22102) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_fl...
function CYTHON_INLINE (line 22108) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_fl...
function CYTHON_INLINE (line 22115) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_fl...
function CYTHON_INLINE (line 22135) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_fl...
function CYTHON_INLINE (line 22146) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_flo...
function CYTHON_INLINE (line 22152) | static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) {
function CYTHON_INLINE (line 22155) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_fl...
function CYTHON_INLINE (line 22162) | static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) {
function CYTHON_INLINE (line 22169) | static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_flo...
function CYTHON_INLINE (line 22227) | static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_...
function CYTHON_INLINE (line 22231) | static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_...
function CYTHON_INLINE (line 22236) | static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_...
function CYTHON_INLINE (line 22247) | static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __p...
function CYTHON_INLINE (line 22250) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_d...
function CYTHON_INLINE (line 22256) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_...
function CYTHON_INLINE (line 22262) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_...
function CYTHON_INLINE (line 22269) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_...
function CYTHON_INLINE (line 22289) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_...
function CYTHON_INLINE (line 22300) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_d...
function CYTHON_INLINE (line 22306) | static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) {
function CYTHON_INLINE (line 22309) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_...
function CYTHON_INLINE (line 22316) | static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) {
function CYTHON_INLINE (line 22323) | static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_d...
function __Pyx_memviewslice (line 22379) | static __Pyx_memviewslice
function CYTHON_INLINE (line 22446) | static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) {
function CYTHON_INLINE (line 22680) | static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
function __Pyx_check_binary_version (line 23110) | static int __Pyx_check_binary_version(void) {
function __Pyx_InitStrings (line 23126) | static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
function CYTHON_INLINE (line 23158) | static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_...
function CYTHON_INLINE (line 23161) | static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) {
function CYTHON_INLINE (line 23188) | static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObjec...
function CYTHON_INLINE (line 23230) | static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
function CYTHON_INLINE (line 23235) | static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) {
function PyObject (line 23242) | static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* resul...
function CYTHON_INLINE (line 23311) | static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
function CYTHON_INLINE (line 23373) | static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) {
function CYTHON_INLINE (line 23390) | static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) {
function CYTHON_INLINE (line 23393) | static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
FILE: TTS/tts/utils/speakers.py
class SpeakerManager (line 14) | class SpeakerManager(EmbeddingManager):
method __init__ (line 54) | def __init__(
method num_speakers (line 75) | def num_speakers(self):
method speaker_names (line 79) | def speaker_names(self):
method get_speakers (line 82) | def get_speakers(self) -> List:
method init_from_config (line 86) | def init_from_config(config: "Coqpit", samples: Union[List[List], List...
function _set_file_path (line 119) | def _set_file_path(path):
function load_speaker_mapping (line 132) | def load_speaker_mapping(out_path):
function save_speaker_mapping (line 142) | def save_speaker_mapping(out_path, speaker_mapping):
function get_speaker_manager (line 150) | def get_speaker_manager(c: Coqpit, data: List = None, restore_path: str ...
function get_speaker_balancer_weights (line 213) | def get_speaker_balancer_weights(items: list):
FILE: TTS/tts/utils/ssim.py
function _reduce (line 10) | def _reduce(x: torch.Tensor, reduction: str = "mean") -> torch.Tensor:
function _validate_input (line 26) | def _validate_input(
function gaussian_filter (line 69) | def gaussian_filter(kernel_size: int, sigma: float) -> torch.Tensor:
function ssim (line 87) | def ssim(
class SSIMLoss (line 155) | class SSIMLoss(_Loss):
method __init__ (line 212) | def __init__(
method forward (line 239) | def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
function _ssim_per_channel (line 266) | def _ssim_per_channel(
function _ssim_per_channel_complex (line 317) | def _ssim_per_channel_complex(
FILE: TTS/tts/utils/synthesis.py
function numpy_to_torch (line 8) | def numpy_to_torch(np_array, dtype, cuda=False):
function compute_style_mel (line 17) | def compute_style_mel(style_wav, ap, cuda=False):
function run_model_torch (line 24) | def run_model_torch(
function trim_silence (line 64) | def trim_silence(wav, ap):
function inv_spectrogram (line 68) | def inv_spectrogram(postnet_output, ap, CONFIG):
function id_to_torch (line 76) | def id_to_torch(aux_id, cuda=False):
function embedding_to_torch (line 85) | def embedding_to_torch(d_vector, cuda=False):
function apply_griffin_lim (line 96) | def apply_griffin_lim(inputs, input_lens, CONFIG, ap):
function synthesis (line 113) | def synthesis(
function transfer_voice (line 247) | def transfer_voice(
FILE: TTS/tts/utils/text/characters.py
function parse_symbols (line 7) | def parse_symbols():
class BaseVocabulary (line 38) | class BaseVocabulary:
method __init__ (line 47) | def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bo...
method pad_id (line 55) | def pad_id(self) -> int:
method blank_id (line 61) | def blank_id(self) -> int:
method vocab (line 67) | def vocab(self):
method vocab (line 72) | def vocab(self, vocab):
method init_from_config (line 81) | def init_from_config(config, **kwargs):
method num_chars (line 97) | def num_chars(self):
method char_to_id (line 101) | def char_to_id(self, char: str) -> int:
method id_to_char (line 108) | def id_to_char(self, idx: int) -> str:
class BaseCharacters (line 113) | class BaseCharacters:
method __init__ (line 148) | def __init__(
method pad_id (line 170) | def pad_id(self) -> int:
method blank_id (line 174) | def blank_id(self) -> int:
method characters (line 178) | def characters(self):
method characters (line 182) | def characters(self, characters):
method punctuations (line 187) | def punctuations(self):
method punctuations (line 191) | def punctuations(self, punctuations):
method pad (line 196) | def pad(self):
method pad (line 200) | def pad(self, pad):
method eos (line 205) | def eos(self):
method eos (line 209) | def eos(self, eos):
method bos (line 214) | def bos(self):
method bos (line 218) | def bos(self, bos):
method blank (line 223) | def blank(self):
method blank (line 227) | def blank(self, blank):
method vocab (line 232) | def vocab(self):
method vocab (line 236) | def vocab(self, vocab):
method num_chars (line 244) | def num_chars(self):
method _create_vocab (line 247) | def _create_vocab(self):
method char_to_id (line 265) | def char_to_id(self, char: str) -> int:
method id_to_char (line 271) | def id_to_char(self, idx: int) -> str:
method print_log (line 274) | def print_log(self, level: int = 0):
method init_from_config (line 289) | def init_from_config(config: "Coqpit"): # pylint: disable=unused-argu...
method to_config (line 302) | def to_config(self) -> "CharactersConfig":
class IPAPhonemes (line 315) | class IPAPhonemes(BaseCharacters):
method __init__ (line 347) | def __init__(
method init_from_config (line 361) | def init_from_config(config: "Coqpit"):
class Graphemes (line 393) | class Graphemes(BaseCharacters):
method __init__ (line 422) | def __init__(
method init_from_config (line 436) | def init_from_config(config: "Coqpit"):
FILE: TTS/tts/utils/text/chinese_mandarin/numbers.py
function _num2chinese (line 12) | def _num2chinese(num: str, big=False, simp=True, o=False, twoalt=False) ...
function _number_replace (line 104) | def _number_replace(match) -> str:
function replace_numbers_to_characters_in_text (line 117) | def replace_numbers_to_characters_in_text(text: str) -> str:
FILE: TTS/tts/utils/text/chinese_mandarin/phonemizer.py
function _chinese_character_to_pinyin (line 9) | def _chinese_character_to_pinyin(text: str) -> List[str]:
function _chinese_pinyin_to_phoneme (line 15) | def _chinese_pinyin_to_phoneme(pinyin: str) -> str:
function chinese_text_to_phonemes (line 22) | def chinese_text_to_phonemes(text: str, seperator: str = "|") -> str:
FILE: TTS/tts/utils/text/cleaners.py
function expand_abbreviations (line 19) | def expand_abbreviations(text, lang="en"):
function lowercase (line 29) | def lowercase(text):
function collapse_whitespace (line 33) | def collapse_whitespace(text):
function convert_to_ascii (line 37) | def convert_to_ascii(text):
function remove_aux_symbols (line 41) | def remove_aux_symbols(text):
function replace_symbols (line 46) | def replace_symbols(text, lang="en"):
function basic_cleaners (line 79) | def basic_cleaners(text):
function transliteration_cleaners (line 86) | def transliteration_cleaners(text):
function basic_german_cleaners (line 94) | def basic_german_cleaners(text):
function basic_turkish_cleaners (line 102) | def basic_turkish_cleaners(text):
function english_cleaners (line 110) | def english_cleaners(text):
function phoneme_cleaners (line 123) | def phoneme_cleaners(text):
function french_cleaners (line 133) | def french_cleaners(text):
function portuguese_cleaners (line 143) | def portuguese_cleaners(text):
function chinese_mandarin_cleaners (line 153) | def chinese_mandarin_cleaners(text: str) -> str:
function multilingual_cleaners (line 159) | def multilingual_cleaners(text):
FILE: TTS/tts/utils/text/cmudict.py
class CMUDict (line 93) | class CMUDict:
method __init__ (line 96) | def __init__(self, file_or_path, keep_ambiguous=True):
method __len__ (line 106) | def __len__(self):
method lookup (line 109) | def lookup(self, word):
method get_arpabet (line 114) | def get_arpabet(word, cmudict, punctuation_symbols):
function _parse_cmudict (line 131) | def _parse_cmudict(file):
function _get_pronunciation (line 146) | def _get_pronunciation(s):
FILE: TTS/tts/utils/text/english/number_norm.py
function _remove_commas (line 16) | def _remove_commas(m):
function _expand_decimal_point (line 20) | def _expand_decimal_point(m):
function __expand_currency (line 24) | def __expand_currency(value: str, inflection: Dict[float, str]) -> str:
function _expand_currency (line 42) | def _expand_currency(m: "re.Match") -> str:
function _expand_ordinal (line 74) | def _expand_ordinal(m):
function _expand_number (line 78) | def _expand_number(m):
function normalize_numbers (line 91) | def normalize_numbers(text):
FILE: TTS/tts/utils/text/english/time_norm.py
function _expand_num (line 18) | def _expand_num(n: int) -> str:
function _expand_time_english (line 22) | def _expand_time_english(match: "re.Match") -> str:
function expand_time_english (line 46) | def expand_time_english(text: str) -> str:
FILE: TTS/tts/utils/text/japanese/phonemizer.py
function _makerulemap (line 310) | def _makerulemap():
function kata2phoneme (line 318) | def kata2phoneme(text: str) -> str:
function hira2kata (line 345) | def hira2kata(text: str) -> str:
function text2kata (line 355) | def text2kata(text: str) -> str:
function japanese_convert_numbers_to_words (line 449) | def japanese_convert_numbers_to_words(text: str) -> str:
function japanese_convert_alpha_symbols_to_words (line 456) | def japanese_convert_alpha_symbols_to_words(text: str) -> str:
function japanese_text_to_phonemes (line 460) | def japanese_text_to_phonemes(text: str) -> str:
FILE: TTS/tts/utils/text/korean/korean.py
function normalize (line 8) | def normalize(text):
function normalize_with_dictionary (line 17) | def normalize_with_dictionary(text, dic):
function normalize_english (line 24) | def normalize_english(text):
FILE: TTS/tts/utils/text/korean/phonemizer.py
function korean_text_to_phonemes (line 8) | def korean_text_to_phonemes(text, character: str = "hangeul") -> str:
FILE: TTS/tts/utils/text/phonemizers/__init__.py
function get_phonemizer_by_name (line 33) | def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
FILE: TTS/tts/utils/text/phonemizers/base.py
class BasePhonemizer (line 7) | class BasePhonemizer(abc.ABC):
method __init__ (line 34) | def __init__(self, language, punctuations=Punctuation.default_puncs(),...
method _init_language (line 46) | def _init_language(self, language):
method language (line 57) | def language(self):
method name (line 63) | def name():
method is_available (line 69) | def is_available(cls):
method version (line 75) | def version(cls):
method supported_languages (line 81) | def supported_languages():
method is_supported_language (line 85) | def is_supported_language(self, language):
method _phonemize (line 90) | def _phonemize(self, text, separator):
method _phonemize_preprocess (line 93) | def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
method _phonemize_postprocess (line 107) | def _phonemize_postprocess(self, phonemized, punctuations) -> str:
method phonemize (line 116) | def phonemize(self, text: str, separator="|", language: str = None) ->...
method print_logs (line 137) | def print_logs(self, level: int = 0):
FILE: TTS/tts/utils/text/phonemizers/espeak_wrapper.py
function is_tool (line 12) | def is_tool(name):
function get_espeak_version (line 23) | def get_espeak_version():
function get_espeakng_version (line 30) | def get_espeakng_version():
function _espeak_exe (line 47) | def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
class ESpeak (line 83) | class ESpeak(BasePhonemizer):
method __init__ (line 112) | def __init__(self, language: str, backend=None, punctuations=Punctuati...
method backend (line 128) | def backend(self):
method backend_version (line 132) | def backend_version(self):
method backend (line 136) | def backend(self, backend):
method auto_set_espeak_lib (line 142) | def auto_set_espeak_lib(self) -> None:
method name (line 153) | def name():
method phonemize_espeak (line 156) | def phonemize_espeak(self, text: str, separator: str = "|", tie=False)...
method _phonemize (line 212) | def _phonemize(self, text, separator=None):
method supported_languages (line 216) | def supported_languages() -> Dict:
method version (line 238) | def version(self) -> str:
method is_available (line 251) | def is_available(cls):
FILE: TTS/tts/utils/text/phonemizers/gruut_wrapper.py
class Gruut (line 14) | class Gruut(BasePhonemizer):
method __init__ (line 41) | def __init__(
method name (line 54) | def name():
method phonemize_gruut (line 57) | def phonemize_gruut(self, text: str, separator: str = "|", tie=False) ...
method _phonemize (line 109) | def _phonemize(self, text, separator):
method is_supported_language (line 112) | def is_supported_language(self, language):
method supported_languages (line 117) | def supported_languages() -> List:
method version (line 125) | def version(self):
method is_available (line 134) | def is_available(cls):
FILE: TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
function trans (line 11) | def trans(text):
class JA_JP_Phonemizer (line 17) | class JA_JP_Phonemizer(BasePhonemizer):
method __init__ (line 33) | def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwar...
method name (line 37) | def name():
method _phonemize (line 40) | def _phonemize(self, text: str, separator: str = "|") -> str:
method phonemize (line 46) | def phonemize(self, text: str, separator="|", language=None) -> str:
method supported_languages (line 54) | def supported_languages() -> Dict:
method version (line 57) | def version(self) -> str:
method is_available (line 60) | def is_available(self) -> bool:
FILE: TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py
class KO_KR_Phonemizer (line 9) | class KO_KR_Phonemizer(BasePhonemizer):
method __init__ (line 30) | def __init__(self, punctuations=_DEF_KO_PUNCS, keep_puncs=True, **kwar...
method name (line 34) | def name():
method _phonemize (line 37) | def _phonemize(self, text: str, separator: str = "", character: str = ...
method phonemize (line 43) | def phonemize(self, text: str, separator: str = "", character: str = "...
method supported_languages (line 47) | def supported_languages() -> Dict:
method version (line 50) | def version(self) -> str:
method is_available (line 53) | def is_available(self) -> bool:
FILE: TTS/tts/utils/text/phonemizers/multi_phonemizer.py
class MultiPhonemizer (line 6) | class MultiPhonemizer:
method __init__ (line 19) | def __init__(self, lang_to_phonemizer_name: Dict = {}) -> None: # pyl...
method init_phonemizers (line 29) | def init_phonemizers(lang_to_phonemizer_name: Dict) -> Dict:
method name (line 36) | def name():
method phonemize (line 39) | def phonemize(self, text, separator="|", language=""):
method supported_languages (line 44) | def supported_languages(self) -> List:
method print_logs (line 47) | def print_logs(self, level: int = 0):
FILE: TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
class ZH_CN_Phonemizer (line 9) | class ZH_CN_Phonemizer(BasePhonemizer):
method __init__ (line 28) | def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwa...
method name (line 32) | def name():
method phonemize_zh_cn (line 36) | def phonemize_zh_cn(text: str, separator: str = "|") -> str:
method _phonemize (line 40) | def _phonemize(self, text, separator):
method supported_languages (line 44) | def supported_languages() -> Dict:
method version (line 47) | def version(self) -> str:
method is_available (line 50) | def is_available(self) -> bool:
FILE: TTS/tts/utils/text/punctuation.py
class PuncPosition (line 12) | class PuncPosition(Enum):
class Punctuation (line 21) | class Punctuation:
method __init__ (line 43) | def __init__(self, puncs: str = _DEF_PUNCS):
method default_puncs (line 47) | def default_puncs():
method puncs (line 52) | def puncs(self):
method puncs (line 56) | def puncs(self, value):
method strip (line 62) | def strip(self, text):
method strip_to_restore (line 74) | def strip_to_restore(self, text):
method _strip_to_restore (line 88) | def _strip_to_restore(self, text):
method restore (line 118) | def restore(cls, text, puncs):
method _restore (line 133) | def _restore(cls, text, puncs, num): # pylint: disable=too-many-retur...
FILE: TTS/tts/utils/text/tokenizer.py
class TTSTokenizer (line 10) | class TTSTokenizer:
method __init__ (line 38) | def __init__(
method characters (line 56) | def characters(self):
method characters (line 60) | def characters(self, new_characters):
method encode (line 65) | def encode(self, text: str) -> List[int]:
method decode (line 80) | def decode(self, token_ids: List[int]) -> str:
method text_to_ids (line 87) | def text_to_ids(self, text: str, language: str = None) -> List[int]: ...
method ids_to_text (line 117) | def ids_to_text(self, id_sequence: List[int]) -> str:
method pad_with_bos_eos (line 121) | def pad_with_bos_eos(self, char_sequence: List[str]):
method intersperse_blank_char (line 125) | def intersperse_blank_char(self, char_sequence: List[str], use_blank_c...
method print_logs (line 135) | def print_logs(self, level: int = 0):
method init_from_config (line 149) | def init_from_config(config: "Coqpit", characters: "BaseCharacters" = ...
FILE: TTS/tts/utils/visual.py
function plot_alignment (line 11) | def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, ...
function plot_spectrogram (line 36) | def plot_spectrogram(spectrogram, ap=None, fig_size=(16, 10), output_fig...
function plot_pitch (line 53) | def plot_pitch(pitch, spectrogram, ap=None, fig_size=(30, 10), output_fi...
function plot_avg_pitch (line 93) | def plot_avg_pitch(pitch, chars, fig_size=(30, 10), output_fig=False):
function plot_avg_energy (line 126) | def plot_avg_energy(energy, chars, fig_size=(30, 10), output_fig=False):
function visualize (line 159) | def visualize(
FILE: TTS/utils/audio/numpy_transforms.py
function build_mel_basis (line 13) | def build_mel_basis(
function millisec_to_length (line 33) | def millisec_to_length(
function _log (line 48) | def _log(x, base):
function _exp (line 54) | def _exp(x, base):
function amp_to_db (line 60) | def amp_to_db(*, x: np.ndarray = None, gain: float = 1, base: int = 10, ...
function db_to_amp (line 76) | def db_to_amp(*, x: np.ndarray = None, gain: float = 1, base: int = 10, ...
function preemphasis (line 90) | def preemphasis(*, x: np.ndarray, coef: float = 0.97, **kwargs) -> np.nd...
function deemphasis (line 107) | def deemphasis(*, x: np.ndarray = None, coef: float = 0.97, **kwargs) ->...
function spec_to_mel (line 114) | def spec_to_mel(*, spec: np.ndarray, mel_basis: np.ndarray = None, **kwa...
function mel_to_spec (line 129) | def mel_to_spec(*, mel: np.ndarray = None, mel_basis: np.ndarray = None,...
function wav_to_spec (line 136) | def wav_to_spec(*, wav: np.ndarray = None, **kwargs) -> np.ndarray:
function wav_to_mel (line 150) | def wav_to_mel(*, wav: np.ndarray = None, mel_basis=None, **kwargs) -> n...
function spec_to_wav (line 157) | def spec_to_wav(*, spec: np.ndarray, power: float = 1.5, **kwargs) -> np...
function mel_to_wav (line 163) | def mel_to_wav(*, mel: np.ndarray = None, power: float = 1.5, **kwargs) ...
function stft (line 171) | def stft(
function istft (line 200) | def istft(
function griffin_lim (line 220) | def griffin_lim(*, spec: np.ndarray = None, num_iter=60, **kwargs) -> np...
function compute_stft_paddings (line 233) | def compute_stft_paddings(
function compute_f0 (line 244) | def compute_f0(
function compute_energy (line 306) | def compute_energy(y: np.ndarray, **kwargs) -> np.ndarray:
function find_endpoint (line 328) | def find_endpoint(
function trim_silence (line 359) | def trim_silence(
function volume_norm (line 374) | def volume_norm(*, x: np.ndarray = None, coef: float = 0.95, **kwargs) -...
function rms_norm (line 387) | def rms_norm(*, wav: np.ndarray = None, db_level: float = -27.0, **kwarg...
function rms_volume_norm (line 393) | def rms_volume_norm(*, x: np.ndarray, db_level: float = -27.0, **kwargs)...
function load_wav (line 408) | def load_wav(*, filename: str, sample_rate: int = None, resample: bool =...
function save_wav (line 430) | def save_wav(*, wav: np.ndarray, path: str, sample_rate: int = None, **k...
function mulaw_encode (line 442) | def mulaw_encode(*, wav: np.ndarray, mulaw_qc: int, **kwargs) -> np.ndar...
function mulaw_decode (line 451) | def mulaw_decode(*, wav, mulaw_qc: int, **kwargs) -> np.ndarray:
function encode_16bits (line 458) | def encode_16bits(*, x: np.ndarray, **kwargs) -> np.ndarray:
function quantize (line 462) | def quantize(*, x: np.ndarray, quantize_bits: int, **kwargs) -> np.ndarray:
function dequantize (line 475) | def dequantize(*, x, quantize_bits, **kwargs) -> np.ndarray:
FILE: TTS/utils/audio/processor.py
class AudioProcessor (line 15) | class AudioProcessor(object):
method __init__ (line 124) | def __init__(
method init_from_config (line 229) | def init_from_config(config: "Coqpit", verbose=True):
method _build_mel_basis (line 235) | def _build_mel_basis(
method _stft_parameters (line 249) | def _stft_parameters(
method normalize (line 264) | def normalize(self, S: np.ndarray) -> np.ndarray:
method denormalize (line 305) | def denormalize(self, S: np.ndarray) -> np.ndarray:
method load_stats (line 344) | def load_stats(self, stats_path: str) -> Tuple[np.array, np.array, np....
method setup_scaler (line 372) | def setup_scaler(
method _amp_to_db (line 390) | def _amp_to_db(self, x: np.ndarray) -> np.ndarray:
method _db_to_amp (line 402) | def _db_to_amp(self, x: np.ndarray) -> np.ndarray:
method apply_preemphasis (line 414) | def apply_preemphasis(self, x: np.ndarray) -> np.ndarray:
method apply_inv_preemphasis (line 430) | def apply_inv_preemphasis(self, x: np.ndarray) -> np.ndarray:
method _linear_to_mel (line 437) | def _linear_to_mel(self, spectrogram: np.ndarray) -> np.ndarray:
method _mel_to_linear (line 448) | def _mel_to_linear(self, mel_spec: np.ndarray) -> np.ndarray:
method spectrogram (line 452) | def spectrogram(self, y: np.ndarray) -> np.ndarray:
method melspectrogram (line 471) | def melspectrogram(self, y: np.ndarray) -> np.ndarray:
method inv_spectrogram (line 483) | def inv_spectrogram(self, spectrogram: np.ndarray) -> np.ndarray:
method inv_melspectrogram (line 492) | def inv_melspectrogram(self, mel_spectrogram: np.ndarray) -> np.ndarray:
method out_linear_to_mel (line 501) | def out_linear_to_mel(self, linear_spec: np.ndarray) -> np.ndarray:
method _stft (line 518) | def _stft(self, y: np.ndarray) -> np.ndarray:
method _istft (line 537) | def _istft(self, y: np.ndarray) -> np.ndarray:
method _griffin_lim (line 541) | def _griffin_lim(self, S):
method compute_stft_paddings (line 553) | def compute_stft_paddings(self, x, pad_sides=1):
method compute_f0 (line 562) | def compute_f0(self, x: np.ndarray) -> np.ndarray:
method find_endpoint (line 600) | def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int:
method trim_silence (line 619) | def trim_silence(self, wav):
method sound_norm (line 628) | def sound_norm(x: np.ndarray) -> np.ndarray:
method _rms_norm (line 640) | def _rms_norm(wav, db_level=-27):
method rms_volume_norm (line 645) | def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np...
method load_wav (line 661) | def load_wav(self, filename: str, sr: int = None) -> np.ndarray:
method save_wav (line 693) | def save_wav(self, wav: np.ndarray, path: str, sr: int = None) -> None:
method get_duration (line 708) | def get_duration(self, filename: str) -> float:
method mulaw_encode (line 717) | def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
method mulaw_decode (line 728) | def mulaw_decode(wav, qc):
method encode_16bits (line 735) | def encode_16bits(x):
method quantize (line 739) | def quantize(x: np.ndarray, bits: int) -> np.ndarray:
method dequantize (line 752) | def dequantize(x, bits):
function _log (line 757) | def _log(x, base):
function _exp (line 763) | def _exp(x, base):
FILE: TTS/utils/audio/torch_transforms.py
class TorchSTFT (line 6) | class TorchSTFT(nn.Module): # pylint: disable=abstract-method
method __init__ (line 64) | def __init__(
method __call__ (line 102) | def __call__(self, x):
method _build_mel_basis (line 145) | def _build_mel_basis(self):
method _amp_to_db (line 158) | def _amp_to_db(x, spec_gain=1.0):
method _db_to_amp (line 162) | def _db_to_amp(x, spec_gain=1.0):
FILE: TTS/utils/callbacks.py
class TrainerCallback (line 1) | class TrainerCallback:
method on_init_start (line 3) | def on_init_start(trainer) -> None:
method on_init_end (line 18) | def on_init_end(trainer) -> None:
method on_epoch_start (line 33) | def on_epoch_start(trainer) -> None:
method on_epoch_end (line 48) | def on_epoch_end(trainer) -> None:
method on_train_step_start (line 63) | def on_train_step_start(trainer) -> None:
method on_train_step_end (line 78) | def on_train_step_end(trainer) -> None:
method on_keyboard_interrupt (line 93) | def on_keyboard_interrupt(trainer) -> None:
FILE: TTS/utils/capacitron_optimizer.py
class CapacitronOptimizer (line 6) | class CapacitronOptimizer:
method __init__ (line 9) | def __init__(self, config: dict, model_params: Generator) -> None:
method first_step (line 31) | def first_step(self):
method step (line 36) | def step(self):
method zero_grad (line 41) | def zero_grad(self, set_to_none=False):
method load_state_dict (line 45) | def load_state_dict(self, state_dict):
method state_dict (line 49) | def state_dict(self):
method split_model_parameters (line 53) | def split_model_parameters(model_params: Generator) -> list:
method extract_optimizer_parameters (line 65) | def extract_optimizer_parameters(params: dict) -> dict:
FILE: TTS/utils/distribute.py
function reduce_tensor (line 6) | def reduce_tensor(tensor, num_gpus):
function init_distributed (line 13) | def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url):
FILE: TTS/utils/download.py
function stream_url (line 16) | def stream_url(
function download_url (line 56) | def download_url(
function validate_file (line 109) | def validate_file(file_obj: Any, hash_value: str, hash_type: str = "sha2...
function extract_archive (line 138) | def extract_archive(from_path: str, to_path: Optional[str] = None, overw...
function download_kaggle_dataset (line 188) | def download_kaggle_dataset(dataset_path: str, dataset_name: str, output...
FILE: TTS/utils/downloaders.py
function download_ljspeech (line 7) | def download_ljspeech(path: str):
function download_vctk (line 22) | def download_vctk(path: str, use_kaggle: Optional[bool] = False):
function download_tweb (line 42) | def download_tweb(path: str):
function download_libri_tts (line 51) | def download_libri_tts(path: str, subset: Optional[str] = "all"):
function download_thorsten_de (line 90) | def download_thorsten_de(path: str):
function download_mailabs (line 105) | def download_mailabs(path: str, language: str = "english"):
FILE: TTS/utils/generic_utils.py
function to_cuda (line 15) | def to_cuda(x: torch.Tensor) -> torch.Tensor:
function get_cuda (line 25) | def get_cuda():
function get_git_branch (line 31) | def get_git_branch():
function get_commit_hash (line 43) | def get_commit_hash():
function get_experiment_folder_path (line 59) | def get_experiment_folder_path(root_path, model_name):
function remove_experiment_folder (line 67) | def remove_experiment_folder(experiment_path):
function count_parameters (line 79) | def count_parameters(model):
function to_camel (line 84) | def to_camel(text):
function find_module (line 92) | def find_module(module_path: str, module_name: str) -> object:
function import_class (line 99) | def import_class(module_path: str) -> object:
function get_import_path (line 114) | def get_import_path(obj: object) -> str:
function get_user_data_dir (line 126) | def get_user_data_dir(appname):
function set_init_dict (line 142) | def set_init_dict(model_dict, checkpoint_state, c):
function format_aux_input (line 161) | def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict:
class KeepAverage (line 178) | class KeepAverage:
method __init__ (line 179) | def __init__(self):
method __getitem__ (line 183) | def __getitem__(self, key):
method items (line 186) | def items(self):
method add_value (line 189) | def add_value(self, name, init_val=0, init_iter=0):
method update_value (line 193) | def update_value(self, name, value, weighted_avg=False):
method add_values (line 207) | def add_values(self, name_dict):
method update_values (line 211) | def update_values(self, value_dict):
FILE: TTS/utils/io.py
class RenamingUnpickler (line 15) | class RenamingUnpickler(pickle_tts.Unpickler):
method find_class (line 18) | def find_class(self, module, name):
class AttrDict (line 22) | class AttrDict(dict):
method __init__ (line 26) | def __init__(self, *args, **kwargs):
function copy_model_files (line 31) | def copy_model_files(config: Coqpit, out_path, new_fields=None):
function load_fsspec (line 59) | def load_fsspec(
function load_checkpoint (line 89) | def load_checkpoint(
function save_fsspec (line 105) | def save_fsspec(state: Any, path: str, **kwargs):
function save_model (line 117) | def save_model(config, model, optimizer, scaler, current_step, epoch, ou...
function save_checkpoint (line 150) | def save_checkpoint(
function save_best_model (line 175) | def save_best_model(
FILE: TTS/utils/manage.py
class ModelManager (line 26) | class ModelManager(object):
method __init__ (line 41) | def __init__(self, models_file=None, output_prefix=None, progress_bar=...
method read_models_file (line 57) | def read_models_file(self, file_path):
method _list_models (line 66) | def _list_models(self, model_type, model_count=0):
method _list_for_model_type (line 84) | def _list_for_model_type(self, model_type):
method list_models (line 91) | def list_models(self):
method model_info_by_idx (line 99) | def model_info_by_idx(self, model_query):
method model_info_by_full_name (line 140) | def model_info_by_full_name(self, model_query_name):
method list_tts_models (line 174) | def list_tts_models(self):
method list_vocoder_models (line 181) | def list_vocoder_models(self):
method list_vc_models (line 188) | def list_vc_models(self):
method list_langs (line 195) | def list_langs(self):
method list_datasets (line 202) | def list_datasets(self):
method print_model_license (line 211) | def print_model_license(model_item: Dict):
method download_model (line 226) | def download_model(self, model_name):
method _find_files (line 262) | def _find_files(output_path: str) -> Tuple[str, str]:
method _find_speaker_encoder (line 285) | def _find_speaker_encoder(output_path: str) -> str:
method _update_paths (line 300) | def _update_paths(self, output_path: str, config_path: str) -> None:
method _update_path (line 337) | def _update_path(field_name, new_path, config_path):
method _download_zip_file (line 365) | def _download_zip_file(file_url, output_folder, progress_bar):
method _check_dict_key (line 397) | def _check_dict_key(my_dict, key):
FILE: TTS/utils/radam.py
class RAdam (line 9) | class RAdam(Optimizer):
method __init__ (line 10) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig...
method __setstate__ (line 30) | def __setstate__(self, state): # pylint: disable=useless-super-delega...
method step (line 33) | def step(self, closure=None):
FILE: TTS/utils/samplers.py
class SubsetSampler (line 8) | class SubsetSampler(Sampler):
method __init__ (line 16) | def __init__(self, indices):
method __iter__ (line 20) | def __iter__(self):
method __len__ (line 23) | def __len__(self):
class PerfectBatchSampler (line 27) | class PerfectBatchSampler(Sampler):
method __init__ (line 40) | def __init__(
method __iter__ (line 74) | def __iter__(self):
method __len__ (line 113) | def __len__(self):
function identity (line 118) | def identity(x):
class SortedSampler (line 122) | class SortedSampler(Sampler):
method __init__ (line 138) | def __init__(self, data, sort_key: Callable = identity):
method __iter__ (line 146) | def __iter__(self):
method __len__ (line 149) | def __len__(self):
class BucketBatchSampler (line 153) | class BucketBatchSampler(BatchSampler):
method __init__ (line 173) | def __init__(
method __iter__ (line 190) | def __iter__(self):
method __len__ (line 198) | def __len__(self):
FILE: TTS/utils/synthesizer.py
class Synthesizer (line 21) | class Synthesizer(object):
method __init__ (line 22) | def __init__(
method _get_segmenter (line 98) | def _get_segmenter(lang: str):
method _load_vc (line 109) | def _load_vc(self, vc_checkpoint: str, vc_config_path: str, use_cuda: ...
method _load_tts (line 129) | def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cud...
method _set_speaker_encoder_paths_from_tts_config (line 160) | def _set_speaker_encoder_paths_from_tts_config(self):
method _load_vocoder (line 168) | def _load_vocoder(self, model_file: str, model_config: str, use_cuda: ...
method split_into_sentences (line 188) | def split_into_sentences(self, text) -> List[str]:
method save_wav (line 199) | def save_wav(self, wav: List[int], path: str) -> None:
method voice_conversion (line 209) | def voice_conversion(self, source_wav: str, target_wav: str) -> List[i...
method tts (line 213) | def tts(
FILE: TTS/utils/training.py
function check_update (line 5) | def check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=...
function gradual_training_scheduler (line 33) | def gradual_training_scheduler(global_step, config):
FILE: TTS/utils/vad.py
function read_audio (line 6) | def read_audio(path):
function resample_wav (line 15) | def resample_wav(wav, sr, new_sr):
function map_timestamps_to_new_sr (line 22) | def map_timestamps_to_new_sr(vad_sr, new_sr, timestamps, just_begging_en...
function get_vad_model_and_utils (line 38) | def get_vad_model_and_utils(use_cuda=False):
function remove_silence (line 47) | def remove_silence(
FILE: TTS/vc/configs/shared_configs.py
class BaseVCConfig (line 10) | class BaseVCConfig(BaseTrainingConfig):
FILE: TTS/vc/models/__init__.py
function to_camel (line 6) | def to_camel(text):
function setup_model (line 11) | def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]]...
FILE: TTS/vc/models/base_vc.py
class BaseVC (line 24) | class BaseVC(BaseTrainerModel):
method __init__ (line 32) | def __init__(
method _set_model_args (line 46) | def _set_model_args(self, config: Coqpit):
method init_multispeaker (line 67) | def init_multispeaker(self, config: Coqpit, data: List = None):
method get_aux_input (line 100) | def get_aux_input(self, **kwargs) -> Dict:
method get_aux_input_from_test_sentences (line 104) | def get_aux_input_from_test_sentences(self, sentence_info):
method format_batch (line 151) | def format_batch(self, batch: Dict) -> Dict:
method get_sampler (line 230) | def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1):
method get_data_loader (line 268) | def get_data_loader(
method _get_test_aux_input (line 351) | def _get_test_aux_input(
method test_run (line 368) | def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
method on_init_start (line 408) | def on_init_start(self, trainer):
FILE: TTS/vc/models/freevc.py
class ResidualCouplingBlock (line 25) | class ResidualCouplingBlock(nn.Module):
method __init__ (line 26) | def __init__(self, channels, hidden_channels, kernel_size, dilation_ra...
method forward (line 51) | def forward(self, x, x_mask, g=None, reverse=False):
class Encoder (line 61) | class Encoder(nn.Module):
method __init__ (line 62) | def __init__(
method forward (line 78) | def forward(self, x, x_lengths, g=None):
class Generator (line 88) | class Generator(torch.nn.Module):
method __init__ (line 89) | def __init__(
method forward (line 132) | def forward(self, x, g=None):
method remove_weight_norm (line 153) | def remove_weight_norm(self):
class DiscriminatorP (line 161) | class DiscriminatorP(torch.nn.Module):
method __init__ (line 162) | def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=...
method forward (line 178) | def forward(self, x):
class DiscriminatorS (line 200) | class DiscriminatorS(torch.nn.Module):
method __init__ (line 201) | def __init__(self, use_spectral_norm=False):
method forward (line 216) | def forward(self, x):
class MultiPeriodDiscriminator (line 230) | class MultiPeriodDiscriminator(torch.nn.Module):
method __init__ (line 231) | def __init__(self, use_spectral_norm=False):
method forward (line 239) | def forward(self, y, y_hat):
class SpeakerEncoder (line 255) | class SpeakerEncoder(torch.nn.Module):
method __init__ (line 256) | def __init__(self, mel_n_channels=80, model_num_layers=3, model_hidden...
method forward (line 262) | def forward(self, mels):
method compute_partial_slices (line 268) | def compute_partial_slices(self, total_frames, partial_frames, partial...
method embed_utterance (line 276) | def embed_utterance(self, mel, partial_frames=128, partial_hop=64):
class FreeVCAudioConfig (line 298) | class FreeVCAudioConfig(Coqpit):
class FreeVCArgs (line 342) | class FreeVCArgs(Coqpit):
class FreeVC (line 427) | class FreeVC(BaseVC):
method __init__ (line 455) | def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = N...
method device (line 504) | def device(self):
method load_pretrained_speaker_encoder (line 507) | def load_pretrained_speaker_encoder(self):
method init_multispeaker (line 514) | def init_multispeaker(self, config: Coqpit):
method forward (line 528) | def forward(
method inference (line 587) | def inference(self, c, g=None, mel=None, c_lengths=None):
method extract_wavlm_features (line 610) | def extract_wavlm_features(self, y):
method load_audio (line 622) | def load_audio(self, wav):
method voice_conversion (line 635) | def voice_conversion(self, src, tgt):
method eval_step (line 676) | def eval_step():
method init_from_config (line 680) | def init_from_config(config: "VitsConfig", samples: Union[List[List], ...
method load_checkpoint (line 684) | def load_checkpoint(self, config, checkpoint_path, eval=False, strict=...
method train_step (line 690) | def train_step():
class FreeVCConfig (line 695) | class FreeVCConfig(BaseVCConfig):
method __post_init__ (line 830) | def __post_init__(self):
FILE: TTS/vc/modules/freevc/commons.py
function init_weights (line 9) | def init_weights(m, mean=0.0, std=0.01):
function get_padding (line 15) | def get_padding(kernel_size, dilation=1):
function convert_pad_shape (line 19) | def convert_pad_shape(pad_shape):
function intersperse (line 25) | def intersperse(lst, item):
function kl_divergence (line 31) | def kl_divergence(m_p, logs_p, m_q, logs_q):
function rand_gumbel (line 38) | def rand_gumbel(shape):
function rand_gumbel_like (line 44) | def rand_gumbel_like(x):
function slice_segments (line 49) | def slice_segments(x, ids_str, segment_size=4):
function rand_slice_segments (line 58) | def rand_slice_segments(x, x_lengths=None, segment_size=4):
function rand_spec_segments (line 68) | def rand_spec_segments(x, x_lengths=None, segment_size=4):
function get_timing_signal_1d (line 78) | def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timesc...
function add_timing_signal_1d (line 92) | def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
function cat_timing_signal_1d (line 98) | def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis...
function subsequent_mask (line 104) | def subsequent_mask(length):
function fused_add_tanh_sigmoid_multiply (line 110) | def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
function convert_pad_shape (line 119) | def convert_pad_shape(pad_shape):
function shift_1d (line 125) | def shift_1d(x):
function sequence_mask (line 130) | def sequence_mask(length, max_length=None):
function generate_path (line 137) | def generate_path(duration, mask):
function clip_grad_value_ (line 155) | def clip_grad_value_(parameters, clip_value, norm_type=2):
FILE: TTS/vc/modules/freevc/mel_processing.py
function dynamic_range_compression_torch (line 8) | def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
function dynamic_range_decompression_torch (line 17) | def dynamic_range_decompression_torch(x, C=1):
function spectral_normalize_torch (line 26) | def spectral_normalize_torch(magnitudes):
function spectral_de_normalize_torch (line 31) | def spectral_de_normalize_torch(magnitudes):
function spectrogram_torch (line 40) | def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, cente...
function spec_to_mel_torch (line 74) | def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
function mel_spectrogram_torch (line 86) | def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, w...
FILE: TTS/vc/modules/freevc/modules.py
class LayerNorm (line 18) | class LayerNorm(nn.Module):
method __init__ (line 19) | def __init__(self, channels, eps=1e-5):
method forward (line 27) | def forward(self, x):
class ConvReluNorm (line 33) | class ConvReluNorm(nn.Module):
method __init__ (line 34) | def __init__(self, in_channels, hidden_channels, out_channels, kernel_...
method forward (line 56) | def forward(self, x, x_mask):
class DDSConv (line 66) | class DDSConv(nn.Module):
method __init__ (line 71) | def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0):
method forward (line 93) | def forward(self, x, x_mask, g=None):
class WN (line 108) | class WN(torch.nn.Module):
method __init__ (line 109) | def __init__(self, hidden_channels, kernel_size, dilation_rate, n_laye...
method forward (line 146) | def forward(self, x, x_mask, g=None, **kwargs):
method remove_weight_norm (line 173) | def remove_weight_norm(self):
class ResBlock1 (line 182) | class ResBlock1(torch.nn.Module):
method __init__ (line 183) | def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
method forward (line 236) | def forward(self, x, x_mask=None):
method remove_weight_norm (line 251) | def remove_weight_norm(self):
class ResBlock2 (line 258) | class ResBlock2(torch.nn.Module):
method __init__ (line 259) | def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
method forward (line 287) | def forward(self, x, x_mask=None):
method remove_weight_norm (line 298) | def remove_weight_norm(self):
class Log (line 303) | class Log(nn.Module):
method forward (line 304) | def forward(self, x, x_mask, reverse=False, **kwargs):
class Flip (line 314) | class Flip(nn.Module):
method forward (line 315) | def forward(self, x, *args, reverse=False, **kwargs):
class ElementwiseAffine (line 324) | class ElementwiseAffine(nn.Module):
method __init__ (line 325) | def __init__(self, channels):
method forward (line 331) | def forward(self, x, x_mask, reverse=False, **kwargs):
class ResidualCouplingLayer (line 342) | class ResidualCouplingLayer(nn.Module):
method __init__ (line 343) | def __init__(
method forward (line 372) | def forward(self, x, x_mask, g=None, reverse=False):
FILE: TTS/vc/modules/freevc/speaker_encoder/audio.py
function preprocess_wav (line 15) | def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray], source_sr...
function wav_to_mel_spectrogram (line 44) | def wav_to_mel_spectrogram(wav):
function normalize_volume (line 59) | def normalize_volume(wav, target_dBFS, increase_only=False, decrease_onl...
FILE: TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py
class SpeakerEncoder (line 14) | class SpeakerEncoder(nn.Module):
method __init__ (line 15) | def __init__(self, weights_fpath, device: Union[str, torch.device] = N...
method forward (line 50) | def forward(self, mels: torch.FloatTensor):
method compute_partial_slices (line 65) | def compute_partial_slices(n_samples: int, rate, min_coverage):
method embed_utterance (line 118) | def embed_utterance(self, wav: np.ndarray, return_partials=False, rate...
method embed_speaker (line 165) | def embed_speaker(self, wavs: List[np.ndarray], **kwargs):
FILE: TTS/vc/modules/freevc/wavlm/__init__.py
function get_wavlm (line 12) | def get_wavlm(device="cpu"):
FILE: TTS/vc/modules/freevc/wavlm/modules.py
class TransposeLast (line 20) | class TransposeLast(nn.Module):
method __init__ (line 21) | def __init__(self, deconstruct_idx=None):
method forward (line 25) | def forward(self, x):
class Fp32LayerNorm (line 31) | class Fp32LayerNorm(nn.LayerNorm):
method __init__ (line 32) | def __init__(self, *args, **kwargs):
method forward (line 35) | def forward(self, input):
class Fp32GroupNorm (line 46) | class Fp32GroupNorm(nn.GroupNorm):
method __init__ (line 47) | def __init__(self, *args, **kwargs):
method forward (line 50) | def forward(self, input):
class GradMultiply (line 61) | class GradMultiply(torch.autograd.Function):
method forward (line 63) | def forward(ctx, x, scale):
method backward (line 69) | def backward(ctx, grad):
class SamePad (line 73) | class SamePad(nn.Module):
method __init__ (line 74) | def __init__(self, kernel_size, causal=False):
method forward (line 81) | def forward(self, x):
class Swish (line 87) | class Swish(nn.Module):
method __init__ (line 90) | def __init__(self):
method forward (line 95) | def forward(self, x):
class GLU_Linear (line 99) | class GLU_Linear(nn.Module):
method __init__ (line 100) | def __init__(self, input_dim, output_dim, glu_type="sigmoid", bias_in_...
method forward (line 120) | def forward(self, x):
function gelu_accurate (line 132) | def gelu_accurate(x):
function gelu (line 138) | def gelu(x: torch.Tensor) -> torch.Tensor:
function get_activation_fn (line 142) | def get_activation_fn(activation: str):
function init_bert_params (line 164) | def init_bert_params(module):
function quant_noise (line 197) | def quant_noise(module, p, block_size):
class MultiheadAttention (line 283) | class MultiheadAttention(nn.Module):
method __init__ (line 289) | def __init__(
method reset_parameters (line 365) | def reset_parameters(self):
method _relative_positions_bucket (line 387) | def _relative_positions_bucket(self, relative_positions, bidirectional...
method compute_bias (line 414) | def compute_bias(self, query_length, key_length):
method forward (line 424) | def forward(
method _append_prev_key_padding_mask (line 713) | def _append_prev_key_padding_mask(
method _get_input_buffer (line 750) | def _get_input_buffer(
method _set_input_buffer (line 760) | def _set_input_buffer(
method apply_sparse_mask (line 767) | def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, ...
FILE: TTS/vc/modules/freevc/wavlm/wavlm.py
function compute_mask_indices (line 35) | def compute_mask_indices(
class WavLMConfig (line 156) | class WavLMConfig:
method __init__ (line 157) | def __init__(self, cfg=None):
method update (line 214) | def update(self, cfg: dict):
class WavLM (line 218) | class WavLM(nn.Module):
method __init__ (line 219) | def __init__(
method apply_mask (line 265) | def apply_mask(self, x, padding_mask):
method forward_padding_mask (line 300) | def forward_padding_mask(
method extract_features (line 313) | def extract_features(
class ConvFeatureExtractionModel (line 363) | class ConvFeatureExtractionModel(nn.Module):
method __init__ (line 364) | def __init__(
method forward (line 460) | def forward(self, x, mask=None):
class TransformerEncoder (line 481) | class TransformerEncoder(nn.Module):
method __init__ (line 482) | def __init__(self, args):
method forward (line 538) | def forward(self, x, padding_mask=None, streaming_mask=None, layer=None):
method extract_features (line 546) | def extract_features(self, x, padding_mask=None, streaming_mask=None, ...
class TransformerSentenceEncoderLayer (line 593) | class TransformerSentenceEncoderLayer(nn.Module):
method __init__ (line 599) | def __init__(
method forward (line 654) | def forward(
FILE: TTS/vocoder/configs/fullband_melgan_config.py
class FullbandMelganConfig (line 7) | class FullbandMelganConfig(BaseGANVocoderConfig):
FILE: TTS/vocoder/configs/hifigan_config.py
class HifiganConfig (line 7) | class HifiganConfig(BaseGANVocoderConfig):
FILE: TTS/vocoder/configs/melgan_config.py
class MelganConfig (line 7) | class MelganConfig(BaseGANVocoderConfig):
FILE: TTS/vocoder/configs/multiband_melgan_config.py
class MultibandMelganConfig (line 7) | class MultibandMelganConfig(BaseGANVocoderConfig):
FILE: TTS/vocoder/configs/parallel_wavegan_config.py
class ParallelWaveganConfig (line 7) | class ParallelWaveganConfig(BaseGANVocoderConfig):
FILE: TTS/vocoder/configs/shared_configs.py
class BaseVocoderConfig (line 7) | class BaseVocoderConfig(BaseTrainingConfig):
class BaseGANVocoderConfig (line 60) | class BaseGANVocoderConfig(BaseVocoderConfig):
FILE: TTS/vocoder/configs/univnet_config.py
class UnivnetConfig (line 8) | class UnivnetConfig(BaseGANVocoderConfig):
method __post_init__ (line 159) | def __post_init__(self):
FILE: TTS/vocoder/configs/wavegrad_config.py
class WavegradConfig (line 8) | class WavegradConfig(BaseVocoderConfig):
FILE: TTS/vocoder/configs/wavernn_config.py
class WavernnConfig (line 8) | class WavernnConfig(BaseVocoderConfig):
FILE: TTS/vocoder/datasets/__init__.py
function setup_dataset (line 13) | def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, dat...
FILE: TTS/vocoder/datasets/gan_dataset.py
class GANDataset (line 11) | class GANDataset(Dataset):
method __init__ (line 18) | def __init__(
method create_feature_cache (line 59) | def create_feature_cache(self):
method find_wav_files (line 65) | def find_wav_files(path):
method __len__ (line 68) | def __len__(self):
method __getitem__ (line 71) | def __getitem__(self, idx):
method _pad_short_samples (line 89) | def _pad_short_samples(self, audio, mel=None):
method shuffle_mapping (line 104) | def shuffle_mapping(self):
method load_item (line 107) | def load_item(self, idx):
FILE: TTS/vocoder/datasets/preprocess.py
function preprocess_wav_files (line 12) | def preprocess_wav_files(out_path: str, config: Coqpit, ap: AudioProcess...
function find_wav_files (line 36) | def find_wav_files(data_path, file_ext="wav"):
function find_feat_files (line 41) | def find_feat_files(data_path):
function load_wav_data (line 46) | def load_wav_data(data_path, eval_split_size, file_ext="wav"):
function load_wav_feat_data (line 54) | def load_wav_feat_data(data_path, feat_path, eval_split_size):
FILE: TTS/vocoder/datasets/wavegrad_dataset.py
class WaveGradDataset (line 12) | class WaveGradDataset(Dataset):
method __init__ (line 19) | def __init__(
method create_feature_cache (line 54) | def create_feature_cache(self):
method find_wav_files (line 60) | def find_wav_files(path):
method __len__ (line 63) | def __len__(self):
method __getitem__ (line 66) | def __getitem__(self, idx):
method load_test_samples (line 70) | def load_test_samples(self, num_samples: int) -> List[Tuple]:
method load_item (line 92) | def load_item(self, idx):
method collate_full_clips (line 136) | def collate_full_clips(batch):
FILE: TTS/vocoder/datasets/wavernn_dataset.py
class WaveRNNDataset (line 6) | class WaveRNNDataset(Dataset):
method __init__ (line 12) | def __init__(
method __len__ (line 31) | def __len__(self):
method __getitem__ (line 34) | def __getitem__(self, index):
method load_test_samples (line 38) | def load_test_samples(self, num_samples):
method load_item (line 48) | def load_item(self, index):
method collate (line 92) | def collate(self, batch):
FILE: TTS/vocoder/layers/hifigan.py
class ResStack (line 5) | class ResStack(nn.Module):
method __init__ (line 6) | def __init__(self, kernel, channel, padding, dilations=[1, 3, 5]):
method forward (line 22) | def forward(self, x):
method remove_weight_norm (line 27) | def remove_weight_norm(self):
class MRF (line 37) | class MRF(nn.Module):
method __init__ (line 38) | def __init__(self, kernels, channel, dilations=[1, 3, 5]): # # pylint...
method forward (line 44) | def forward(self, x):
method remove_weight_norm (line 50) | def remove_weight_norm(self):
FILE: TTS/vocoder/layers/losses.py
class STFTLoss (line 15) | class STFTLoss(nn.Module):
method __init__ (line 20) | def __init__(self, n_fft, hop_length, win_length):
method forward (line 27) | def forward(self, y_hat, y):
class MultiScaleSTFTLoss (line 37) | class MultiScaleSTFTLoss(torch.nn.Module):
method __init__ (line 42) | def __init__(self, n_ffts=(1024, 2048, 512), hop_lengths=(120, 240, 50...
method forward (line 48) | def forward(self, y_hat, y):
class L1SpecLoss (line 61) | class L1SpecLoss(nn.Module):
method __init__ (line 64) | def __init__(
method forward (line 80) | def forward(self, y_hat, y):
class MultiScaleSubbandSTFTLoss (line 88) | class MultiScaleSubbandSTFTLoss(MultiScaleSTFTLoss):
method forward (line 93) | def forward(self, y_hat, y):
class MSEGLoss (line 99) | class MSEGLoss(nn.Module):
method forward (line 103) | def forward(self, score_real):
class HingeGLoss (line 108) | class HingeGLoss(nn.Module):
method forward (line 112) | def forward(self, score_real):
class MSEDLoss (line 123) | class MSEDLoss(nn.Module):
method __init__ (line 126) | def __init__(
method forward (line 133) | def forward(self, score_fake, score_real):
class HingeDLoss (line 140) | class HingeDLoss(nn.Module):
method forward (line 144) | def forward(self, score_fake, score_real):
class MelganFeatureLoss (line 151) | class MelganFeatureLoss(nn.Module):
method __init__ (line 152) | def __init__(
method forward (line 159) | def forward(self, fake_feats, real_feats):
function _apply_G_adv_loss (line 175) | def _apply_G_adv_loss(scores_fake, loss_func):
function _apply_D_loss (line 190) | def _apply_D_loss(scores_fake, scores_real, loss_func):
class GeneratorLoss (line 218) | class GeneratorLoss(nn.Module):
method __init__ (line 227) | def __init__(self, C):
method forward (line 261) | def forward(
class DiscriminatorLoss (line 311) | class DiscriminatorLoss(nn.Module):
method __init__ (line 314) | def __init__(self, C):
method forward (line 328) | def forward(self, scores_fake, scores_real):
class WaveRNNLoss (line 354) | class WaveRNNLoss(nn.Module):
method __init__ (line 355) | def __init__(self, wave_rnn_mode: Union[str, int]):
method forward (line 366) | def forward(self, y_hat, y) -> Dict:
FILE: TTS/vocoder/layers/lvc_block.py
class KernelPredictor (line 5) | class KernelPredictor(torch.nn.Module):
method __init__ (line 8) | def __init__( # pylint: disable=dangerous-default-value
method forward (line 66) | def forward(self, c):
class LVCBlock (line 86) | class LVCBlock(torch.nn.Module):
method __init__ (line 89) | def __init__(
method forward (line 136) | def forward(self, x, c):
method location_variable_convolution (line 163) | def location_variable_convolution(x, kernel, bias, dilation, hop_size):
FILE: TTS/vocoder/layers/melgan.py
class ResidualStack (line 5) | class ResidualStack(nn.Module):
method __init__ (line 6) | def __init__(self, channels, num_res_blocks, kernel_size):
method forward (line 33) | def forward(self, x):
method remove_weight_norm (line 38) | def remove_weight_norm(self):
FILE: TTS/vocoder/layers/parallel_wavegan.py
class ResidualBlock (line 5) | class ResidualBlock(torch.nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 46) | def forward(self, x, c):
FILE: TTS/vocoder/layers/pqmf.py
class PQMF (line 9) | class PQMF(torch.nn.Module):
method __init__ (line 10) | def __init__(self, N=4, taps=62, cutoff=0.15, beta=9.0):
method forward (line 44) | def forward(self, x):
method analysis (line 47) | def analysis(self, x):
method synthe
Condensed preview — 339 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,073K chars).
[
{
"path": ".gitignore",
"chars": 3144,
"preview": "jarvis/\nchatgptwrapper/\noutput.wav\n.env\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C e"
},
{
"path": "Assistant/Agents.py",
"chars": 6224,
"preview": "from langchain import OpenAI, LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.agents import Tool, AgentExecuto"
},
{
"path": "Assistant/Chat.py",
"chars": 1,
"preview": "\n"
},
{
"path": "Assistant/VirtualAssistant.py",
"chars": 30015,
"preview": "# import for prompt routing\r\nfrom langchain import OpenAI\r\nfrom langchain.agents import Tool\r\nfrom langchain.agents impo"
},
{
"path": "Assistant/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "Assistant/get_audio.py",
"chars": 2270,
"preview": "import whisper\r\nimport pyaudio\r\n\r\n# CHUNK = 1024\r\n# FORMAT = pyaudio.paInt16\r\n# CHANNELS = 2\r\n# RATE = 44100\r\n# SILENCE_"
},
{
"path": "Assistant/research_mode.py",
"chars": 8206,
"preview": "# AGENT\nfrom langchain import OpenAI, LLMChain, PromptTemplate\nfrom langchain.llms import OpenAI\nfrom langchain.agents i"
},
{
"path": "Assistant/semantic_scholar/S2_tools.py",
"chars": 12689,
"preview": "import csv\nimport re\nfrom time import time\nimport requests\nimport dotenv\nimport aspose.pdf as ap\ndotenv.load_dotenv()\n\ni"
},
{
"path": "Assistant/semantic_scholar/__init__.py",
"chars": 1,
"preview": "#"
},
{
"path": "Assistant/semantic_scholar/agent_tools.py",
"chars": 18434,
"preview": "from contextlib import contextmanager\nimport uuid\nimport os\nimport tiktoken\n\nfrom . import S2_tools as scholar\n\nimport c"
},
{
"path": "Assistant/semantic_scholar/simple.py",
"chars": 4334,
"preview": "#!/usr/bin/env python3\nimport dotenv\ndotenv.load_dotenv()\nimport re\nimport argparse\nimport os\nfrom requests import Sessi"
},
{
"path": "Assistant/tools.py",
"chars": 15742,
"preview": "# imports for Local Search Engine\nimport openai\nimport os\nimport pandas as pd\nimport numpy as np\nfrom openai.embeddings_"
},
{
"path": "Assistant/voice.py",
"chars": 7359,
"preview": "# imports\nimport pyttsx3\nfrom ibm_watson import TextToSpeechV1\nfrom ibm_cloud_sdk_core.authenticators import IAMAuthenti"
},
{
"path": "Assistant/webui.py",
"chars": 2522,
"preview": "import json \nimport requests\nimport re\nimport langid\n\nSERVER = 'localhost'\nTEXT_GEN_PARAMS = {\n 'max_new_tokens': 200"
},
{
"path": "LICENSE",
"chars": 1075,
"preview": "MIT License\n\nCopyright (c) 2023 Gianmarco Guarnier\n\nPermission is hereby granted, free of charge, to any person obtainin"
},
{
"path": "README.md",
"chars": 17465,
"preview": "# JARVIS-ChatGPT: A conversational assistant equipped with J.A.R.V.I.S's voice\r\n**A voice-based interactive assistant eq"
},
{
"path": "TTS/.models.json",
"chars": 37226,
"preview": "{\n \"tts_models\": {\n \"multilingual\":{\n \"multi-dataset\":{\n \"your_tts\":{\n "
},
{
"path": "TTS/VERSION",
"chars": 7,
"preview": "0.12.0\n"
},
{
"path": "TTS/__init__.py",
"chars": 156,
"preview": "import os\n\nwith open(os.path.join(os.path.dirname(__file__), \"VERSION\"), \"r\", encoding=\"utf-8\") as f:\n version = f.re"
},
{
"path": "TTS/api.py",
"chars": 13688,
"preview": "import tempfile\nfrom pathlib import Path\n\nfrom TTS.utils.audio.numpy_transforms import save_wav\nfrom TTS.utils.manage im"
},
{
"path": "TTS/bin/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/bin/collect_env_info.py",
"chars": 1067,
"preview": "\"\"\"Get detailed info about the working environment.\"\"\"\nimport os\nimport platform\nimport sys\n\nimport numpy\nimport torch\n\n"
},
{
"path": "TTS/bin/compute_attention_masks.py",
"chars": 6256,
"preview": "import argparse\nimport importlib\nimport os\nfrom argparse import RawTextHelpFormatter\n\nimport numpy as np\nimport torch\nfr"
},
{
"path": "TTS/bin/compute_embeddings.py",
"chars": 6804,
"preview": "import argparse\nimport os\nfrom argparse import RawTextHelpFormatter\n\nimport torch\nfrom tqdm import tqdm\n\nfrom TTS.config"
},
{
"path": "TTS/bin/compute_statistics.py",
"chars": 3175,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport argparse\nimport glob\nimport os\n\nimport numpy as np\nfrom tqdm impo"
},
{
"path": "TTS/bin/eval_encoder.py",
"chars": 3127,
"preview": "import argparse\nfrom argparse import RawTextHelpFormatter\n\nimport torch\nfrom tqdm import tqdm\n\nfrom TTS.config import lo"
},
{
"path": "TTS/bin/extract_tts_spectrograms.py",
"chars": 9402,
"preview": "#!/usr/bin/env python3\n\"\"\"Extract Mel spectrograms with teacher forcing.\"\"\"\n\nimport argparse\nimport os\n\nimport numpy as "
},
{
"path": "TTS/bin/find_unique_chars.py",
"chars": 1486,
"preview": "\"\"\"Find all the unique characters in a dataset\"\"\"\nimport argparse\nfrom argparse import RawTextHelpFormatter\n\nfrom TTS.co"
},
{
"path": "TTS/bin/find_unique_phonemes.py",
"chars": 2534,
"preview": "\"\"\"Find all the unique characters in a dataset\"\"\"\nimport argparse\nimport multiprocessing\nfrom argparse import RawTextHel"
},
{
"path": "TTS/bin/remove_silence_using_vad.py",
"chars": 3320,
"preview": "import argparse\nimport glob\nimport os\nimport pathlib\n\nfrom tqdm import tqdm\n\nfrom TTS.utils.vad import get_vad_model_and"
},
{
"path": "TTS/bin/resample.py",
"chars": 2776,
"preview": "import argparse\nimport glob\nimport os\nfrom argparse import RawTextHelpFormatter\nfrom multiprocessing import Pool\nfrom sh"
},
{
"path": "TTS/bin/synthesize.py",
"chars": 13412,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport argparse\nimport sys\nfrom argparse import RawTextHelpFormatter\n\n# "
},
{
"path": "TTS/bin/train_encoder.py",
"chars": 11906,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport os\nimport sys\nimport time\nimport traceback\n\nimport torch\nfrom tor"
},
{
"path": "TTS/bin/train_tts.py",
"chars": 2279,
"preview": "import os\nfrom dataclasses import dataclass, field\n\nfrom trainer import Trainer, TrainerArgs\n\nfrom TTS.config import loa"
},
{
"path": "TTS/bin/train_vocoder.py",
"chars": 2654,
"preview": "import os\nfrom dataclasses import dataclass, field\n\nfrom trainer import Trainer, TrainerArgs\n\nfrom TTS.config import loa"
},
{
"path": "TTS/bin/tune_wavegrad.py",
"chars": 3732,
"preview": "\"\"\"Search a good noise schedule for WaveGrad for a given number of inference iterations\"\"\"\nimport argparse\nfrom itertool"
},
{
"path": "TTS/config/__init__.py",
"chars": 4268,
"preview": "import json\nimport os\nimport re\nfrom typing import Dict\n\nimport fsspec\nimport yaml\nfrom coqpit import Coqpit\n\nfrom TTS.c"
},
{
"path": "TTS/config/shared_configs.py",
"chars": 9846,
"preview": "from dataclasses import asdict, dataclass\nfrom typing import List\n\nfrom coqpit import Coqpit, check_argument\nfrom traine"
},
{
"path": "TTS/encoder/README.md",
"chars": 1340,
"preview": "### Speaker Encoder\n\nThis is an implementation of https://arxiv.org/abs/1710.10467. This model can be used for voice and"
},
{
"path": "TTS/encoder/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/encoder/configs/base_encoder_config.py",
"chars": 1851,
"preview": "from dataclasses import asdict, dataclass, field\nfrom typing import Dict, List\n\nfrom coqpit import MISSING\n\nfrom TTS.con"
},
{
"path": "TTS/encoder/configs/emotion_encoder_config.py",
"chars": 348,
"preview": "from dataclasses import asdict, dataclass\n\nfrom TTS.encoder.configs.base_encoder_config import BaseEncoderConfig\n\n\n@data"
},
{
"path": "TTS/encoder/configs/speaker_encoder_config.py",
"chars": 306,
"preview": "from dataclasses import asdict, dataclass\n\nfrom TTS.encoder.configs.base_encoder_config import BaseEncoderConfig\n\n\n@data"
},
{
"path": "TTS/encoder/dataset.py",
"chars": 5022,
"preview": "import random\n\nimport torch\nfrom torch.utils.data import Dataset\n\nfrom TTS.encoder.utils.generic_utils import AugmentWAV"
},
{
"path": "TTS/encoder/losses.py",
"chars": 8160,
"preview": "import torch\nimport torch.nn.functional as F\nfrom torch import nn\n\n\n# adapted from https://github.com/cvqluu/GE2E-Loss\nc"
},
{
"path": "TTS/encoder/models/base_encoder.py",
"chars": 5472,
"preview": "import numpy as np\nimport torch\nimport torchaudio\nfrom coqpit import Coqpit\nfrom torch import nn\n\nfrom TTS.encoder.losse"
},
{
"path": "TTS/encoder/models/lstm.py",
"chars": 3375,
"preview": "import torch\nfrom torch import nn\n\nfrom TTS.encoder.models.base_encoder import BaseEncoder\n\n\nclass LSTMWithProjection(nn"
},
{
"path": "TTS/encoder/models/resnet.py",
"chars": 6552,
"preview": "import torch\nfrom torch import nn\n\n# from TTS.utils.audio.torch_transforms import TorchSTFT\nfrom TTS.encoder.models.base"
},
{
"path": "TTS/encoder/requirements.txt",
"chars": 25,
"preview": "umap-learn\nnumpy>=1.17.0\n"
},
{
"path": "TTS/encoder/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/encoder/utils/generic_utils.py",
"chars": 6853,
"preview": "import datetime\nimport glob\nimport os\nimport random\nimport re\n\nimport numpy as np\nfrom scipy import signal\n\nfrom TTS.enc"
},
{
"path": "TTS/encoder/utils/io.py",
"chars": 1360,
"preview": "import datetime\nimport os\n\nfrom TTS.utils.io import save_fsspec\n\n\ndef save_checkpoint(model, optimizer, model_loss, out_"
},
{
"path": "TTS/encoder/utils/prepare_voxceleb.py",
"chars": 8758,
"preview": "# coding=utf-8\n# Copyright (C) 2020 ATHENA AUTHORS; Yiping Peng; Ne Luo\n# All rights reserved.\n#\n# Licensed under the Ap"
},
{
"path": "TTS/encoder/utils/training.py",
"chars": 4059,
"preview": "import os\nfrom dataclasses import dataclass, field\n\nfrom coqpit import Coqpit\nfrom trainer import TrainerArgs, get_last_"
},
{
"path": "TTS/encoder/utils/visual.py",
"chars": 1321,
"preview": "import matplotlib\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport umap\n\nmatplotlib.use(\"Agg\")\n\n\ncolormap = (\n "
},
{
"path": "TTS/model.py",
"chars": 2056,
"preview": "from abc import abstractmethod\nfrom typing import Dict\n\nimport torch\nfrom coqpit import Coqpit\nfrom trainer import Train"
},
{
"path": "TTS/server/README.md",
"chars": 1075,
"preview": "# :frog: TTS demo server\nBefore you use the server, make sure you [install](https://github.com/coqui-ai/TTS/tree/dev#ins"
},
{
"path": "TTS/server/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/server/conf.json",
"chars": 456,
"preview": "{\n \"tts_path\":\"/media/erogol/data_ssd/Models/libri_tts/5049/\", // tts model root folder\n \"tts_file\":\"best_model.p"
},
{
"path": "TTS/server/server.py",
"chars": 8414,
"preview": "#!flask/bin/python\nimport argparse\nimport io\nimport json\nimport os\nimport sys\nfrom pathlib import Path\nfrom threading im"
},
{
"path": "TTS/server/templates/details.html",
"chars": 2578,
"preview": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n\n <meta charset=\"utf-8\">\n <meta name=\"viewport\" content=\"width=device-width, "
},
{
"path": "TTS/server/templates/index.html",
"chars": 5864,
"preview": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n\n <meta charset=\"utf-8\">\n <meta name=\"viewport\" content=\"width=device-wid"
},
{
"path": "TTS/tts/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/configs/__init__.py",
"chars": 749,
"preview": "import importlib\nimport os\nfrom inspect import isclass\n\n# import all files under configs/\n# configs_dir = os.path.dirnam"
},
{
"path": "TTS/tts/configs/align_tts_config.py",
"chars": 4913,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/fast_pitch_config.py",
"chars": 6786,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/fast_speech_config.py",
"chars": 6633,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/fastspeech2_config.py",
"chars": 7278,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/glow_tts_config.py",
"chars": 7999,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/neuralhmm_tts_config.py",
"chars": 7910,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/overflow_config.py",
"chars": 9273,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/shared_configs.py",
"chars": 14025,
"preview": "from dataclasses import asdict, dataclass, field\nfrom typing import Dict, List\n\nfrom coqpit import Coqpit, check_argumen"
},
{
"path": "TTS/tts/configs/speedy_speech_config.py",
"chars": 7070,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/tacotron2_config.py",
"chars": 517,
"preview": "from dataclasses import dataclass\n\nfrom TTS.tts.configs.tacotron_config import TacotronConfig\n\n\n@dataclass\nclass Tacotro"
},
{
"path": "TTS/tts/configs/tacotron_config.py",
"chars": 11528,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/configs/vits_config.py",
"chars": 6870,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.tts.configs.shared_configs import BaseTTSConf"
},
{
"path": "TTS/tts/datasets/__init__.py",
"chars": 7960,
"preview": "import os\nimport sys\nfrom collections import Counter\nfrom pathlib import Path\nfrom typing import Callable, Dict, List, T"
},
{
"path": "TTS/tts/datasets/dataset.py",
"chars": 37690,
"preview": "import base64\nimport collections\nimport os\nimport random\nfrom typing import Dict, List, Union\n\nimport numpy as np\nimport"
},
{
"path": "TTS/tts/datasets/formatters.py",
"chars": 25479,
"preview": "import os\nimport re\nimport xml.etree.ElementTree as ET\nfrom glob import glob\nfrom pathlib import Path\nfrom typing import"
},
{
"path": "TTS/tts/layers/__init__.py",
"chars": 36,
"preview": "from TTS.tts.layers.losses import *\n"
},
{
"path": "TTS/tts/layers/align_tts/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/layers/align_tts/duration_predictor.py",
"chars": 838,
"preview": "from torch import nn\n\nfrom TTS.tts.layers.generic.pos_encoding import PositionalEncoding\nfrom TTS.tts.layers.generic.tra"
},
{
"path": "TTS/tts/layers/align_tts/mdn.py",
"chars": 975,
"preview": "from torch import nn\n\n\nclass MDNBlock(nn.Module):\n \"\"\"Mixture of Density Network implementation\n https://arxiv.org"
},
{
"path": "TTS/tts/layers/feed_forward/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/layers/feed_forward/decoder.py",
"chars": 8297,
"preview": "import torch\nfrom torch import nn\n\nfrom TTS.tts.layers.generic.res_conv_bn import Conv1dBN, Conv1dBNBlock, ResidualConv1"
},
{
"path": "TTS/tts/layers/feed_forward/duration_predictor.py",
"chars": 1099,
"preview": "from torch import nn\n\nfrom TTS.tts.layers.generic.res_conv_bn import Conv1dBN\n\n\nclass DurationPredictor(nn.Module):\n "
},
{
"path": "TTS/tts/layers/feed_forward/encoder.py",
"chars": 5913,
"preview": "from torch import nn\n\nfrom TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock\nfrom TTS.tts.layers.generic.t"
},
{
"path": "TTS/tts/layers/generic/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/layers/generic/aligner.py",
"chars": 3057,
"preview": "from typing import Tuple\n\nimport torch\nfrom torch import nn\n\n\nclass AlignmentNetwork(torch.nn.Module):\n \"\"\"Aligner Ne"
},
{
"path": "TTS/tts/layers/generic/gated_conv.py",
"chars": 1305,
"preview": "from torch import nn\n\nfrom .normalization import LayerNorm\n\n\nclass GatedConvBlock(nn.Module):\n \"\"\"Gated convolutional"
},
{
"path": "TTS/tts/layers/generic/normalization.py",
"chars": 4055,
"preview": "import torch\nfrom torch import nn\n\n\nclass LayerNorm(nn.Module):\n def __init__(self, channels, eps=1e-4):\n \"\"\"L"
},
{
"path": "TTS/tts/layers/generic/pos_encoding.py",
"chars": 2471,
"preview": "import math\n\nimport torch\nfrom torch import nn\n\n\nclass PositionalEncoding(nn.Module):\n \"\"\"Sinusoidal positional encod"
},
{
"path": "TTS/tts/layers/generic/res_conv_bn.py",
"chars": 4594,
"preview": "from torch import nn\n\n\nclass ZeroTemporalPad(nn.Module):\n \"\"\"Pad sequences to equal lentgh in the temporal dimension\""
},
{
"path": "TTS/tts/layers/generic/time_depth_sep_conv.py",
"chars": 2561,
"preview": "import torch\nfrom torch import nn\n\n\nclass TimeDepthSeparableConv(nn.Module):\n \"\"\"Time depth separable convolution as "
},
{
"path": "TTS/tts/layers/generic/transformer.py",
"chars": 3293,
"preview": "import torch\nimport torch.nn.functional as F\nfrom torch import nn\n\n\nclass FFTransformer(nn.Module):\n def __init__(sel"
},
{
"path": "TTS/tts/layers/generic/wavenet.py",
"chars": 6789,
"preview": "import torch\nfrom torch import nn\n\n\n@torch.jit.script\ndef fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):"
},
{
"path": "TTS/tts/layers/glow_tts/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/layers/glow_tts/decoder.py",
"chars": 4677,
"preview": "import torch\nfrom torch import nn\n\nfrom TTS.tts.layers.generic.normalization import ActNorm\nfrom TTS.tts.layers.glow_tts"
},
{
"path": "TTS/tts/layers/glow_tts/duration_predictor.py",
"chars": 2341,
"preview": "import torch\nfrom torch import nn\n\nfrom ..generic.normalization import LayerNorm\n\n\nclass DurationPredictor(nn.Module):\n "
},
{
"path": "TTS/tts/layers/glow_tts/encoder.py",
"chars": 6876,
"preview": "import math\n\nimport torch\nfrom torch import nn\n\nfrom TTS.tts.layers.generic.gated_conv import GatedConvBlock\nfrom TTS.tt"
},
{
"path": "TTS/tts/layers/glow_tts/glow.py",
"chars": 8343,
"preview": "import torch\nfrom packaging.version import Version\nfrom torch import nn\nfrom torch.nn import functional as F\n\nfrom TTS.t"
},
{
"path": "TTS/tts/layers/glow_tts/transformer.py",
"chars": 17585,
"preview": "import math\n\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\nfrom TTS.tts.layers.generic.normali"
},
{
"path": "TTS/tts/layers/losses.py",
"chars": 35523,
"preview": "import math\n\nimport numpy as np\nimport torch\nfrom coqpit import Coqpit\nfrom torch import nn\nfrom torch.nn import functio"
},
{
"path": "TTS/tts/layers/overflow/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/layers/overflow/common_layers.py",
"chars": 11736,
"preview": "from typing import List, Tuple\n\nimport torch\nimport torch.nn.functional as F\nfrom torch import nn\nfrom tqdm.auto import "
},
{
"path": "TTS/tts/layers/overflow/decoder.py",
"chars": 2618,
"preview": "import torch\nfrom torch import nn\n\nfrom TTS.tts.layers.glow_tts.decoder import Decoder as GlowDecoder\nfrom TTS.tts.utils"
},
{
"path": "TTS/tts/layers/overflow/neural_hmm.py",
"chars": 24712,
"preview": "from typing import List\n\nimport torch\nimport torch.distributions as tdist\nimport torch.nn.functional as F\nfrom torch imp"
},
{
"path": "TTS/tts/layers/overflow/plotting_utils.py",
"chars": 2676,
"preview": "from typing import Any\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch\n\n\ndef validate_numpy_array(value"
},
{
"path": "TTS/tts/layers/tacotron/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/layers/tacotron/attentions.py",
"chars": 19352,
"preview": "import torch\nfrom scipy.stats import betabinom\nfrom torch import nn\nfrom torch.nn import functional as F\n\nfrom TTS.tts.l"
},
{
"path": "TTS/tts/layers/tacotron/capacitron_layers.py",
"chars": 9388,
"preview": "import torch\nfrom torch import nn\nfrom torch.distributions.multivariate_normal import MultivariateNormal as MVN\nfrom tor"
},
{
"path": "TTS/tts/layers/tacotron/common_layers.py",
"chars": 4732,
"preview": "import torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\n\nclass Linear(nn.Module):\n \"\"\"Linear layer wi"
},
{
"path": "TTS/tts/layers/tacotron/gst_layers.py",
"chars": 5836,
"preview": "import torch\nimport torch.nn.functional as F\nfrom torch import nn\n\n\nclass GST(nn.Module):\n \"\"\"Global Style Token Modu"
},
{
"path": "TTS/tts/layers/tacotron/tacotron.py",
"chars": 18785,
"preview": "# coding: utf-8\n# adapted from https://github.com/r9y9/tacotron_pytorch\n\nimport torch\nfrom torch import nn\n\nfrom .attent"
},
{
"path": "TTS/tts/layers/tacotron/tacotron2.py",
"chars": 15855,
"preview": "import torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\nfrom .attentions import init_attn\nfrom .common_l"
},
{
"path": "TTS/tts/layers/vits/discriminator.py",
"chars": 3232,
"preview": "import torch\nfrom torch import nn\nfrom torch.nn.modules.conv import Conv1d\n\nfrom TTS.vocoder.models.hifigan_discriminato"
},
{
"path": "TTS/tts/layers/vits/networks.py",
"chars": 9680,
"preview": "import math\n\nimport torch\nfrom torch import nn\n\nfrom TTS.tts.layers.glow_tts.glow import WN\nfrom TTS.tts.layers.glow_tts"
},
{
"path": "TTS/tts/layers/vits/stochastic_duration_predictor.py",
"chars": 10913,
"preview": "import math\n\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\nfrom TTS.tts.layers.generic.normali"
},
{
"path": "TTS/tts/layers/vits/transforms.py",
"chars": 7234,
"preview": "# adopted from https://github.com/bayesiains/nflows\n\nimport numpy as np\nimport torch\nfrom torch.nn import functional as "
},
{
"path": "TTS/tts/models/__init__.py",
"chars": 574,
"preview": "from typing import Dict, List, Union\n\nfrom TTS.utils.generic_utils import find_module\n\n\ndef setup_model(config: \"Coqpit\""
},
{
"path": "TTS/tts/models/align_tts.py",
"chars": 19052,
"preview": "from dataclasses import dataclass, field\nfrom typing import Dict, List, Union\n\nimport torch\nfrom coqpit import Coqpit\nfr"
},
{
"path": "TTS/tts/models/base_tacotron.py",
"chars": 12118,
"preview": "import copy\nfrom abc import abstractmethod\nfrom typing import Dict, Tuple\n\nimport torch\nfrom coqpit import Coqpit\nfrom t"
},
{
"path": "TTS/tts/models/base_tts.py",
"chars": 19389,
"preview": "import os\nimport random\nfrom typing import Dict, List, Tuple, Union\n\nimport torch\nimport torch.distributed as dist\nfrom "
},
{
"path": "TTS/tts/models/forward_tts.py",
"chars": 35341,
"preview": "from dataclasses import dataclass, field\nfrom typing import Dict, List, Tuple, Union\n\nimport torch\nfrom coqpit import Co"
},
{
"path": "TTS/tts/models/glow_tts.py",
"chars": 24314,
"preview": "import math\nfrom typing import Dict, List, Tuple, Union\n\nimport torch\nfrom coqpit import Coqpit\nfrom torch import nn\nfro"
},
{
"path": "TTS/tts/models/neuralhmm_tts.py",
"chars": 17346,
"preview": "import os\nfrom typing import Dict, List, Union\n\nimport torch\nfrom coqpit import Coqpit\nfrom torch import nn\nfrom trainer"
},
{
"path": "TTS/tts/models/overflow.py",
"chars": 17644,
"preview": "import os\nfrom typing import Dict, List, Union\n\nimport torch\nfrom coqpit import Coqpit\nfrom torch import nn\nfrom trainer"
},
{
"path": "TTS/tts/models/tacotron.py",
"chars": 18779,
"preview": "# coding: utf-8\n\nfrom typing import Dict, List, Tuple, Union\n\nimport torch\nfrom torch import nn\nfrom torch.cuda.amp.auto"
},
{
"path": "TTS/tts/models/tacotron2.py",
"chars": 19525,
"preview": "# coding: utf-8\n\nfrom typing import Dict, List, Union\n\nimport torch\nfrom torch import nn\nfrom torch.cuda.amp.autocast_mo"
},
{
"path": "TTS/tts/models/vits.py",
"chars": 74930,
"preview": "import math\nimport os\nfrom dataclasses import dataclass, field, replace\nfrom itertools import chain\nfrom typing import D"
},
{
"path": "TTS/tts/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/data.py",
"chars": 2924,
"preview": "import bisect\n\nimport numpy as np\nimport torch\n\n\ndef _pad_data(x, length):\n _pad = 0\n assert x.ndim == 1\n retur"
},
{
"path": "TTS/tts/utils/helpers.py",
"chars": 8044,
"preview": "import numpy as np\nimport torch\nfrom torch.nn import functional as F\n\ntry:\n from TTS.tts.utils.monotonic_align.core i"
},
{
"path": "TTS/tts/utils/languages.py",
"chars": 4376,
"preview": "import os\nfrom typing import Any, Dict, List\n\nimport fsspec\nimport numpy as np\nimport torch\nfrom coqpit import Coqpit\n\nf"
},
{
"path": "TTS/tts/utils/managers.py",
"chars": 12888,
"preview": "import json\nimport random\nfrom typing import Any, Dict, List, Tuple, Union\n\nimport fsspec\nimport numpy as np\nimport torc"
},
{
"path": "TTS/tts/utils/measures.py",
"chars": 533,
"preview": "def alignment_diagonal_score(alignments, binary=False):\n \"\"\"\n Compute how diagonal alignment predictions are. It i"
},
{
"path": "TTS/tts/utils/monotonic_align/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/monotonic_align/core.c",
"chars": 859808,
"preview": "/* Generated by Cython 0.29.28 */\n\n/* BEGIN: Cython Metadata\n{\n \"distutils\": {\n \"depends\": [],\n \"name\":"
},
{
"path": "TTS/tts/utils/monotonic_align/core.pyx",
"chars": 1236,
"preview": "import numpy as np\n\ncimport cython\ncimport numpy as np\n\nfrom cython.parallel import prange\n\n\n@cython.boundscheck(False)\n"
},
{
"path": "TTS/tts/utils/monotonic_align/setup.py",
"chars": 207,
"preview": "# from distutils.core import setup\n# from Cython.Build import cythonize\n# import numpy\n\n# setup(name='monotonic_align',\n"
},
{
"path": "TTS/tts/utils/speakers.py",
"chars": 9622,
"preview": "import json\nimport os\nfrom typing import Any, Dict, List, Union\n\nimport fsspec\nimport numpy as np\nimport torch\nfrom coqp"
},
{
"path": "TTS/tts/utils/ssim.py",
"chars": 14961,
"preview": "# Adopted from https://github.com/photosynthesis-team/piq\n\nfrom typing import List, Optional, Tuple, Union\n\nimport torch"
},
{
"path": "TTS/tts/utils/synthesis.py",
"chars": 10728,
"preview": "from typing import Dict\n\nimport numpy as np\nimport torch\nfrom torch import nn\n\n\ndef numpy_to_torch(np_array, dtype, cuda"
},
{
"path": "TTS/tts/utils/text/__init__.py",
"chars": 54,
"preview": "from TTS.tts.utils.text.tokenizer import TTSTokenizer\n"
},
{
"path": "TTS/tts/utils/text/characters.py",
"chars": 15493,
"preview": "from dataclasses import replace\nfrom typing import Dict\n\nfrom TTS.tts.configs.shared_configs import CharactersConfig\n\n\nd"
},
{
"path": "TTS/tts/utils/text/chinese_mandarin/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/text/chinese_mandarin/numbers.py",
"chars": 4258,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\n# Licensed under WTFPL or the Unlicense or CC0.\n# This uses Python 3, bu"
},
{
"path": "TTS/tts/utils/text/chinese_mandarin/phonemizer.py",
"chars": 1138,
"preview": "from typing import List\n\nimport jieba\nimport pypinyin\n\nfrom .pinyinToPhonemes import PINYIN_DICT\n\n\ndef _chinese_characte"
},
{
"path": "TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py",
"chars": 8550,
"preview": "PINYIN_DICT = {\n \"a\": [\"a\"],\n \"ai\": [\"ai\"],\n \"an\": [\"an\"],\n \"ang\": [\"ɑŋ\"],\n \"ao\": [\"aʌ\"],\n \"ba\": [\"ba\""
},
{
"path": "TTS/tts/utils/text/cleaners.py",
"chars": 4520,
"preview": "\"\"\"Set of default text cleaners\"\"\"\n# TODO: pick the cleaner for languages dynamically\n\nimport re\n\nfrom anyascii import a"
},
{
"path": "TTS/tts/utils/text/cmudict.py",
"chars": 2911,
"preview": "# -*- coding: utf-8 -*-\n\nimport re\n\nVALID_SYMBOLS = [\n \"AA\",\n \"AA0\",\n \"AA1\",\n \"AA2\",\n \"AE\",\n \"AE0\",\n "
},
{
"path": "TTS/tts/utils/text/english/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/text/english/abbreviations.py",
"chars": 686,
"preview": "import re\n\n# List of (regular expression, replacement) pairs for abbreviations in english:\nabbreviations_en = [\n (re."
},
{
"path": "TTS/tts/utils/text/english/number_norm.py",
"chars": 2805,
"preview": "\"\"\" from https://github.com/keithito/tacotron \"\"\"\n\nimport re\nfrom typing import Dict\n\nimport inflect\n\n_inflect = inflect"
},
{
"path": "TTS/tts/utils/text/english/time_norm.py",
"chars": 1174,
"preview": "import re\n\nimport inflect\n\n_inflect = inflect.engine()\n\n_time_re = re.compile(\n r\"\"\"\\b\n ((0?"
},
{
"path": "TTS/tts/utils/text/french/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/text/french/abbreviations.py",
"chars": 1361,
"preview": "import re\n\n# List of (regular expression, replacement) pairs for abbreviations in french:\nabbreviations_fr = [\n (re.c"
},
{
"path": "TTS/tts/utils/text/japanese/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/text/japanese/phonemizer.py",
"chars": 8494,
"preview": "# Convert Japanese text to phonemes which is\n# compatible with Julius https://github.com/julius-speech/segmentation-kit\n"
},
{
"path": "TTS/tts/utils/text/korean/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/tts/utils/text/korean/ko_dictionary.py",
"chars": 712,
"preview": "# coding: utf-8\n# Add the word you want to the dictionary.\netc_dictionary = {\"1+1\": \"원플러스원\", \"2+1\": \"투플러스원\"}\n\n\nenglish_d"
},
{
"path": "TTS/tts/utils/text/korean/korean.py",
"chars": 950,
"preview": "# coding: utf-8\n# Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean."
},
{
"path": "TTS/tts/utils/text/korean/phonemizer.py",
"chars": 880,
"preview": "from jamo import hangul_to_jamo\n\nfrom TTS.tts.utils.text.korean.korean import normalize\n\ng2p = None\n\n\ndef korean_text_to"
},
{
"path": "TTS/tts/utils/text/phonemizers/__init__.py",
"chars": 2013,
"preview": "from TTS.tts.utils.text.phonemizers.base import BasePhonemizer\nfrom TTS.tts.utils.text.phonemizers.espeak_wrapper import"
},
{
"path": "TTS/tts/utils/text/phonemizers/base.py",
"chars": 4358,
"preview": "import abc\nfrom typing import List, Tuple\n\nfrom TTS.tts.utils.text.punctuation import Punctuation\n\n\nclass BasePhonemizer"
},
{
"path": "TTS/tts/utils/text/phonemizers/espeak_wrapper.py",
"chars": 8682,
"preview": "import logging\nimport re\nimport subprocess\nfrom typing import Dict, List\n\nfrom packaging.version import Version\n\nfrom TT"
},
{
"path": "TTS/tts/utils/text/phonemizers/gruut_wrapper.py",
"chars": 5125,
"preview": "import importlib\nfrom typing import List\n\nimport gruut\nfrom gruut_ipa import IPA\n\nfrom TTS.tts.utils.text.phonemizers.ba"
},
{
"path": "TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py",
"chars": 2048,
"preview": "from typing import Dict\n\nfrom TTS.tts.utils.text.japanese.phonemizer import japanese_text_to_phonemes\nfrom TTS.tts.utils"
},
{
"path": "TTS/tts/utils/text/phonemizers/ko_kr_phonemizer.py",
"chars": 2319,
"preview": "from typing import Dict\n\nfrom TTS.tts.utils.text.korean.phonemizer import korean_text_to_phonemes\nfrom TTS.tts.utils.tex"
},
{
"path": "TTS/tts/utils/text/phonemizers/multi_phonemizer.py",
"chars": 2559,
"preview": "from typing import Dict, List\n\nfrom TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name"
},
{
"path": "TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py",
"chars": 1805,
"preview": "from typing import Dict\n\nfrom TTS.tts.utils.text.chinese_mandarin.phonemizer import chinese_text_to_phonemes\nfrom TTS.tt"
},
{
"path": "TTS/tts/utils/text/punctuation.py",
"chars": 5443,
"preview": "import collections\nimport re\nfrom enum import Enum\n\nimport six\n\n_DEF_PUNCS = ';:,.!?¡¿—…\"«»“”'\n\n_PUNC_IDX = collections."
},
{
"path": "TTS/tts/utils/text/tokenizer.py",
"chars": 8988,
"preview": "from typing import Callable, Dict, List, Union\n\nfrom TTS.tts.utils.text import cleaners\nfrom TTS.tts.utils.text.characte"
},
{
"path": "TTS/tts/utils/visual.py",
"chars": 6672,
"preview": "import librosa\nimport matplotlib\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch\nfrom matplotlib.colors "
},
{
"path": "TTS/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/utils/audio/__init__.py",
"chars": 53,
"preview": "from TTS.utils.audio.processor import AudioProcessor\n"
},
{
"path": "TTS/utils/audio/numpy_transforms.py",
"chars": 15240,
"preview": "from typing import Tuple\n\nimport librosa\nimport numpy as np\nimport scipy\nimport soundfile as sf\nfrom librosa import magp"
},
{
"path": "TTS/utils/audio/processor.py",
"chars": 28378,
"preview": "from typing import Dict, Tuple\n\nimport librosa\nimport numpy as np\nimport scipy.io.wavfile\nimport scipy.signal\nimport sou"
},
{
"path": "TTS/utils/audio/torch_transforms.py",
"chars": 5118,
"preview": "import librosa\nimport torch\nfrom torch import nn\n\n\nclass TorchSTFT(nn.Module): # pylint: disable=abstract-method\n \"\""
},
{
"path": "TTS/utils/callbacks.py",
"chars": 4156,
"preview": "class TrainerCallback:\n @staticmethod\n def on_init_start(trainer) -> None:\n if hasattr(trainer.model, \"modu"
},
{
"path": "TTS/utils/capacitron_optimizer.py",
"chars": 2434,
"preview": "from typing import Generator\n\nfrom trainer.trainer_utils import get_optimizer\n\n\nclass CapacitronOptimizer:\n \"\"\"Double"
},
{
"path": "TTS/utils/distribute.py",
"chars": 716,
"preview": "# edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py\nimport torch\nimport torch.d"
},
{
"path": "TTS/utils/download.py",
"chars": 7413,
"preview": "# Adapted from https://github.com/pytorch/audio/\n\nimport hashlib\nimport logging\nimport os\nimport tarfile\nimport urllib\ni"
},
{
"path": "TTS/utils/downloaders.py",
"chars": 4737,
"preview": "import os\nfrom typing import Optional\n\nfrom TTS.utils.download import download_kaggle_dataset, download_url, extract_arc"
},
{
"path": "TTS/utils/generic_utils.py",
"chars": 7145,
"preview": "# -*- coding: utf-8 -*-\nimport datetime\nimport importlib\nimport os\nimport re\nimport subprocess\nimport sys\nfrom pathlib i"
},
{
"path": "TTS/utils/io.py",
"chars": 6964,
"preview": "import datetime\nimport json\nimport os\nimport pickle as pickle_tts\nimport shutil\nfrom typing import Any, Callable, Dict, "
},
{
"path": "TTS/utils/manage.py",
"chars": 18297,
"preview": "import json\nimport os\nimport zipfile\nfrom pathlib import Path\nfrom shutil import copyfile, rmtree\nfrom typing import Dic"
},
{
"path": "TTS/utils/radam.py",
"chars": 4575,
"preview": "# modified from https://github.com/LiyuanLucasLiu/RAdam\n\nimport math\n\nimport torch\nfrom torch.optim.optimizer import Opt"
},
{
"path": "TTS/utils/samplers.py",
"chars": 6778,
"preview": "import math\nimport random\nfrom typing import Callable, List, Union\n\nfrom torch.utils.data.sampler import BatchSampler, S"
},
{
"path": "TTS/utils/synthesizer.py",
"chars": 18938,
"preview": "import time\nfrom typing import List\n\nimport numpy as np\nimport pysbd\nimport torch\n\nfrom TTS.config import load_config\nfr"
},
{
"path": "TTS/utils/training.py",
"chars": 1538,
"preview": "import numpy as np\nimport torch\n\n\ndef check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=None):\n r\"\""
},
{
"path": "TTS/utils/vad.py",
"chars": 2786,
"preview": "import soundfile as sf\nimport torch\nimport torchaudio\n\n\ndef read_audio(path):\n wav, sr = torchaudio.load(path)\n\n i"
},
{
"path": "TTS/vc/configs/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/vc/configs/freevc_config.py",
"chars": 198,
"preview": "from dataclasses import dataclass, field\nfrom typing import List\n\nfrom TTS.vc.configs.shared_configs import BaseVCConfig"
},
{
"path": "TTS/vc/configs/shared_configs.py",
"chars": 6514,
"preview": "from dataclasses import asdict, dataclass, field\nfrom typing import Dict, List\n\nfrom coqpit import Coqpit, check_argumen"
},
{
"path": "TTS/vc/models/__init__.py",
"chars": 597,
"preview": "import importlib\nimport re\nfrom typing import Dict, List, Union\n\n\ndef to_camel(text):\n text = text.capitalize()\n r"
},
{
"path": "TTS/vc/models/base_vc.py",
"chars": 18726,
"preview": "import os\nimport random\nfrom typing import Dict, List, Tuple, Union\n\nimport torch\nimport torch.distributed as dist\nfrom "
},
{
"path": "TTS/vc/models/freevc.py",
"chars": 31242,
"preview": "from dataclasses import dataclass, field\nfrom typing import Dict, List, Optional, Tuple, Union\n\nimport librosa\nimport nu"
},
{
"path": "TTS/vc/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "TTS/vc/modules/freevc/__init__.py",
"chars": 0,
"preview": ""
}
]
// ... and 139 more files (download for full content)
About this extraction
This page contains the full source code of the gia-guar/JARVIS-ChatGPT GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 339 files (2.8 MB), approximately 753.0k tokens, and a symbol index with 2609 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.