[
  {
    "path": ".gitattributes",
    "content": "# Auto detect text files and perform LF normalization\n* text=auto\n"
  },
  {
    "path": ".gitignore",
    "content": ".idea\naudio/*.mp3\naudio/*.wav"
  },
  {
    "path": "AniVoiceChanger_colab.ipynb",
    "content": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SociallyIneptWeeb/AniVoiceChanger/blob/main/AniVoiceChanger_colab.ipynb)\"\n      ],\n      \"metadata\": {\n        \"id\": \"wEsZxTheMO8_\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"jwu07JgqoFON\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Mount Drive\\n\",\n        \"\\n\",\n        \"from google.colab import drive\\n\",\n        \"drive.mount('/content/drive')\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"ge_97mfpgqTm\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Clone repository\\n\",\n        \"!git init\\n\",\n        \"!git remote add origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git\\n\",\n        \"!git fetch origin 195a14e5c51ec02774c4d1961d0a4b67755e25c8 --depth=1\\n\",\n        \"!git reset --hard FETCH_HEAD\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#@title Set Mode and Parameters\\n\",\n        \"#@markdown ## Mode\\n\",\n        \"#@markdown To run the WebUI for training a voice model, set the mode to Training.\\n\",\n        \"\\n\",\n        \"#@markdown To run the voice changer server for main_colab.py to connect to, set the mode to Inference.\\n\",\n        \"MODE = 'Training' #@param ['Training', 'Inference']\\n\",\n        \"\\n\",\n        \"#@markdown If MODE: Training, specify the path to a zip file containing the voice clips in your google drive to be used for training. 
If MODE: Inference, ignore.\\n\",\n        \"DATASET = 'char_voice_lines.zip' #@param {type:\\\"string\\\"}\\n\",\n        \"DATASET = '/content/drive/MyDrive/' + DATASET\\n\",\n        \"\\n\",\n        \"if MODE == 'Training':\\n\",\n        \"  !mkdir -p dataset\\n\",\n        \"  !unzip -d dataset -B {DATASET}\\n\",\n        \"  # rename duplicate filenames in dataset\\n\",\n        \"  !ls -a /content/dataset/\\n\",\n        \"  !rename 's/(\\\\w+)\\\\.(\\\\w+)~(\\\\d*)/$1_$3.$2/' /content/dataset/*.*~*\\n\",\n        \"\\n\",\n        \"#@markdown ## Upload a trained model\\n\",\n        \"#@markdown Only fill the below fields if you would like to continue training a previously trained model or use it for inference. Specify the name and epoch number of the model to be used or trained. The folder containing the trained files in your google drive will be used.\\n\",\n        \"\\n\",\n        \"MODELNAME = \\\"\\\"  #@param {type:\\\"string\\\"}\\n\",\n        \"MODELEPOCH = 2333333  #@param {type:\\\"integer\\\"}\\n\",\n        \"if MODELNAME:\\n\",\n        \"  !mkdir -p /content/logs/{MODELNAME}\\n\",\n        \"  !cp /content/drive/MyDrive/{MODELNAME}_files/*.index /content/logs/{MODELNAME}/\\n\",\n        \"  !cp /content/drive/MyDrive/{MODELNAME}_files/{MODELNAME}.pth /content/weights/\\n\",\n        \"  if MODE == 'Training':\\n\",\n        \"    !cp /content/drive/MyDrive/{MODELNAME}_files/D_{MODELEPOCH}.pth /content/logs/{MODELNAME}/\\n\",\n        \"    !cp /content/drive/MyDrive/{MODELNAME}_files/G_{MODELEPOCH}.pth /content/logs/{MODELNAME}/\\n\",\n        \"    !cp /content/drive/MyDrive/{MODELNAME}_files/*.npy /content/logs/{MODELNAME}/\"\n      ],\n      \"metadata\": {\n        \"id\": \"S8wlJabmgeBK\",\n        \"cellView\": \"form\"\n      },\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"pqE0PrnuRqI2\",\n        
\"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Install requirements\\n\",\n        \"!pip install -r requirements.txt\\n\",\n        \"!apt -y install -qq aria2\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"UG3XpUwEomUz\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Download pretrained models\\n\",\n        \"\\n\",\n        \"# v1\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/pretrained -o D32k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/pretrained -o D40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/pretrained -o D48k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/pretrained -o G32k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/pretrained -o G40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/pretrained -o G48k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/pretrained -o f0D32k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M 
https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/pretrained -o f0D40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/pretrained -o f0D48k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/pretrained -o f0G32k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/pretrained -o f0G40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/pretrained -o f0G48k.pth\\n\",\n        \"\\n\",\n        \"# v2\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d /content/pretrained_v2 -o D40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d /content/pretrained_v2 -o G40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d /content/pretrained_v2 -o f0D40k.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d /content/pretrained_v2 -o f0G40k.pth\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"HugjmZqZRuiF\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title 
Download Vocal Separation model\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"2RCaT9FTR0ej\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Download hubert_base model\\n\",\n        \"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content -o hubert_base.pt\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"7vh6vphDwO0b\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Run WebUI for Training, skip if Inference\\n\",\n        \"if MODE == 'Training':\\n\",\n        \"  # %load_ext tensorboard\\n\",\n        \"  # %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\\n\",\n        \"  !python3 infer-web.py --colab --pycmd python3\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"execution_count\": null,\n      \"metadata\": {\n        \"id\": \"FgJuNeAwx5Y_\",\n        \"cellView\": \"form\"\n      },\n      \"outputs\": [],\n      \"source\": [\n        \"#@title Manually back up the trained model files to Google Drive for Mode: \\\"Training\\\"\\n\",\n        \"#@markdown MODELNAME should be the EXPERIMENT_NAME that you typed.\\n\",\n        \"\\n\",\n        
\"#@markdown If the name of the model is john, and in the logs/john folder is a file called D_2333333.pth, set the MODELNAME: john and MODELEPOCH: 2333333\\n\",\n        \"\\n\",\n        \"if MODE == 'Training':\\n\",\n        \"\\n\",\n        \"#@markdown Model name\\n\",\n        \"  MODELNAME = \\\"\\\"  #@param {type:\\\"string\\\"}\\n\",\n        \"  if MODELNAME:\\n\",\n        \"#@markdown Epoch number\\n\",\n        \"    MODELEPOCH = 2333333  #@param {type:\\\"integer\\\"}\\n\",\n        \"#@markdown Save intermediate models if you would like to continue training the model later\\n\",\n        \"    SAVE_INTERMEDIATE = True  #@param {type:\\\"boolean\\\"}\\n\",\n        \"\\n\",\n        \"    !mkdir -p /content/drive/MyDrive/{MODELNAME}_files\\n\",\n        \"\\n\",\n        \"    if SAVE_INTERMEDIATE:\\n\",\n        \"      !cp /content/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_files/\\n\",\n        \"      !cp /content/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_files/\\n\",\n        \"      !cp /content/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/{MODELNAME}_files/\\n\",\n        \"\\n\",\n        \"    !cp /content/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/{MODELNAME}_files/\\n\",\n        \"    !cp /content/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}_files/\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"source\": [\n        \"# Inference\\n\",\n        \"\\n\",\n        \"The code below is to be run for Mode: Inference\\n\",\n        \"\\n\",\n        \"When prompted `Proceed (Y/n)?`, click beside it, type `Y` and press `Enter`.\\n\",\n        \"\\n\",\n        \"If `WARNING: The following packages were previously imported in this runtime: [numpy] You must restart the runtime in order to use newly installed versions.` is seen in the output, click `Restart Runtime` and then continue running the next cell.\"\n      ],\n      
\"metadata\": {\n        \"id\": \"0JTjkTndoHb6\"\n      }\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"#@title Install specific numpy version. If needed, click Restart Runtime before running the bottom two cells.\\n\",\n        \"if MODE == 'Inference':\\n\",\n        \"  !pip uninstall numpy\\n\",\n        \"  !pip install numpy==1.23.5\"\n      ],\n      \"metadata\": {\n        \"id\": \"cfJV7kbKqtnw\",\n        \"cellView\": \"form\"\n      },\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"try:\\n\",\n        \"  MODE\\n\",\n        \"except NameError:\\n\",\n        \"  MODE = 'Inference'\\n\",\n        \"\\n\",\n        \"#@title Set your NGROK_AUTH_TOKEN.\\n\",\n        \"if MODE == 'Inference':\\n\",\n        \"  !pip install flask-ngrok3 -q\\n\",\n        \"\\n\",\n        \"#@markdown Obtain your Ngrok auth token from [here](https://dashboard.ngrok.com/get-started/your-authtoken)\\n\",\n        \"  NGROK_AUTH_TOKEN = '' #@param {type:\\\"string\\\"}\"\n      ],\n      \"metadata\": {\n        \"id\": \"OL-1YvcbOiWd\",\n        \"cellView\": \"form\"\n      },\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"source\": [\n        \"if MODE != 'Inference':\\n\",\n        \"  raise Exception('Mode is not set to Inference.')\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"#@title Run RVC Inference server\\n\",\n        \"import json\\n\",\n        \"import sys\\n\",\n        \"import wave\\n\",\n        \"from pathlib import Path\\n\",\n        \"\\n\",\n        \"BASE_DIR = Path('/content')\\n\",\n        \"sys.path.append(str(BASE_DIR))\\n\",\n        \"\\n\",\n        \"import torch\\n\",\n        \"from multiprocessing import cpu_count\\n\",\n        \"from flask_ngrok3 import run_with_ngrok\\n\",\n        \"from flask import Flask, request, send_file\\n\",\n        
\"\\n\",\n        \"from vc_infer_pipeline import VC\\n\",\n        \"from infer_pack.models import (\\n\",\n        \"    SynthesizerTrnMs256NSFsid,\\n\",\n        \"    SynthesizerTrnMs256NSFsid_nono,\\n\",\n        \"    SynthesizerTrnMs768NSFsid,\\n\",\n        \"    SynthesizerTrnMs768NSFsid_nono,\\n\",\n        \")\\n\",\n        \"from my_utils import load_audio\\n\",\n        \"from fairseq import checkpoint_utils\\n\",\n        \"from scipy.io import wavfile\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"INPUT_VOICE_PATH = 'input.mp3'\\n\",\n        \"OUTPUT_VOICE_PATH = 'output.wav'\\n\",\n        \"MODEL_NAME = ''\\n\",\n        \"DEVICE = 'cuda:0'\\n\",\n        \"cpt = None\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"class Config:\\n\",\n        \"  def __init__(self, device, is_half):\\n\",\n        \"    self.device = device\\n\",\n        \"    self.is_half = is_half\\n\",\n        \"    self.n_cpu = 0\\n\",\n        \"    self.gpu_name = None\\n\",\n        \"    self.gpu_mem = None\\n\",\n        \"    self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()\\n\",\n        \"\\n\",\n        \"  def device_config(self) -> tuple:\\n\",\n        \"    if torch.cuda.is_available():\\n\",\n        \"      i_device = int(self.device.split(\\\":\\\")[-1])\\n\",\n        \"      self.gpu_name = torch.cuda.get_device_name(i_device)\\n\",\n        \"\\n\",\n        \"      if (\\n\",\n        \"        (\\\"16\\\" in self.gpu_name and \\\"V100\\\" not in self.gpu_name.upper())\\n\",\n        \"        or \\\"P40\\\" in self.gpu_name.upper()\\n\",\n        \"        or \\\"1060\\\" in self.gpu_name\\n\",\n        \"        or \\\"1070\\\" in self.gpu_name\\n\",\n        \"        or \\\"1080\\\" in self.gpu_name\\n\",\n        \"      ):\\n\",\n        \"        print(\\\"16 series/10 series P40 forced single precision\\\")\\n\",\n        \"        self.is_half = False\\n\",\n        \"        for config_file in 
[\\\"32k.json\\\", \\\"40k.json\\\", \\\"48k.json\\\"]:\\n\",\n        \"          with open(f\\\"configs/{config_file}\\\", \\\"r\\\") as f:\\n\",\n        \"            strr = f.read().replace(\\\"true\\\", \\\"false\\\")\\n\",\n        \"          with open(f\\\"configs/{config_file}\\\", \\\"w\\\") as f:\\n\",\n        \"            f.write(strr)\\n\",\n        \"        with open(\\\"trainset_preprocess_pipeline_print.py\\\", \\\"r\\\") as f:\\n\",\n        \"          strr = f.read().replace(\\\"3.7\\\", \\\"3.0\\\")\\n\",\n        \"        with open(\\\"trainset_preprocess_pipeline_print.py\\\", \\\"w\\\") as f:\\n\",\n        \"          f.write(strr)\\n\",\n        \"      else:\\n\",\n        \"        self.gpu_name = None\\n\",\n        \"\\n\",\n        \"      self.gpu_mem = int(\\n\",\n        \"        torch.cuda.get_device_properties(i_device).total_memory\\n\",\n        \"        / 1024\\n\",\n        \"        / 1024\\n\",\n        \"        / 1024\\n\",\n        \"        + 0.4\\n\",\n        \"      )\\n\",\n        \"      if self.gpu_mem <= 4:\\n\",\n        \"        with open(\\\"trainset_preprocess_pipeline_print.py\\\", \\\"r\\\") as f:\\n\",\n        \"          strr = f.read().replace(\\\"3.7\\\", \\\"3.0\\\")\\n\",\n        \"        with open(\\\"trainset_preprocess_pipeline_print.py\\\", \\\"w\\\") as f:\\n\",\n        \"          f.write(strr)\\n\",\n        \"\\n\",\n        \"    elif torch.backends.mps.is_available():\\n\",\n        \"      print(\\\"No supported N-card found, use MPS for inference\\\")\\n\",\n        \"      self.device = \\\"mps\\\"\\n\",\n        \"    else:\\n\",\n        \"      print(\\\"No supported N-card found, use CPU for inference\\\")\\n\",\n        \"      self.device = \\\"cpu\\\"\\n\",\n        \"      self.is_half = True\\n\",\n        \"\\n\",\n        \"    if self.n_cpu == 0:\\n\",\n        \"      self.n_cpu = cpu_count()\\n\",\n        \"\\n\",\n        \"    if self.is_half:\\n\",\n        
\"      # 6G memory config\\n\",\n        \"      x_pad = 3\\n\",\n        \"      x_query = 10\\n\",\n        \"      x_center = 60\\n\",\n        \"      x_max = 65\\n\",\n        \"    else:\\n\",\n        \"      # 5G memory config\\n\",\n        \"      x_pad = 1\\n\",\n        \"      x_query = 6\\n\",\n        \"      x_center = 38\\n\",\n        \"      x_max = 41\\n\",\n        \"\\n\",\n        \"    if self.gpu_mem != None and self.gpu_mem <= 4:\\n\",\n        \"      x_pad = 1\\n\",\n        \"      x_query = 5\\n\",\n        \"      x_center = 30\\n\",\n        \"      x_max = 32\\n\",\n        \"\\n\",\n        \"    return x_pad, x_query, x_center, x_max\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"CONFIG = Config(DEVICE, True)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def load_hubert():\\n\",\n        \"  models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(['hubert_base.pt'], suffix='', )\\n\",\n        \"  hubert = models[0]\\n\",\n        \"  hubert = hubert.to(DEVICE)\\n\",\n        \"\\n\",\n        \"  if True:\\n\",\n        \"    hubert = hubert.half()\\n\",\n        \"  else:\\n\",\n        \"    hubert = hubert.float()\\n\",\n        \"\\n\",\n        \"  hubert.eval()\\n\",\n        \"  return hubert\\n\",\n        \"\\n\",\n        \"HUBERT_MODEL = load_hubert()\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def get_vc(device, is_half, config):\\n\",\n        \"  global cpt, version, net_g, tgt_sr, vc\\n\",\n        \"  model_path = BASE_DIR / 'weights' / f'{MODEL_NAME}.pth'\\n\",\n        \"  if not model_path.exists():\\n\",\n        \"    print(f'The model {model_path} does not exist. 
Please ensure that you have filled in the proper MODEL_NAME in your .env file.')\\n\",\n        \"    return None\\n\",\n        \"\\n\",\n        \"  model_path = str(model_path)\\n\",\n        \"  print(f'loading pth {model_path}')\\n\",\n        \"  cpt = torch.load(model_path, map_location='cpu')\\n\",\n        \"  tgt_sr = cpt[\\\"config\\\"][-1]\\n\",\n        \"  cpt[\\\"config\\\"][-3] = cpt[\\\"weight\\\"][\\\"emb_g.weight\\\"].shape[0]\\n\",\n        \"  if_f0 = cpt.get(\\\"f0\\\", 1)\\n\",\n        \"  version = cpt.get(\\\"version\\\", \\\"v1\\\")\\n\",\n        \"\\n\",\n        \"  if version == \\\"v1\\\":\\n\",\n        \"    if if_f0 == 1:\\n\",\n        \"      net_g = SynthesizerTrnMs256NSFsid(*cpt[\\\"config\\\"], is_half=is_half)\\n\",\n        \"    else:\\n\",\n        \"      net_g = SynthesizerTrnMs256NSFsid_nono(*cpt[\\\"config\\\"])\\n\",\n        \"  elif version == \\\"v2\\\":\\n\",\n        \"    if if_f0 == 1:\\n\",\n        \"      net_g = SynthesizerTrnMs768NSFsid(*cpt[\\\"config\\\"], is_half=is_half)\\n\",\n        \"    else:\\n\",\n        \"      net_g = SynthesizerTrnMs768NSFsid_nono(*cpt[\\\"config\\\"])\\n\",\n        \"\\n\",\n        \"  del net_g.enc_q\\n\",\n        \"  print(net_g.load_state_dict(cpt[\\\"weight\\\"], strict=False))\\n\",\n        \"  net_g.eval().to(device)\\n\",\n        \"\\n\",\n        \"  if is_half:\\n\",\n        \"    net_g = net_g.half()\\n\",\n        \"  else:\\n\",\n        \"    net_g = net_g.float()\\n\",\n        \"\\n\",\n        \"  vc = VC(tgt_sr, config)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"def rvc_infer(pitch_change, pitch_extraction_algo, volume_envelope, index_rate):\\n\",\n        \"  logs_dir = BASE_DIR / 'logs' / MODEL_NAME\\n\",\n        \"  index_path = ''\\n\",\n        \"  for file in logs_dir.iterdir():\\n\",\n        \"    if file.suffix == '.index':\\n\",\n        \"      index_path = str(logs_dir / file.name)\\n\",\n        \"      break\\n\",\n        
\"\\n\",\n        \"  # vc single\\n\",\n        \"  audio = load_audio(INPUT_VOICE_PATH, 16000)\\n\",\n        \"  times = [0, 0, 0]\\n\",\n        \"  if_f0 = cpt.get('f0', 1)\\n\",\n        \"  audio_opt = vc.pipeline(HUBERT_MODEL, net_g, 0, audio, INPUT_VOICE_PATH, times, pitch_change, pitch_extraction_algo, index_path, index_rate, if_f0, 3, tgt_sr, 0, volume_envelope, version, 0.33, f0_file=None)\\n\",\n        \"  wavfile.write(OUTPUT_VOICE_PATH, tgt_sr, audio_opt)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"app = Flask(__name__)\\n\",\n        \"run_with_ngrok(app, auth_token=NGROK_AUTH_TOKEN)\\n\",\n        \"\\n\",\n        \"@app.route('/', methods=['GET'])\\n\",\n        \"def test():\\n\",\n        \"  response = {'status':'OK','message':'Test'}\\n\",\n        \"  return json.dumps(response)\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"@app.route('/infer', methods=['POST'])\\n\",\n        \"def infer():\\n\",\n        \"  global MODEL_NAME, cpt\\n\",\n        \"  model_name = request.args.get('model')\\n\",\n        \"  if MODEL_NAME != model_name:\\n\",\n        \"    MODEL_NAME = model_name\\n\",\n        \"    if cpt:\\n\",\n        \"      del cpt\\n\",\n        \"    get_vc(DEVICE, True, CONFIG)\\n\",\n        \"\\n\",\n        \"  pitch_change = int(request.args.get('pitch'))\\n\",\n        \"  pitch_extraction_algo = request.args.get('algo')\\n\",\n        \"  volume_envelope = float(request.args.get('volume'))\\n\",\n        \"  index_rate = float(request.args.get('index_rate'))\\n\",\n        \"  audio_data = request.files['audio_file']\\n\",\n        \"  audio_data.save(INPUT_VOICE_PATH)\\n\",\n        \"  rvc_infer(pitch_change, pitch_extraction_algo, volume_envelope, index_rate)\\n\",\n        \"  return send_file(OUTPUT_VOICE_PATH, mimetype=\\\"audio/wav\\\")\\n\",\n        \"\\n\",\n        \"app.run()\"\n      ],\n      \"metadata\": {\n        \"id\": \"3qcqylOdMDFg\",\n        \"cellView\": \"form\"\n      },\n      
\"execution_count\": null,\n      \"outputs\": []\n    }\n  ],\n  \"metadata\": {\n    \"accelerator\": \"GPU\",\n    \"colab\": {\n      \"private_outputs\": true,\n      \"provenance\": [],\n      \"gpuType\": \"T4\"\n    },\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"name\": \"python3\"\n    },\n    \"language_info\": {\n      \"name\": \"python\"\n    }\n  },\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0\n}\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2023 SociallyIneptWeeb\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# AniVoiceChanger\n\n![](thumbnail.jpg?raw=true)\n\nAn \"extension\" for Retrieval-based Voice Conversion WebUI. Provides a way to record your voice, convert it using a trained voice model, and output it in voice-chat of any application without running the webui.\n\nShowcase: https://www.youtube.com/watch?v=C-PqTbh0LxY\n\nSetup Guide: https://www.youtube.com/watch?v=K4vVW7iA1w8\n\n## Setup\n\n### Prerequisites\n\n#### Install Git\n\nFollow the instructions [here](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) to install Git on your computer.\n\n#### Install 7-Zip\n\nDownload and Install the 7-Zip application from [here](https://www.7-zip.org/download.html).\n\nThis is used to extract the zipped RVC WebUI application after it has been downloaded.\n\n#### Install Virtual Audio Cable\n\nDownload and Install VB-CABLE Driver from [here](https://vb-audio.com/Cable/) by extracting all files and Run Setup Program in administrator mode. Reboot after installation.\n\nThis is used to pipe the converted voice audio into the audio input of apps.\n\n\n### Install RVC WebUI\n\nIf you haven't installed the RVC WebUI, download the RVC-beta.7z file from [here](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/RVC-beta.7z) and extract it using 7-Zip into a folder of your choosing. It will take around 8GB of space, not including any voice models that you may train later on.\n\n\n### Clone AniVoiceChanger repository\n\nWithin the extracted RVC-beta folder (Should have a bunch of folders and files), open a command line window and run this command to clone this entire repository and install the additional dependencies required for this extension.\n\n```\ngit clone https://github.com/SociallyIneptWeeb/AniVoiceChanger\nruntime\\python.exe -m pip install -r AniVoiceChanger\\extra_requirements.txt\n```\n\n### Filling in your Environment Variables in the .env file\n\nFollow the instructions written in the .env file and fill in the appropriate values. 
If unsure, please refer to [this section](https://www.youtube.com/watch?v=K4vVW7iA1w8&t=728s) of the setup video.\n\n\n## Usage\n\nThis program assumes that you have already trained a voice model, with the model file in the weights folder. If you have not done so, please refer to [this section](https://www.youtube.com/watch?v=K4vVW7iA1w8&t=457s) of the setup video.\n\nRemember to change the audio input device of the game or application you are using to Cable Output (VB-Audio Virtual Cable).\n\nThere are 2 ways to run this program, either locally or using Google Colab. If you have around 5 GB of GPU VRAM to spare, feel free to run this locally while using the `crepe` pitch extraction algorithm. If you only have around 3 GB of GPU VRAM, you can also run this locally while using the `pm` pitch extraction algorithm. If none of these requirements are met, you should run this using Google Colab.\n\n### Local\n\nTo start the program, open a command line window in the extracted RVC-beta folder (Should have a bunch of folders and files) and run this command.\n\n```runtime\\python.exe AniVoiceChanger\\main_local.py```\n> Do note that every time a variable is updated in the `.env` file, you will have to rerun this command for the changes to take effect. E.g. when changing the model name.\n\nNow, hold the MIC_RECORD_KEY as defined in your .env file on your keyboard and speak into your mic. For the first time, this might take around 5 seconds to generate and play the voice. For consecutive uses, the time taken will be drastically reduced with caching. The voice will be played into the Cable Output audio device and your speakers as defined in the `.env` file. The generated voice will also be written into [this folder](audio/) as an `output.wav` file. \n\n### Google Colab\n\nGo to the [AniVoiceChanger_colab.ipynb](AniVoiceChanger_colab.ipynb) file on GitHub and click on the `Open in Colab` badge. This will open a Colab notebook. 
Follow the instructions in the notebook to either train a voice model, or run the RVC Inference server.\n\nIf you have already uploaded a trained voice model to the Colab runtime and it has started running the inference server, the output of the last cell should display an Ngrok public URL. Copy and paste this URL into the `COLAB_URL` environment variable in your `.env` file. After all your environment variables are properly set, open a command line window in the extracted RVC-beta folder (Should have a bunch of folders and files) and run this command. Do note that every time a variable is updated in the `.env` file, you will have to rerun this command for the changes to take effect. E.g. when changing the model name.\n\n```runtime\\python.exe AniVoiceChanger\\main_colab.py```\n> Do note that every time a variable is updated in the `.env` file, you will have to rerun this command for the changes to take effect. E.g. when changing the model name.\n\nNow, hold the MIC_RECORD_KEY as defined in your .env file on your keyboard and speak into your mic. For the first time, this might take around 10 seconds to generate and play the voice. For consecutive uses, the time taken will be drastically reduced with caching. The voice will be played into the Cable Output audio device and your speakers as defined in the `.env` file. 
The generated voice will also be written into [this folder](audio/) as an `output.wav` file.\n\n## Terms of Use\n\nThe use of the converted voice for the following purposes is prohibited.\n\n* Criticizing or attacking individuals.\n\n* Advocating for or opposing specific political positions, religions, or ideologies.\n\n* Publicly displaying strongly stimulating expressions without proper zoning.\n\n* Selling of voice models and generated voice clips.\n\n* Impersonation of the original owner of the voice with malicious intentions to harm/hurt others.\n\n* Fraudulent purposes that lead to identity theft or fraudulent phone calls.\n\n## Disclaimer\n\nI am not liable for any direct, indirect, consequential, incidental, or special damages arising out of or in any way connected with the use/misuse or inability to use this software.\n"
  },
  {
    "path": "audio/AUDIO.txt",
    "content": "Input and Output audio files are created and stored here."
  },
  {
    "path": "extra_requirements.txt",
    "content": "keyboard\npyaudio\npython-dotenv"
  },
  {
    "path": "get_audio_devices.py",
    "content": "import sounddevice as sd\n\n\nif __name__ == '__main__':\n    for device in sd.query_devices():\n        print(f\"{device['index']}: {device['name']}\")"
  },
  {
    "path": "main_colab.py",
    "content": "import sys\nimport wave\nfrom pathlib import Path\nfrom time import sleep, time\nfrom os import getenv\nfrom urllib.parse import urlencode\n\nimport keyboard\nfrom threading import Thread\n\nBASE_DIR = Path(__file__).resolve().parent.parent\nsys.path.append(str(BASE_DIR))\n\nimport pyaudio\nimport requests\nimport sounddevice as sd\nimport soundfile as sf\nfrom dotenv import load_dotenv\n\n\n# load environment variables\nload_dotenv()\nCOLAB_URL = getenv('COLAB_URL')\nMODEL_NAME = getenv('MODEL_NAME')\nif MODEL_NAME.endswith('.pth'):\n    MODEL_NAME = MODEL_NAME[:-4]\nPITCH_CHANGE = int(getenv('PITCH_CHANGE'))\nVOLUME_ENVELOPE = float(getenv('VOLUME_ENVELOPE'))\nINDEX_RATE = float(getenv('INDEX_RATE')) if getenv('INDEX_RATE') else 0\nPITCH_EXTRACTION_ALGO = getenv('PITCH_EXTRACTION_ALGO')\nGPU_INDEX = getenv('GPU_INDEX')\nMIC_RECORD_KEY = getenv('MIC_RECORD_KEY')\nINGAME_PUSH_TO_TALK_KEY = getenv('INGAME_PUSH_TO_TALK_KEY')\nMICROPHONE_ID = int(getenv('MICROPHONE_ID')) if getenv('MICROPHONE_ID') else None\nSPEAKERS_INPUT_ID = int(getenv('SPEAKERS_INPUT_ID')) if getenv('SPEAKERS_INPUT_ID') else None\n\n\ndef rvc_infer_colab():\n    params_encoded = urlencode({'model': MODEL_NAME, 'pitch': PITCH_CHANGE, 'algo': PITCH_EXTRACTION_ALGO, 'volume': VOLUME_ENVELOPE, 'index_rate': INDEX_RATE})\n\n    with open(INPUT_VOICE_PATH, 'rb') as infile:\n        files = {'audio_file': infile}\n        r = requests.post(f'{COLAB_URL}/infer?{params_encoded}', files=files)\n    \n    with open(OUTPUT_VOICE_PATH, 'wb') as outfile:\n        outfile.write(r.content)\n\n\ndef play_voice(device_id):\n    data, fs = sf.read(OUTPUT_VOICE_PATH, dtype='float32')\n\n    if INGAME_PUSH_TO_TALK_KEY:\n        keyboard.press(INGAME_PUSH_TO_TALK_KEY)\n\n    sd.play(data, fs, device=device_id)\n    sd.wait()\n\n    if INGAME_PUSH_TO_TALK_KEY:\n        keyboard.release(INGAME_PUSH_TO_TALK_KEY)\n\n\ndef on_press_key(_):\n    global frames, recording, stream\n    if not recording:\n        
print('\\nRecording has started.')\n        frames = []\n        recording = True\n        stream = p.open(format=FORMAT,\n                        channels=MIC_CHANNELS,\n                        rate=MIC_SAMPLING_RATE,\n                        input=True,\n                        frames_per_buffer=CHUNK,\n                        input_device_index=MICROPHONE_ID)\n\n\ndef on_release_key(_):\n    global recording, stream\n    recording = False\n    stream.stop_stream()\n    stream.close()\n    stream = None\n\n    # if key not held down for long enough\n    if not frames or len(frames) < 20:\n        print('No audio file to transcribe detected. Hold down the key for a longer time.')\n        return\n\n    print('Converting voice...')\n\n    start_time = time()\n    # write microphone audio to file\n    wf = wave.open(str(INPUT_VOICE_PATH), 'wb')\n    wf.setnchannels(MIC_CHANNELS)\n    wf.setsampwidth(p.get_sample_size(FORMAT))\n    wf.setframerate(MIC_SAMPLING_RATE)\n    wf.writeframes(b''.join(frames))\n    wf.close()\n\n    # voice change\n    rvc_infer_colab()\n    print(f'Time taken for RVC voice conversion: {time() - start_time}s')\n\n    # play to both app mic input and speakers\n    threads = [Thread(target=play_voice, args=[CABLE_INPUT_ID]), Thread(target=play_voice, args=[SPEAKERS_INPUT_ID])]\n    [t.start() for t in threads]\n    [t.join() for t in threads]\n\n\nif __name__ == '__main__':\n    INPUT_VOICE_PATH = str(BASE_DIR / 'AniVoiceChanger' / 'audio' / 'input.mp3')\n    OUTPUT_VOICE_PATH = str(BASE_DIR / 'AniVoiceChanger' / 'audio' / 'output.wav')\n    CHUNK = 1024\n    FORMAT = pyaudio.paInt16\n\n    p = pyaudio.PyAudio()\n    if MICROPHONE_ID is None:\n        MICROPHONE_ID = p.get_default_input_device_info()['index']\n\n    if SPEAKERS_INPUT_ID is None:\n        SPEAKERS_INPUT_ID = p.get_default_output_device_info()['index']\n\n    CABLE_INPUT_ID = None\n    for audio_device in sd.query_devices():\n        if 'CABLE Input' in audio_device['name']:\n  
          CABLE_INPUT_ID = audio_device['index']\n            break\n\n    if not CABLE_INPUT_ID:\n        print('Virtual audio cable was not found. Please download and install it.')\n        sys.exit()\n\n    # get channels and sampling rate of mic\n    mic_info = p.get_device_info_by_index(MICROPHONE_ID)\n    MIC_CHANNELS = mic_info['maxInputChannels']\n    MIC_SAMPLING_RATE = 40000\n\n    print('Voice changer is booting up...')\n\n    frames = []\n    recording = False\n    stream = None\n\n    keyboard.on_press_key(MIC_RECORD_KEY, on_press_key)\n    keyboard.on_release_key(MIC_RECORD_KEY, on_release_key)\n\n    try:\n        print('Voice changer is ready.')\n        while True:\n            if recording and stream:\n                data = stream.read(CHUNK)\n                frames.append(data)\n            else:\n                sleep(0.2)\n\n    except KeyboardInterrupt:\n        print('Closing voice changer...')\n"
  },
  {
    "path": "main_local.py",
    "content": "import sys\nimport wave\nfrom pathlib import Path\nfrom time import sleep, time\nfrom os import getenv\n\nimport keyboard\nfrom threading import Thread\n\nBASE_DIR = Path(__file__).resolve().parent.parent\nsys.path.append(str(BASE_DIR))\n\nimport torch\nimport pyaudio\nimport sounddevice as sd\nimport soundfile as sf\nfrom multiprocessing import cpu_count\nfrom dotenv import load_dotenv\n\nfrom vc_infer_pipeline import VC\nfrom infer_pack.models import (\n    SynthesizerTrnMs256NSFsid,\n    SynthesizerTrnMs256NSFsid_nono,\n    SynthesizerTrnMs768NSFsid,\n    SynthesizerTrnMs768NSFsid_nono,\n)\nfrom my_utils import load_audio\nfrom fairseq import checkpoint_utils\nfrom scipy.io import wavfile\n\n\n# load environment variables\nload_dotenv()\nMODEL_NAME = getenv('MODEL_NAME')\nif MODEL_NAME.endswith('.pth'):\n    MODEL_NAME = MODEL_NAME[:-4]\nPITCH_CHANGE = int(getenv('PITCH_CHANGE'))\nVOLUME_ENVELOPE = float(getenv('VOLUME_ENVELOPE'))\nINDEX_RATE = float(getenv('INDEX_RATE')) if getenv('INDEX_RATE') else 0\nPITCH_EXTRACTION_ALGO = getenv('PITCH_EXTRACTION_ALGO')\nGPU_INDEX = getenv('GPU_INDEX')\nMIC_RECORD_KEY = getenv('MIC_RECORD_KEY')\nINGAME_PUSH_TO_TALK_KEY = getenv('INGAME_PUSH_TO_TALK_KEY')\nMICROPHONE_ID = int(getenv('MICROPHONE_ID')) if getenv('MICROPHONE_ID') else None\nSPEAKERS_INPUT_ID = int(getenv('SPEAKERS_INPUT_ID')) if getenv('SPEAKERS_INPUT_ID') else None\n\n\nclass Config:\n    def __init__(self, device, is_half):\n        self.device = device\n        self.is_half = is_half\n        self.n_cpu = 0\n        self.gpu_name = None\n        self.gpu_mem = None\n        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()\n\n    def device_config(self) -> tuple:\n        if torch.cuda.is_available():\n            i_device = int(self.device.split(\":\")[-1])\n            self.gpu_name = torch.cuda.get_device_name(i_device)\n            if (\n                    (\"16\" in self.gpu_name and \"V100\" not in 
self.gpu_name.upper())\n                    or \"P40\" in self.gpu_name.upper()\n                    or \"1060\" in self.gpu_name\n                    or \"1070\" in self.gpu_name\n                    or \"1080\" in self.gpu_name\n            ):\n                print(\"16 series/10 series P40 forced single precision\")\n                self.is_half = False\n                for config_file in [\"32k.json\", \"40k.json\", \"48k.json\"]:\n                    with open(f\"configs/{config_file}\", \"r\") as f:\n                        strr = f.read().replace(\"true\", \"false\")\n                    with open(f\"configs/{config_file}\", \"w\") as f:\n                        f.write(strr)\n                with open(\"trainset_preprocess_pipeline_print.py\", \"r\") as f:\n                    strr = f.read().replace(\"3.7\", \"3.0\")\n                with open(\"trainset_preprocess_pipeline_print.py\", \"w\") as f:\n                    f.write(strr)\n            else:\n                self.gpu_name = None\n            self.gpu_mem = int(\n                torch.cuda.get_device_properties(i_device).total_memory\n                / 1024\n                / 1024\n                / 1024\n                + 0.4\n            )\n            if self.gpu_mem <= 4:\n                with open(\"trainset_preprocess_pipeline_print.py\", \"r\") as f:\n                    strr = f.read().replace(\"3.7\", \"3.0\")\n                with open(\"trainset_preprocess_pipeline_print.py\", \"w\") as f:\n                    f.write(strr)\n        elif torch.backends.mps.is_available():\n            print(\"No supported N-card found, use MPS for inference\")\n            self.device = \"mps\"\n        else:\n            print(\"No supported N-card found, use CPU for inference\")\n            self.device = \"cpu\"\n            self.is_half = True\n\n        if self.n_cpu == 0:\n            self.n_cpu = cpu_count()\n\n        if self.is_half:\n            # 6G memory config\n            x_pad = 3\n     
       x_query = 10\n            x_center = 60\n            x_max = 65\n        else:\n            # 5G memory config\n            x_pad = 1\n            x_query = 6\n            x_center = 38\n            x_max = 41\n\n        if self.gpu_mem != None and self.gpu_mem <= 4:\n            x_pad = 1\n            x_query = 5\n            x_center = 30\n            x_max = 32\n\n        return x_pad, x_query, x_center, x_max\n\n\ndef load_hubert():\n    models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(['hubert_base.pt'], suffix='', )\n    hubert = models[0]\n    hubert = hubert.to(device)\n\n    if is_half:\n        hubert = hubert.half()\n    else:\n        hubert = hubert.float()\n\n    hubert.eval()\n    return hubert\n\n\ndef get_vc():\n    model_path = BASE_DIR / 'weights' / f'{MODEL_NAME}.pth'\n    if not model_path.exists():\n        print(f'The model {model_path} does not exist. Please ensure that you have filled in the proper MODEL_NAME in your .env file.')\n        raise Exception()\n\n    model_path = str(model_path)\n    print(f'loading pth {model_path}')\n    cpt = torch.load(model_path, map_location='cpu')\n    tgt_sr = cpt[\"config\"][-1]\n    cpt[\"config\"][-3] = cpt[\"weight\"][\"emb_g.weight\"].shape[0]\n    if_f0 = cpt.get(\"f0\", 1)\n    version = cpt.get(\"version\", \"v1\")\n\n    if version == \"v1\":\n        if if_f0 == 1:\n            net_g = SynthesizerTrnMs256NSFsid(*cpt[\"config\"], is_half=is_half)\n        else:\n            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt[\"config\"])\n    elif version == \"v2\":\n        if if_f0 == 1:\n            net_g = SynthesizerTrnMs768NSFsid(*cpt[\"config\"], is_half=is_half)\n        else:\n            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt[\"config\"])\n\n    del net_g.enc_q\n    print(net_g.load_state_dict(cpt[\"weight\"], strict=False))\n    net_g.eval().to(device)\n\n    if is_half:\n        net_g = net_g.half()\n    else:\n        net_g = net_g.float()\n\n    vc = 
VC(tgt_sr, config)\n    return cpt, version, net_g, tgt_sr, vc\n\n\ndef rvc_infer():\n    logs_dir = BASE_DIR / 'logs' / MODEL_NAME\n    index_path = ''\n    for file in logs_dir.iterdir():\n        if file.suffix == '.index':\n            index_path = str(logs_dir / file.name)\n            break\n    \n    # vc single\n    audio = load_audio(INPUT_VOICE_PATH, 16000)\n    times = [0, 0, 0]\n    if_f0 = cpt.get('f0', 1)\n    audio_opt = vc.pipeline(hubert_model, net_g, 0, audio, INPUT_VOICE_PATH, times, PITCH_CHANGE, PITCH_EXTRACTION_ALGO, index_path, INDEX_RATE, if_f0, 3, tgt_sr, 0, VOLUME_ENVELOPE, version, 0.33, f0_file=None)\n    wavfile.write(OUTPUT_VOICE_PATH, tgt_sr, audio_opt)\n\n\ndef play_voice(device_id):\n    data, fs = sf.read(OUTPUT_VOICE_PATH, dtype='float32')\n\n    if INGAME_PUSH_TO_TALK_KEY:\n        keyboard.press(INGAME_PUSH_TO_TALK_KEY)\n\n    sd.play(data, fs, device=device_id)\n    sd.wait()\n\n    if INGAME_PUSH_TO_TALK_KEY:\n        keyboard.release(INGAME_PUSH_TO_TALK_KEY)\n\n\ndef on_press_key(_):\n    global frames, recording, stream\n    if not recording:\n        print('\\nRecording has started.')\n        frames = []\n        recording = True\n        stream = p.open(format=FORMAT,\n                        channels=MIC_CHANNELS,\n                        rate=MIC_SAMPLING_RATE,\n                        input=True,\n                        frames_per_buffer=CHUNK,\n                        input_device_index=MICROPHONE_ID)\n\n\ndef on_release_key(_):\n    global recording, stream\n    recording = False\n    stream.stop_stream()\n    stream.close()\n    stream = None\n\n    # if key not held down for long enough\n    if not frames or len(frames) < 20:\n        print('No audio file to transcribe detected. 
Hold down the key for a longer time.')\n        return\n    \n    print('Converting voice...')\n\n    start_time = time()\n    # write microphone audio to file\n    wf = wave.open(str(INPUT_VOICE_PATH), 'wb')\n    wf.setnchannels(MIC_CHANNELS)\n    wf.setsampwidth(p.get_sample_size(FORMAT))\n    wf.setframerate(MIC_SAMPLING_RATE)\n    wf.writeframes(b''.join(frames))\n    wf.close()\n\n    # voice change\n    rvc_infer()\n    print(f'Time taken for RVC voice conversion: {time() - start_time}s')\n\n    # play to both app mic input and speakers\n    threads = [Thread(target=play_voice, args=[CABLE_INPUT_ID]), Thread(target=play_voice, args=[SPEAKERS_INPUT_ID])]\n    [t.start() for t in threads]\n    [t.join() for t in threads]\n\n\nif __name__ == '__main__':\n    device = f'cuda:{GPU_INDEX}'\n    is_half = True\n    config = Config(device, is_half)\n    INPUT_VOICE_PATH = str(BASE_DIR / 'AniVoiceChanger' / 'audio' / 'input.mp3')\n    OUTPUT_VOICE_PATH = str(BASE_DIR / 'AniVoiceChanger' / 'audio' / 'output.wav')\n    CHUNK = 1024\n    FORMAT = pyaudio.paInt16\n\n    p = pyaudio.PyAudio()\n    if MICROPHONE_ID is None:\n        MICROPHONE_ID = p.get_default_input_device_info()['index']\n\n    if SPEAKERS_INPUT_ID is None:\n        SPEAKERS_INPUT_ID = p.get_default_output_device_info()['index']\n\n    CABLE_INPUT_ID = None\n    for audio_device in sd.query_devices():\n        if 'CABLE Input' in audio_device['name']:\n            CABLE_INPUT_ID = audio_device['index']\n            break\n\n    if not CABLE_INPUT_ID:\n        print('Virtual audio cable was not found. 
Please download and install it.')\n        sys.exit()\n\n    # get channels and sampling rate of mic\n    mic_info = p.get_device_info_by_index(MICROPHONE_ID)\n    MIC_CHANNELS = mic_info['maxInputChannels']\n    MIC_SAMPLING_RATE = 40000\n\n    print('Voice changer is booting up...')\n\n    # load hubert model\n    hubert_model = load_hubert()\n\n    # get vc\n    cpt, version, net_g, tgt_sr, vc = get_vc()\n\n    frames = []\n    recording = False\n    stream = None\n\n    keyboard.on_press_key(MIC_RECORD_KEY, on_press_key)\n    keyboard.on_release_key(MIC_RECORD_KEY, on_release_key)\n\n    try:\n        print('Voice changer is ready.')\n        while True:\n            if recording and stream:\n                data = stream.read(CHUNK)\n                frames.append(data)\n            else:\n                sleep(0.2)\n\n    except KeyboardInterrupt:\n        print('Closing voice changer...')\n"
  }
]