Repository: pranauv1/AI-Video-Translation
Branch: main
Commit: 4c118612c174
Files: 2
Total size: 104.8 KB
Directory structure:
gitextract_a9rgujrs/
├── README.md
└── Video_Translation_&_Lipsync.ipynb
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# AI-Video-Translation
(This project is no longer maintained; feel free to fork and modify it!)<br>
A simple Google Colab notebook that translates a video into other languages and lip-syncs the translated speech to the original footage.
<b>How does it work?</b>
1: Upload video<br>
2: Extract the audio and transcribe it to text (OpenAI Whisper)<br>
3: Translate the text (Google Translate)<br>
4: Synthesize the translated text in the original speaker's voice, a.k.a. voice cloning (coqui-ai TTS)<br>
5: Lip-sync the synthesized audio to the original video clip (OpenTalker video-retalking or Wav2Lip)<br>
<i>The links for the repos mentioned above are given in the notebook itself.</i>
================================================
FILE: Video_Translation_&_Lipsync.ipynb
================================================
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": [
"0ByUfwRZvZ3V"
],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"3468a1aeea494572a0f1e12d73091dbc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "CheckboxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Resize to 720p (better results)",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_b7dd8443fb7b49ecad505cc59ea81cd7",
"style": "IPY_MODEL_9bc52945fbbc4a5abb8a7ffc2833f3e5",
"value": false
}
},
"b7dd8443fb7b49ecad505cc59ea81cd7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9bc52945fbbc4a5abb8a7ffc2833f3e5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3268d0213a3f47ad90fe7188dcf122d0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Upload Video",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_77005cb45c154a1fa1cb7c3685bef50f",
"style": "IPY_MODEL_4d1d24afab4b49cd93a6fcfbc5e38887",
"tooltip": ""
}
},
"77005cb45c154a1fa1cb7c3685bef50f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4d1d24afab4b49cd93a6fcfbc5e38887": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"c6ce3c2d1bf34fce90117bc738b84320": {
"model_module": "@jupyter-widgets/output",
"model_name": "OutputModel",
"model_module_version": "1.0.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/output",
"_model_module_version": "1.0.0",
"_model_name": "OutputModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/output",
"_view_module_version": "1.0.0",
"_view_name": "OutputView",
"layout": "IPY_MODEL_37a21aa630f14cb0be6917bd853a4828",
"msg_id": "",
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "<IPython.core.display.HTML object>",
"text/html": "\n <input type=\"file\" id=\"files-a3c19670-c63b-4a3b-8e84-37d36ac78d70\" name=\"files[]\" multiple disabled\n style=\"border:none\" />\n <output id=\"result-a3c19670-c63b-4a3b-8e84-37d36ac78d70\">\n Upload widget is only available when the cell has been executed in the\n current browser session. Please rerun this cell to enable.\n </output>\n <script>// Copyright 2017 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n// http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n/**\n * @fileoverview Helpers for google.colab Python module.\n */\n(function(scope) {\nfunction span(text, styleAttributes = {}) {\n const element = document.createElement('span');\n element.textContent = text;\n for (const key of Object.keys(styleAttributes)) {\n element.style[key] = styleAttributes[key];\n }\n return element;\n}\n\n// Max number of bytes which will be uploaded at a time.\nconst MAX_PAYLOAD_SIZE = 100 * 1024;\n\nfunction _uploadFiles(inputId, outputId) {\n const steps = uploadFilesStep(inputId, outputId);\n const outputElement = document.getElementById(outputId);\n // Cache steps on the outputElement to make it available for the next call\n // to uploadFilesContinue from Python.\n outputElement.steps = steps;\n\n return _uploadFilesContinue(outputId);\n}\n\n// This is roughly an async generator (not supported in the browser yet),\n// where there are multiple asynchronous steps and the Python side is going\n// to poll for completion of each step.\n// This uses a Promise to block the python side 
on completion of each step,\n// then passes the result of the previous step as the input to the next step.\nfunction _uploadFilesContinue(outputId) {\n const outputElement = document.getElementById(outputId);\n const steps = outputElement.steps;\n\n const next = steps.next(outputElement.lastPromiseValue);\n return Promise.resolve(next.value.promise).then((value) => {\n // Cache the last promise value to make it available to the next\n // step of the generator.\n outputElement.lastPromiseValue = value;\n return next.value.response;\n });\n}\n\n/**\n * Generator function which is called between each async step of the upload\n * process.\n * @param {string} inputId Element ID of the input file picker element.\n * @param {string} outputId Element ID of the output display.\n * @return {!Iterable<!Object>} Iterable of next steps.\n */\nfunction* uploadFilesStep(inputId, outputId) {\n const inputElement = document.getElementById(inputId);\n inputElement.disabled = false;\n\n const outputElement = document.getElementById(outputId);\n outputElement.innerHTML = '';\n\n const pickedPromise = new Promise((resolve) => {\n inputElement.addEventListener('change', (e) => {\n resolve(e.target.files);\n });\n });\n\n const cancel = document.createElement('button');\n inputElement.parentElement.appendChild(cancel);\n cancel.textContent = 'Cancel upload';\n const cancelPromise = new Promise((resolve) => {\n cancel.onclick = () => {\n resolve(null);\n };\n });\n\n // Wait for the user to pick the files.\n const files = yield {\n promise: Promise.race([pickedPromise, cancelPromise]),\n response: {\n action: 'starting',\n }\n };\n\n cancel.remove();\n\n // Disable the input element since further picks are not allowed.\n inputElement.disabled = true;\n\n if (!files) {\n return {\n response: {\n action: 'complete',\n }\n };\n }\n\n for (const file of files) {\n const li = document.createElement('li');\n li.append(span(file.name, {fontWeight: 'bold'}));\n li.append(span(\n `(${file.type || 
'n/a'}) - ${file.size} bytes, ` +\n `last modified: ${\n file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n 'n/a'} - `));\n const percent = span('0% done');\n li.appendChild(percent);\n\n outputElement.appendChild(li);\n\n const fileDataPromise = new Promise((resolve) => {\n const reader = new FileReader();\n reader.onload = (e) => {\n resolve(e.target.result);\n };\n reader.readAsArrayBuffer(file);\n });\n // Wait for the data to be ready.\n let fileData = yield {\n promise: fileDataPromise,\n response: {\n action: 'continue',\n }\n };\n\n // Use a chunked sending to avoid message size limits. See b/62115660.\n let position = 0;\n do {\n const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n const chunk = new Uint8Array(fileData, position, length);\n position += length;\n\n const base64 = btoa(String.fromCharCode.apply(null, chunk));\n yield {\n response: {\n action: 'append',\n file: file.name,\n data: base64,\n },\n };\n\n let percentDone = fileData.byteLength === 0 ?\n 100 :\n Math.round((position / fileData.byteLength) * 100);\n percent.textContent = `${percentDone}% done`;\n\n } while (position < fileData.byteLength);\n }\n\n // All done.\n yield {\n response: {\n action: 'complete',\n }\n };\n}\n\nscope.google = scope.google || {};\nscope.google.colab = scope.google.colab || {};\nscope.google.colab._files = {\n _uploadFiles,\n _uploadFilesContinue,\n};\n})(self);\n</script> "
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving Musk tells boycotting advertisers to ‘go f--- yourself’.mp4 to Musk tells boycotting advertisers to ‘go f--- yourself’.mp4\n",
"Uploaded Musk tells boycotting advertisers to ‘go f--- yourself’.mp4\n"
]
}
]
}
},
"37a21aa630f14cb0be6917bd853a4828": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Install the packages below, then click the \"Restart Session\" button.\n"
],
"metadata": {
"id": "NG9k9wHkDglO"
}
},
{
"cell_type": "code",
"source": [
"!pip install TTS\n",
"!pip install numpy==1.24.0"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "QepqAojrDfJR",
"outputId": "b570278f-5f76-4db9-fd8d-749d9651e121"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting TTS\n",
" Using cached TTS-0.22.0-cp310-cp310-manylinux1_x86_64.whl (938 kB)\n",
"Requirement already satisfied: cython>=0.29.30 in /usr/local/lib/python3.10/dist-packages (from TTS) (3.0.6)\n",
"Requirement already satisfied: scipy>=1.11.2 in /usr/local/lib/python3.10/dist-packages (from TTS) (1.11.4)\n",
"Requirement already satisfied: torch>=2.1 in /usr/local/lib/python3.10/dist-packages (from TTS) (2.1.0+cu121)\n",
"Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (from TTS) (2.1.0+cu121)\n",
"Requirement already satisfied: soundfile>=0.12.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (0.12.1)\n",
"Requirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (0.10.1)\n",
"Collecting scikit-learn>=1.3.0 (from TTS)\n",
" Using cached scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)\n",
"Requirement already satisfied: inflect>=5.6.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (7.0.0)\n",
"Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from TTS) (4.66.1)\n",
"Collecting anyascii>=0.3.0 (from TTS)\n",
" Using cached anyascii-0.3.2-py3-none-any.whl (289 kB)\n",
"Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (6.0.1)\n",
"Requirement already satisfied: fsspec>=2023.6.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (2023.6.0)\n",
"Requirement already satisfied: aiohttp>=3.8.1 in /usr/local/lib/python3.10/dist-packages (from TTS) (3.9.1)\n",
"Requirement already satisfied: packaging>=23.1 in /usr/local/lib/python3.10/dist-packages (from TTS) (23.2)\n",
"Requirement already satisfied: flask>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from TTS) (2.2.5)\n",
"Collecting pysbd>=0.3.4 (from TTS)\n",
" Using cached pysbd-0.3.4-py3-none-any.whl (71 kB)\n",
"Collecting umap-learn>=0.5.1 (from TTS)\n",
" Using cached umap-learn-0.5.5.tar.gz (90 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: pandas<2.0,>=1.4 in /usr/local/lib/python3.10/dist-packages (from TTS) (1.5.3)\n",
"Requirement already satisfied: matplotlib>=3.7.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (3.7.1)\n",
"Collecting trainer>=0.0.32 (from TTS)\n",
" Using cached trainer-0.0.36-py3-none-any.whl (51 kB)\n",
"Collecting coqpit>=0.0.16 (from TTS)\n",
" Using cached coqpit-0.0.17-py3-none-any.whl (13 kB)\n",
"Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from TTS) (0.42.1)\n",
"Collecting pypinyin (from TTS)\n",
" Using cached pypinyin-0.50.0-py2.py3-none-any.whl (1.4 MB)\n",
"Collecting hangul-romanize (from TTS)\n",
" Using cached hangul_romanize-0.1.0-py3-none-any.whl (4.6 kB)\n",
"Collecting gruut[de,es,fr]==2.2.3 (from TTS)\n",
" Using cached gruut-2.2.3.tar.gz (73 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting jamo (from TTS)\n",
" Using cached jamo-0.4.1-py3-none-any.whl (9.5 kB)\n",
"Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from TTS) (3.8.1)\n",
"Collecting g2pkk>=0.1.1 (from TTS)\n",
" Using cached g2pkk-0.1.2-py3-none-any.whl (25 kB)\n",
"Collecting bangla (from TTS)\n",
" Using cached bangla-0.0.2-py2.py3-none-any.whl (6.2 kB)\n",
"Collecting bnnumerizer (from TTS)\n",
" Using cached bnnumerizer-0.0.2.tar.gz (4.7 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting bnunicodenormalizer (from TTS)\n",
" Using cached bnunicodenormalizer-0.1.6.tar.gz (39 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting einops>=0.6.0 (from TTS)\n",
" Using cached einops-0.7.0-py3-none-any.whl (44 kB)\n",
"Requirement already satisfied: transformers>=4.33.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (4.35.2)\n",
"Collecting encodec>=0.1.1 (from TTS)\n",
" Using cached encodec-0.1.1.tar.gz (3.7 MB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting unidecode>=1.3.2 (from TTS)\n",
" Using cached Unidecode-1.3.7-py3-none-any.whl (235 kB)\n",
"Collecting num2words (from TTS)\n",
" Using cached num2words-0.5.13-py3-none-any.whl (143 kB)\n",
"Requirement already satisfied: spacy[ja]>=3 in /usr/local/lib/python3.10/dist-packages (from TTS) (3.6.1)\n",
"Collecting numpy==1.22.0 (from TTS)\n",
" Downloading numpy-1.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.8/16.8 MB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numba>=0.57.0 in /usr/local/lib/python3.10/dist-packages (from TTS) (0.58.1)\n",
"Requirement already satisfied: Babel<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from gruut[de,es,fr]==2.2.3->TTS) (2.14.0)\n",
"Collecting dateparser~=1.1.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached dateparser-1.1.8-py2.py3-none-any.whl (293 kB)\n",
"Collecting gruut-ipa<1.0,>=0.12.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached gruut-ipa-0.13.0.tar.gz (101 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting gruut_lang_en~=2.0.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached gruut_lang_en-2.0.0.tar.gz (15.2 MB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting jsonlines~=1.2.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached jsonlines-1.2.0-py2.py3-none-any.whl (7.6 kB)\n",
"Collecting networkx<3.0.0,>=2.5.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached networkx-2.8.8-py3-none-any.whl (2.0 MB)\n",
"Collecting python-crfsuite~=0.9.7 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached python_crfsuite-0.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
"Collecting gruut_lang_de~=2.0.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached gruut_lang_de-2.0.0.tar.gz (18.1 MB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting gruut_lang_es~=2.0.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached gruut_lang_es-2.0.0.tar.gz (31.4 MB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting gruut_lang_fr~=2.0.0 (from gruut[de,es,fr]==2.2.3->TTS)\n",
" Using cached gruut_lang_fr-2.0.2.tar.gz (10.9 MB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp>=3.8.1->TTS) (23.1.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp>=3.8.1->TTS) (6.0.4)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp>=3.8.1->TTS) (1.9.4)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp>=3.8.1->TTS) (1.4.1)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp>=3.8.1->TTS) (1.3.1)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp>=3.8.1->TTS) (4.0.3)\n",
"Requirement already satisfied: Werkzeug>=2.2.2 in /usr/local/lib/python3.10/dist-packages (from flask>=2.0.1->TTS) (3.0.1)\n",
"Requirement already satisfied: Jinja2>=3.0 in /usr/local/lib/python3.10/dist-packages (from flask>=2.0.1->TTS) (3.1.2)\n",
"Requirement already satisfied: itsdangerous>=2.0 in /usr/local/lib/python3.10/dist-packages (from flask>=2.0.1->TTS) (2.1.2)\n",
"Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.10/dist-packages (from flask>=2.0.1->TTS) (8.1.7)\n",
"Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect>=5.6.0->TTS) (1.10.13)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from inflect>=5.6.0->TTS) (4.5.0)\n",
"Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (3.0.1)\n",
"INFO: pip is looking at multiple versions of librosa to determine which version is compatible with other requirements. This could take a while.\n",
"Collecting librosa>=0.10.0 (from TTS)\n",
" Using cached librosa-0.10.0.post2-py3-none-any.whl (253 kB)\n",
" Using cached librosa-0.10.0.post1-py3-none-any.whl (252 kB)\n",
" Using cached librosa-0.10.0-py3-none-any.whl (252 kB)\n",
"Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (1.3.2)\n",
"Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (4.4.2)\n",
"Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (1.8.0)\n",
"Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (0.3.7)\n",
"Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (0.3)\n",
"Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->TTS) (1.0.7)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (1.2.0)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (4.46.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (1.4.5)\n",
"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (9.4.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (3.1.1)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->TTS) (2.8.2)\n",
"Collecting docopt>=0.6.2 (from num2words->TTS)\n",
" Downloading docopt-0.6.2.tar.gz (25 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.57.0->TTS) (0.41.1)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.0,>=1.4->TTS) (2023.3.post1)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.3.0->TTS) (3.2.0)\n",
"Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.0->TTS) (1.16.0)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (3.0.12)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (1.0.5)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (1.0.10)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (2.0.8)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (3.0.9)\n",
"Requirement already satisfied: thinc<8.2.0,>=8.1.8 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (8.1.12)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (1.1.2)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (2.4.8)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (2.0.10)\n",
"Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (0.9.0)\n",
"Requirement already satisfied: pathy>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (0.10.3)\n",
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (6.4.0)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (2.31.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (67.7.2)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy[ja]>=3->TTS) (3.3.0)\n",
"Collecting sudachipy!=0.6.1,>=0.5.2 (from spacy[ja]>=3->TTS)\n",
" Downloading SudachiPy-0.6.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting sudachidict-core>=20211220 (from spacy[ja]>=3->TTS)\n",
" Downloading SudachiDict_core-20230927-py3-none-any.whl (71.7 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.7/71.7 MB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=2.1->TTS) (3.13.1)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=2.1->TTS) (1.12)\n",
"Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=2.1->TTS) (2.1.0)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from trainer>=0.0.32->TTS) (5.9.5)\n",
"Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from trainer>=0.0.32->TTS) (2.15.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->TTS) (0.19.4)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->TTS) (2023.6.3)\n",
"Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->TTS) (0.15.0)\n",
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.33.0->TTS) (0.4.1)\n",
"Collecting pynndescent>=0.5 (from umap-learn>=0.5.1->TTS)\n",
" Downloading pynndescent-0.5.11-py3-none-any.whl (55 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.8/55.8 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.0->TTS) (2.21)\n",
"Requirement already satisfied: tzlocal in /usr/local/lib/python3.10/dist-packages (from dateparser~=1.1.0->gruut[de,es,fr]==2.2.3->TTS) (5.2)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from Jinja2>=3.0->flask>=2.0.1->TTS) (2.1.3)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from jsonlines~=1.2.0->gruut[de,es,fr]==2.2.3->TTS) (1.16.0)\n",
"Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.10.0->TTS) (4.1.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy[ja]>=3->TTS) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy[ja]>=3->TTS) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy[ja]>=3->TTS) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy[ja]>=3->TTS) (2023.11.17)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.2.0,>=8.1.8->spacy[ja]>=3->TTS) (0.7.11)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.2.0,>=8.1.8->spacy[ja]>=3->TTS) (0.1.4)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=2.1->TTS) (1.3.0)\n",
"Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (1.4.0)\n",
"Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (1.60.0)\n",
"Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (2.17.3)\n",
"Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (1.2.0)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (3.5.1)\n",
"Requirement already satisfied: protobuf<4.24,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (3.20.3)\n",
"Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->trainer>=0.0.32->TTS) (0.7.2)\n",
"Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->trainer>=0.0.32->TTS) (5.3.2)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->trainer>=0.0.32->TTS) (0.3.0)\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->trainer>=0.0.32->TTS) (4.9)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard->trainer>=0.0.32->TTS) (1.3.1)\n",
"Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->trainer>=0.0.32->TTS) (0.5.1)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard->trainer>=0.0.32->TTS) (3.2.2)\n",
"Building wheels for collected packages: encodec, umap-learn, bnnumerizer, bnunicodenormalizer, docopt, gruut-ipa, gruut_lang_de, gruut_lang_en, gruut_lang_es, gruut_lang_fr, gruut\n",
" Building wheel for encodec (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for encodec: filename=encodec-0.1.1-py3-none-any.whl size=45759 sha256=3c74952dadaacc172b5e203a196ac28c847ea69769de94bbe22adca31c52c48d\n",
" Stored in directory: /root/.cache/pip/wheels/fc/36/cb/81af8b985a5f5e0815312d5e52b41263237af07b977e6bcbf3\n",
" Building wheel for umap-learn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for umap-learn: filename=umap_learn-0.5.5-py3-none-any.whl size=86832 sha256=414eb608a67681ea0626cb130a7c1c98fd08dd10ad40b181b04bc44e2a3bf221\n",
" Stored in directory: /root/.cache/pip/wheels/3a/70/07/428d2b58660a1a3b431db59b806a10da736612ebbc66c1bcc5\n",
" Building wheel for bnnumerizer (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for bnnumerizer: filename=bnnumerizer-0.0.2-py3-none-any.whl size=5259 sha256=21df3912b72444aa7d22c287fc6e3865866b5108896d4892de7081ee38b97f00\n",
" Stored in directory: /root/.cache/pip/wheels/59/6b/e8/223172e7d5c9f72df3ea1a0d9258f3a8ab5b28e827728edef5\n",
" Building wheel for bnunicodenormalizer (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for bnunicodenormalizer: filename=bnunicodenormalizer-0.1.6-py3-none-any.whl size=22779 sha256=dd9dbfb9da2b944d8e85b1e512a514608ec21695d31367b59a9308d7b0ebc242\n",
" Stored in directory: /root/.cache/pip/wheels/f4/d7/e9/16732a619cbf5a63fdc9f6e2f9eb5fcf73fa023735237330e9\n",
" Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=4195c523fb9d97941983c0984fe30f915df1954163925f6dadbb71ae74c04260\n",
" Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n",
" Building wheel for gruut-ipa (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gruut-ipa: filename=gruut_ipa-0.13.0-py3-none-any.whl size=104873 sha256=b2ba500eae7b7b3315ae88264752e6e986997e71c277ddd6029e131eeb5525da\n",
" Stored in directory: /root/.cache/pip/wheels/7b/18/49/e4f500ecdf0babe757953f844e4d7cd1ea81c5503c09bfe984\n",
" Building wheel for gruut_lang_de (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gruut_lang_de: filename=gruut_lang_de-2.0.0-py3-none-any.whl size=18498182 sha256=f8af7a165782344362fd0fd421721c9c5358bd9050891b3fa22ae6a7b1db4861\n",
" Stored in directory: /root/.cache/pip/wheels/95/9a/05/cfce98f0c41a1a540f15708c4a02df190b82d84cf91ef6bc7f\n",
" Building wheel for gruut_lang_en (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gruut_lang_en: filename=gruut_lang_en-2.0.0-py3-none-any.whl size=15297178 sha256=91aa403c9ce95e6ce08e53b144883f1361d415713f9bb26dcdba7b435f4174db\n",
" Stored in directory: /root/.cache/pip/wheels/10/9c/fb/77c655a9fbd78cdb9935d0ab65d80ddd0a3bcf7dbe18261650\n",
" Building wheel for gruut_lang_es (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gruut_lang_es: filename=gruut_lang_es-2.0.0-py3-none-any.whl size=32173796 sha256=bf4af1137c067d1d280d6b874fa2d01c1fb87a6ea221c2f6ff12da49c5f202dd\n",
" Stored in directory: /root/.cache/pip/wheels/9b/0a/90/788d92c07744b329b9283e37b29b064f5db6b1bb0442a1a19b\n",
" Building wheel for gruut_lang_fr (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gruut_lang_fr: filename=gruut_lang_fr-2.0.2-py3-none-any.whl size=10968766 sha256=2c60a0308168e2465af6e88528e7a443441ca33ae4e5ab1a28d2fb7497fa6207\n",
" Stored in directory: /root/.cache/pip/wheels/db/21/be/d0436e3f1cf9bf38b9bb9b4a476399c77a1ab19f7172b45e19\n",
" Building wheel for gruut (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gruut: filename=gruut-2.2.3-py3-none-any.whl size=75792 sha256=62708a2da48c94ce80871be3d905740bc65611ca58f69cc8ae6d21fe74b58d85\n",
" Stored in directory: /root/.cache/pip/wheels/fc/57/a8/f9de532daf5214f53644f20f3a9e6f69269453c87df9c0a817\n",
"Successfully built encodec umap-learn bnnumerizer bnunicodenormalizer docopt gruut-ipa gruut_lang_de gruut_lang_en gruut_lang_es gruut_lang_fr gruut\n",
"Installing collected packages: sudachipy, python-crfsuite, jamo, hangul-romanize, gruut_lang_fr, gruut_lang_es, gruut_lang_en, gruut_lang_de, docopt, bnunicodenormalizer, bnnumerizer, bangla, unidecode, sudachidict-core, pysbd, pypinyin, numpy, num2words, networkx, jsonlines, gruut-ipa, einops, coqpit, anyascii, g2pkk, dateparser, scikit-learn, gruut, pynndescent, librosa, encodec, umap-learn, trainer, TTS\n",
" Attempting uninstall: numpy\n",
" Found existing installation: numpy 1.23.5\n",
" Uninstalling numpy-1.23.5:\n",
" Successfully uninstalled numpy-1.23.5\n",
" Attempting uninstall: networkx\n",
" Found existing installation: networkx 3.2.1\n",
" Uninstalling networkx-3.2.1:\n",
" Successfully uninstalled networkx-3.2.1\n",
" Attempting uninstall: scikit-learn\n",
" Found existing installation: scikit-learn 1.2.2\n",
" Uninstalling scikit-learn-1.2.2:\n",
" Successfully uninstalled scikit-learn-1.2.2\n",
" Attempting uninstall: librosa\n",
" Found existing installation: librosa 0.10.1\n",
" Uninstalling librosa-0.10.1:\n",
" Successfully uninstalled librosa-0.10.1\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"lida 0.0.10 requires fastapi, which is not installed.\n",
"lida 0.0.10 requires kaleido, which is not installed.\n",
"lida 0.0.10 requires python-multipart, which is not installed.\n",
"lida 0.0.10 requires uvicorn, which is not installed.\n",
"plotnine 0.12.4 requires numpy>=1.23.0, but you have numpy 1.22.0 which is incompatible.\n",
"pywavelets 1.5.0 requires numpy<2.0,>=1.22.4, but you have numpy 1.22.0 which is incompatible.\n",
"tensorflow 2.15.0 requires numpy<2.0.0,>=1.23.5, but you have numpy 1.22.0 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed TTS-0.22.0 anyascii-0.3.2 bangla-0.0.2 bnnumerizer-0.0.2 bnunicodenormalizer-0.1.6 coqpit-0.0.17 dateparser-1.1.8 docopt-0.6.2 einops-0.7.0 encodec-0.1.1 g2pkk-0.1.2 gruut-2.2.3 gruut-ipa-0.13.0 gruut_lang_de-2.0.0 gruut_lang_en-2.0.0 gruut_lang_es-2.0.0 gruut_lang_fr-2.0.2 hangul-romanize-0.1.0 jamo-0.4.1 jsonlines-1.2.0 librosa-0.10.0 networkx-2.8.8 num2words-0.5.13 numpy-1.22.0 pynndescent-0.5.11 pypinyin-0.50.0 pysbd-0.3.4 python-crfsuite-0.9.10 scikit-learn-1.3.2 sudachidict-core-20230927 sudachipy-0.6.8 trainer-0.0.36 umap-learn-0.5.5 unidecode-1.3.7\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.colab-display-data+json": {
"pip_warning": {
"packages": [
"numpy"
]
}
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting numpy==1.24.0\n",
" Using cached numpy-1.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n",
"Installing collected packages: numpy\n",
" Attempting uninstall: numpy\n",
" Found existing installation: numpy 1.22.0\n",
" Uninstalling numpy-1.22.0:\n",
" Successfully uninstalled numpy-1.22.0\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"lida 0.0.10 requires fastapi, which is not installed.\n",
"lida 0.0.10 requires kaleido, which is not installed.\n",
"lida 0.0.10 requires python-multipart, which is not installed.\n",
"lida 0.0.10 requires uvicorn, which is not installed.\n",
"tts 0.22.0 requires numpy==1.22.0; python_version <= \"3.10\", but you have numpy 1.24.0 which is incompatible.\n",
"seaborn 0.12.2 requires numpy!=1.24.0,>=1.17, but you have numpy 1.24.0 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed numpy-1.24.0\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.colab-display-data+json": {
"pip_warning": {
"packages": [
"numpy"
]
}
}
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"#Upload Video - Only supports 60 seconds max\n",
"\n"
],
"metadata": {
"id": "natonJEOr369"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "W2-ItJEbm73O",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 138,
"referenced_widgets": [
"3468a1aeea494572a0f1e12d73091dbc",
"b7dd8443fb7b49ecad505cc59ea81cd7",
"9bc52945fbbc4a5abb8a7ffc2833f3e5",
"3268d0213a3f47ad90fe7188dcf122d0",
"77005cb45c154a1fa1cb7c3685bef50f",
"4d1d24afab4b49cd93a6fcfbc5e38887",
"c6ce3c2d1bf34fce90117bc738b84320",
"37a21aa630f14cb0be6917bd853a4828"
]
},
"outputId": "3a0e363c-7a93-45da-c614-bfad2034c732"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Checkbox(value=False, description='Resize to 720p (better results)')"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "3468a1aeea494572a0f1e12d73091dbc"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Button(description='Upload Video', style=ButtonStyle())"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "3268d0213a3f47ad90fe7188dcf122d0"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Output()"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "c6ce3c2d1bf34fce90117bc738b84320"
}
},
"metadata": {}
}
],
"source": [
"from google.colab import files\n",
"import os\n",
"import subprocess\n",
"\n",
"uploaded = None\n",
"resize_to_720p = False\n",
"\n",
"def upload_video():\n",
" global uploaded\n",
" global video_path\n",
" uploaded = files.upload()\n",
" for filename in uploaded.keys():\n",
" print(f'Uploaded {filename}')\n",
" if resize_to_720p:\n",
" filename = resize_video(filename)\n",
" video_path = filename\n",
" return filename\n",
"\n",
"\n",
"def resize_video(filename):\n",
" output_filename = f\"resized_{filename}\"\n",
" cmd = f\"ffmpeg -i {filename} -vf scale=-1:720 {output_filename}\"\n",
" subprocess.run(cmd, shell=True)\n",
" print(f'Resized video saved as {output_filename}')\n",
" return output_filename\n",
"\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"\n",
"button = widgets.Button(description=\"Upload Video\")\n",
"checkbox = widgets.Checkbox(value=False, description='Resize to 720p (better results)')\n",
"output = widgets.Output()\n",
"\n",
"def on_button_clicked(b):\n",
" with output:\n",
" global video_path\n",
" global resize_to_720p\n",
" resize_to_720p = checkbox.value\n",
" video_path = upload_video()\n",
"\n",
"button.on_click(on_button_clicked)\n",
"display(checkbox, button, output)"
]
},
{
"cell_type": "markdown",
"source": [
"#Extract Audio Text From Video - Whisper\n"
],
"metadata": {
"id": "sFcXWp3nsnAB"
}
},
{
"cell_type": "markdown",
"source": [
"https://github.com/openai/whisper"
],
"metadata": {
"id": "B6EJG8sGuggc"
}
},
{
"cell_type": "code",
"source": [
"!pip install git+https://github.com/openai/whisper.git"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pI0C4MjMs4AR",
"outputId": "177d539d-410c-4fce-867a-009bbd221efb"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting git+https://github.com/openai/whisper.git\n",
" Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-6c4e00ml\n",
" Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-6c4e00ml\n",
" Resolved https://github.com/openai/whisper.git to commit ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper==20231117) (0.58.1)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from openai-whisper==20231117) (1.22.0)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper==20231117) (2.1.0+cu121)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai-whisper==20231117) (4.66.1)\n",
"Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper==20231117) (10.1.0)\n",
"Collecting tiktoken (from openai-whisper==20231117)\n",
" Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: triton<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper==20231117) (2.1.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from triton<3,>=2.0.0->openai-whisper==20231117) (3.13.1)\n",
"Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper==20231117) (0.41.1)\n",
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->openai-whisper==20231117) (2023.6.3)\n",
"Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken->openai-whisper==20231117) (2.31.0)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper==20231117) (4.5.0)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper==20231117) (1.12)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper==20231117) (2.8.8)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper==20231117) (3.1.2)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper==20231117) (2023.6.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper==20231117) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper==20231117) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper==20231117) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken->openai-whisper==20231117) (2023.11.17)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper==20231117) (2.1.3)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->openai-whisper==20231117) (1.3.0)\n",
"Building wheels for collected packages: openai-whisper\n",
" Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for openai-whisper: filename=openai_whisper-20231117-py3-none-any.whl size=802825 sha256=0d634010736ae5cdae6d4a50fa67b0aad54f1a9d731133bcc16e491d7a356ffd\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-to581km2/wheels/8b/6c/d0/622666868c179f156cf595c8b6f06f88bc5d80c4b31dccaa03\n",
"Successfully built openai-whisper\n",
"Installing collected packages: tiktoken, openai-whisper\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"llmx 0.0.15a0 requires cohere, which is not installed.\n",
"llmx 0.0.15a0 requires openai, which is not installed.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed openai-whisper-20231117 tiktoken-0.5.2\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import subprocess\n",
"\n",
"# Ensure video_path variable exists\n",
"if 'video_path' in globals() and video_path is not None:\n",
" ffmpeg_command = f\"ffmpeg -i '{video_path}' -acodec pcm_s24le -ar 48000 -q:a 0 -map a -y 'output_audio.wav'\"\n",
" subprocess.run(ffmpeg_command, shell=True)\n",
"else:\n",
" print(\"No video uploaded. Please upload a video first.\")\n",
"\n",
"import whisper\n",
"\n",
"model = whisper.load_model(\"base\")\n",
"result = model.transcribe(\"output_audio.wav\")\n",
"\n",
"whisper_text = result[\"text\"]\n",
"whisper_language = result['language']\n",
"\n",
"print(\"Audio text:\", whisper_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dr3CCGHDsmY6",
"outputId": "00647db6-2a6e-42ab-ab04-e51b1413df59"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"No video uploaded. Please upload a video first.\n",
"Audio text: I hope they stop. You hope... Don't advertise. You don't want them to advertise? No. What do you mean? If somebody's gonna try to blackmail me with advertising, blackmail me with money, go f*** yourself. But... Go... F*** yourself. Is that clear? I hope it is. Hey, Bob. You're in the audience. Well, let me ask you then... That's all I feel. Don't advertise.\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"#Translation"
],
"metadata": {
"id": "AE25W4XDtcfp"
}
},
{
"cell_type": "code",
"source": [
"!pip install googletrans==4.0.0-rc1"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WZLu_BnZuGSg",
"outputId": "fb9a7eba-1658-4d8a-ae90-ef4fff7ede4b"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting googletrans==4.0.0-rc1\n",
" Downloading googletrans-4.0.0rc1.tar.gz (20 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)\n",
" Downloading httpx-0.13.3-py3-none-any.whl (55 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.1/55.1 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx==0.13.3->googletrans==4.0.0-rc1) (2023.11.17)\n",
"Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading hstspreload-2023.1.1-py3-none-any.whl (1.5 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m30.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx==0.13.3->googletrans==4.0.0-rc1) (1.3.0)\n",
"Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.4/133.4 kB\u001b[0m \u001b[31m19.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading idna-2.10-py2.py3-none-any.whl (58 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.8/58.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n",
"Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading httpcore-0.9.1-py3-none-any.whl (42 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.6/42.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting h11<0.10,>=0.8 (from httpcore==0.9.*->httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading h11-0.9.0-py2.py3-none-any.whl (53 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting h2==3.* (from httpcore==0.9.*->httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading h2-3.2.0-py2.py3-none-any.whl (65 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.0/65.0 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting hyperframe<6,>=5.2.0 (from h2==3.*->httpcore==0.9.*->httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading hyperframe-5.2.0-py2.py3-none-any.whl (12 kB)\n",
"Collecting hpack<4,>=3.0 (from h2==3.*->httpcore==0.9.*->httpx==0.13.3->googletrans==4.0.0-rc1)\n",
" Downloading hpack-3.0.0-py2.py3-none-any.whl (38 kB)\n",
"Building wheels for collected packages: googletrans\n",
" Building wheel for googletrans (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for googletrans: filename=googletrans-4.0.0rc1-py3-none-any.whl size=17396 sha256=b04c55e66034408ca3f9b7ac887b058b0213972050c9bc36c543d453621e044f\n",
" Stored in directory: /root/.cache/pip/wheels/c0/59/9f/7372f0cf70160fe61b528532e1a7c8498c4becd6bcffb022de\n",
"Successfully built googletrans\n",
"Installing collected packages: rfc3986, hyperframe, hpack, h11, chardet, idna, hstspreload, h2, httpcore, httpx, googletrans\n",
" Attempting uninstall: chardet\n",
" Found existing installation: chardet 5.2.0\n",
" Uninstalling chardet-5.2.0:\n",
" Successfully uninstalled chardet-5.2.0\n",
" Attempting uninstall: idna\n",
" Found existing installation: idna 3.6\n",
" Uninstalling idna-3.6:\n",
" Successfully uninstalled idna-3.6\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"tensorflow 2.15.0 requires numpy<2.0.0,>=1.23.5, but you have numpy 1.22.0 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed chardet-3.0.4 googletrans-4.0.0rc1 h11-0.9.0 h2-3.2.0 hpack-3.0.0 hstspreload-2023.1.1 httpcore-0.9.1 httpx-0.13.3 hyperframe-5.2.0 idna-2.10 rfc3986-1.5.0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"target_language = \"French\" #@param [\"English\", \"Spanish\", \"French\", \"German\", \"Italian\", \"Portuguese\", \"Polish\", \"Turkish\", \"Russian\", \"Dutch\", \"Czech\", \"Arabic\", \"Chinese (Simplified)\"]\n",
"\n",
"# Mapping between full names and ISO 639-1 codes\n",
"language_mapping = {\n",
" 'English': 'en',\n",
" 'Spanish': 'es',\n",
" 'French': 'fr',\n",
" 'German': 'de',\n",
" 'Italian': 'it',\n",
" 'Portuguese': 'pt',\n",
" 'Polish': 'pl',\n",
" 'Turkish': 'tr',\n",
" 'Russian': 'ru',\n",
" 'Dutch': 'nl',\n",
" 'Czech': 'cs',\n",
" 'Arabic': 'ar',\n",
" 'Chinese (Simplified)': 'zh-cn'\n",
"}\n",
"\n",
"target_language_code = language_mapping[target_language]\n",
"from googletrans import Translator\n",
"translator = Translator()\n",
"translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text\n",
"print(\"Translated text:\", translated_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uvdpJLA9tnrf",
"outputId": "76cdfb4a-db77-40e0-c8ad-04b936ffe9cb"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Translated text: J'espère qu'ils s'arrêtent.Vous espérez ... ne faites pas de publicité.Vous ne voulez pas qu'ils font de la publicité?Non. Que voulez-vous dire?Si quelqu'un va essayer de me faire chanter avec de la publicité, me faire chanter avec de l'argent, allez-y.Mais ... Allez ... F *** vous-même.Est-ce clair?Je l'espère.Hé, Bob.Vous êtes dans le public.Eh bien, laissez-moi vous demander alors ... c'est tout ce que je ressens.Ne faites pas de publicité.\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"\n",
"\n",
"#Voice Synthesis"
],
"metadata": {
"id": "2Z78OCLjuV2n"
}
},
{
"cell_type": "markdown",
"source": [
"https://github.com/coqui-ai/TTS"
],
"metadata": {
"id": "7myhxlNxue2a"
}
},
{
"cell_type": "code",
"source": [
"from TTS.api import TTS\n",
"import torch\n",
"from IPython.display import Audio, display\n",
"\n",
"tts = TTS(\"tts_models/multilingual/multi-dataset/xtts_v2\", gpu=True).to(\"cuda\")\n",
"\n",
"#generate audio\n",
"tts.tts_to_file(translated_text,\n",
" speaker_wav='output_audio.wav',\n",
" file_path=\"output_synth.wav\",\n",
" language=target_language_code\n",
")\n",
"audio_widget = Audio(filename=\"output_synth.wav\", autoplay=False)\n",
"display(audio_widget)"
],
"metadata": {
"id": "COmPIk3Ku0TH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"#**Important**\n",
"If you are on Google Colab free version (T4), Delete TTS and Whisper at this step"
],
"metadata": {
"id": "6iEqEoaBvBjy"
}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"\n",
"try:\n",
" del tts\n",
"except NameError:\n",
" print(\"Voice model already deleted\")\n",
"\n",
"try:\n",
" del model\n",
"except NameError:\n",
" print(\"Whisper model already deleted\")\n",
"\n",
"torch.cuda.empty_cache()"
],
"metadata": {
"id": "5wRr7LZNvWqf"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"#Lip Sync - High Quality\n",
"Takes arround 15 mins approx for installation on free colab version (T4)"
],
"metadata": {
"id": "0ByUfwRZvZ3V"
}
},
{
"cell_type": "markdown",
"source": [
"https://github.com/OpenTalker/video-retalking"
],
"metadata": {
"id": "2Kxu-WiLwdU5"
}
},
{
"cell_type": "code",
"source": [
"# Dependencies\n",
"%cd /content/\n",
"\n",
"import locale\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"!git clone https://github.com/vinthony/video-retalking.git &> /dev/null\n",
"\n",
"!sudo apt-get install -y libblas-dev liblapack-dev libx11-dev libopenblas-dev\n",
"\n",
"!git clone https://github.com/davisking/dlib.git\n",
"\n",
"!pip install basicsr==1.4.2 face-alignment==1.3.4 kornia==0.5.1 ninja==1.10.2.3 einops==0.4.1 facexlib==0.2.5 librosa==0.9.2 build\n",
"\n",
"!cd dlib && python setup.py install\n",
"\n",
"%cd /content/video-retalking\n",
"\n",
"!mkdir ./checkpoints\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/30_net_gen.pth -O ./checkpoints/30_net_gen.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/BFM.zip -O ./checkpoints/BFM.zip\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/DNet.pt -O ./checkpoints/DNet.pt\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/ENet.pth -O ./checkpoints/ENet.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/expression.mat -O ./checkpoints/expression.mat\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/face3d_pretrain_epoch_20.pth -O ./checkpoints/face3d_pretrain_epoch_20.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/GFPGANv1.3.pth -O ./checkpoints/GFPGANv1.3.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/GPEN-BFR-512.pth -O ./checkpoints/GPEN-BFR-512.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/LNet.pth -O ./checkpoints/LNet.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/ParseNet-latest.pth -O ./checkpoints/ParseNet-latest.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/RetinaFace-R50.pth -O ./checkpoints/RetinaFace-R50.pth\n",
"!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat\n",
"!unzip -d ./checkpoints/BFM ./checkpoints/BFM.zip"
],
"metadata": {
"id": "f_a6Q_AwvpFQ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Generate video\n",
"\n",
"%cd /content/video-retalking\n",
"\n",
"video_path_fix = f\"'../{video_path}'\"\n",
"\n",
"!python inference.py \\\n",
" --face $video_path_fix \\\n",
" --audio \"/content/output_synth.wav\" \\\n",
" --outfile '/content/output_high_qual.mp4'"
],
"metadata": {
"id": "2ecglT2Lwi61"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"#Lip Sync - Normal Quality\n",
"Takes arround 5 mins approx for installation on free colab version (T4)."
],
"metadata": {
"id": "ZXhEOsSZwm49"
}
},
{
"cell_type": "markdown",
"source": [
"https://github.com/justinjohn0306/Wav2Lip"
],
"metadata": {
"id": "phtcGJK-xLuj"
}
},
{
"cell_type": "code",
"source": [
"# Dependencies\n",
"%cd /content/\n",
"\n",
"import locale\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"!git clone https://github.com/justinjohn0306/Wav2Lip\n",
"!cd Wav2Lip && pip install -r requirements_colab.txt\n",
"\n",
"%cd /content/Wav2Lip\n",
"\n",
"!wget \"https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth\" -O \"face_detection/detection/sfd/s3fd.pth\"\n",
"!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth' -O 'checkpoints/wav2lip.pth'\n",
"!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth' -O 'checkpoints/wav2lip_gan.pth'\n",
"!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth' -O 'checkpoints/resnet50.pth'\n",
"!wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth' -O 'checkpoints/mobilenet.pth'\n",
"\n",
"!pip install batch-face"
],
"metadata": {
"id": "A_Tn2hKbxJJO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "09d260bd-7d4b-49c6-b5c2-d176298fc228"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content\n",
"Cloning into 'Wav2Lip'...\n",
"remote: Enumerating objects: 502, done.\u001b[K\n",
"remote: Counting objects: 100% (81/81), done.\u001b[K\n",
"remote: Compressing objects: 100% (61/61), done.\u001b[K\n",
"remote: Total 502 (delta 24), reused 69 (delta 18), pack-reused 421\u001b[K\n",
"Receiving objects: 100% (502/502), 29.76 MiB | 10.76 MiB/s, done.\n",
"Resolving deltas: 100% (257/257), done.\n",
"Collecting numpy==1.23.4 (from -r requirements_colab.txt (line 1))\n",
" Downloading numpy-1.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m38.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from -r requirements_colab.txt (line 2)) (0.10.0)\n",
"Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from -r requirements_colab.txt (line 3)) (4.8.0.76)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from -r requirements_colab.txt (line 4)) (2.1.0+cu121)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from -r requirements_colab.txt (line 5)) (0.16.0+cu121)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from -r requirements_colab.txt (line 6)) (4.66.1)\n",
"Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from -r requirements_colab.txt (line 7)) (0.58.1)\n",
"Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (3.0.1)\n",
"Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (1.11.4)\n",
"Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (1.3.2)\n",
"Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (1.3.2)\n",
"Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (4.4.2)\n",
"Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (0.12.1)\n",
"Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (1.8.0)\n",
"Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (0.3.7)\n",
"Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (4.5.0)\n",
"Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (0.3)\n",
"Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->-r requirements_colab.txt (line 2)) (1.0.7)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements_colab.txt (line 4)) (3.13.1)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements_colab.txt (line 4)) (1.12)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements_colab.txt (line 4)) (2.8.8)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements_colab.txt (line 4)) (3.1.2)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements_colab.txt (line 4)) (2023.6.0)\n",
"Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements_colab.txt (line 4)) (2.1.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision->-r requirements_colab.txt (line 5)) (2.31.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->-r requirements_colab.txt (line 5)) (9.4.0)\n",
"Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->-r requirements_colab.txt (line 7)) (0.41.1)\n",
"Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->-r requirements_colab.txt (line 2)) (4.1.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->-r requirements_colab.txt (line 2)) (23.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements_colab.txt (line 5)) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements_colab.txt (line 5)) (2.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements_colab.txt (line 5)) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements_colab.txt (line 5)) (2023.11.17)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->-r requirements_colab.txt (line 2)) (3.2.0)\n",
"Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa->-r requirements_colab.txt (line 2)) (1.16.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->-r requirements_colab.txt (line 4)) (2.1.3)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->-r requirements_colab.txt (line 4)) (1.3.0)\n",
"Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa->-r requirements_colab.txt (line 2)) (2.21)\n",
"Installing collected packages: numpy\n",
" Attempting uninstall: numpy\n",
" Found existing installation: numpy 1.24.0\n",
" Uninstalling numpy-1.24.0:\n",
" Successfully uninstalled numpy-1.24.0\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"lida 0.0.10 requires fastapi, which is not installed.\n",
"lida 0.0.10 requires kaleido, which is not installed.\n",
"lida 0.0.10 requires python-multipart, which is not installed.\n",
"lida 0.0.10 requires uvicorn, which is not installed.\n",
"tts 0.22.0 requires numpy==1.22.0; python_version <= \"3.10\", but you have numpy 1.23.4 which is incompatible.\n",
"tensorflow 2.15.0 requires numpy<2.0.0,>=1.23.5, but you have numpy 1.23.4 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed numpy-1.23.4\n",
"/content/Wav2Lip\n",
"--2023-12-27 05:05:57-- https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth\n",
"Resolving www.adrianbulat.com (www.adrianbulat.com)... 45.136.29.207\n",
"Connecting to www.adrianbulat.com (www.adrianbulat.com)|45.136.29.207|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 89843225 (86M) [application/octet-stream]\n",
"Saving to: ‘face_detection/detection/sfd/s3fd.pth’\n",
"\n",
"face_detection/dete 100%[===================>] 85.68M 17.3MB/s in 5.9s \n",
"\n",
"2023-12-27 05:06:04 (14.5 MB/s) - ‘face_detection/detection/sfd/s3fd.pth’ saved [89843225/89843225]\n",
"\n",
"--2023-12-27 05:06:04-- https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth\n",
"Resolving github.com (github.com)... 20.205.243.166\n",
"Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/e18ec62e-10ae-4c65-9862-1c7a0fafe228?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050604Z&X-Amz-Expires=300&X-Amz-Signature=487d3932b16b7d0a0e63e4f0054392154cd1b35075e4d71cd9bec6f96f3a1baf&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dwav2lip.pth&response-content-type=application%2Foctet-stream [following]\n",
"--2023-12-27 05:06:04-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/e18ec62e-10ae-4c65-9862-1c7a0fafe228?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050604Z&X-Amz-Expires=300&X-Amz-Signature=487d3932b16b7d0a0e63e4f0054392154cd1b35075e4d71cd9bec6f96f3a1baf&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dwav2lip.pth&response-content-type=application%2Foctet-stream\n",
"Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
"Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 435807851 (416M) [application/octet-stream]\n",
"Saving to: ‘checkpoints/wav2lip.pth’\n",
"\n",
"checkpoints/wav2lip 100%[===================>] 415.62M 317MB/s in 1.3s \n",
"\n",
"2023-12-27 05:06:07 (317 MB/s) - ‘checkpoints/wav2lip.pth’ saved [435807851/435807851]\n",
"\n",
"--2023-12-27 05:06:07-- https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth\n",
"Resolving github.com (github.com)... 20.205.243.166\n",
"Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/76281b9f-48b8-4cbf-9a05-edf61d847109?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050607Z&X-Amz-Expires=300&X-Amz-Signature=736c42e711aaeaebeac61eb12b6b71096250177e64200258a6b4483552cda33f&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dwav2lip_gan.pth&response-content-type=application%2Foctet-stream [following]\n",
"--2023-12-27 05:06:07-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/76281b9f-48b8-4cbf-9a05-edf61d847109?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050607Z&X-Amz-Expires=300&X-Amz-Signature=736c42e711aaeaebeac61eb12b6b71096250177e64200258a6b4483552cda33f&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dwav2lip_gan.pth&response-content-type=application%2Foctet-stream\n",
"Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...\n",
"Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.110.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 435801865 (416M) [application/octet-stream]\n",
"Saving to: ‘checkpoints/wav2lip_gan.pth’\n",
"\n",
"checkpoints/wav2lip 100%[===================>] 415.61M 144MB/s in 2.9s \n",
"\n",
"2023-12-27 05:06:10 (144 MB/s) - ‘checkpoints/wav2lip_gan.pth’ saved [435801865/435801865]\n",
"\n",
"--2023-12-27 05:06:10-- https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth\n",
"Resolving github.com (github.com)... 20.205.243.166\n",
"Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/e6d9110e-3336-450e-b785-bedbfc3b1708?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050610Z&X-Amz-Expires=300&X-Amz-Signature=2c13a0785b7ba1c4681e43830a9f847f29a837674bb240000a8cd9753866b835&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dresnet50.pth&response-content-type=application%2Foctet-stream [following]\n",
"--2023-12-27 05:06:11-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/e6d9110e-3336-450e-b785-bedbfc3b1708?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050610Z&X-Amz-Expires=300&X-Amz-Signature=2c13a0785b7ba1c4681e43830a9f847f29a837674bb240000a8cd9753866b835&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dresnet50.pth&response-content-type=application%2Foctet-stream\n",
"Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
"Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 109497761 (104M) [application/octet-stream]\n",
"Saving to: ‘checkpoints/resnet50.pth’\n",
"\n",
"checkpoints/resnet5 100%[===================>] 104.42M 300MB/s in 0.3s \n",
"\n",
"2023-12-27 05:06:12 (300 MB/s) - ‘checkpoints/resnet50.pth’ saved [109497761/109497761]\n",
"\n",
"--2023-12-27 05:06:12-- https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth\n",
"Resolving github.com (github.com)... 20.205.243.166\n",
"Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/0f1702ef-4998-4acd-abc8-b80c52e838b9?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050612Z&X-Amz-Expires=300&X-Amz-Signature=4b4f8dc8ff2e68679ece4e7ed3d30b12c072647257f6e1f87184026ef4937246&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dmobilenet.pth&response-content-type=application%2Foctet-stream [following]\n",
"--2023-12-27 05:06:12-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/615543729/0f1702ef-4998-4acd-abc8-b80c52e838b9?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231227%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231227T050612Z&X-Amz-Expires=300&X-Amz-Signature=4b4f8dc8ff2e68679ece4e7ed3d30b12c072647257f6e1f87184026ef4937246&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=615543729&response-content-disposition=attachment%3B%20filename%3Dmobilenet.pth&response-content-type=application%2Foctet-stream\n",
"Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
"Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 1789735 (1.7M) [application/octet-stream]\n",
"Saving to: ‘checkpoints/mobilenet.pth’\n",
"\n",
"checkpoints/mobilen 100%[===================>] 1.71M --.-KB/s in 0.01s \n",
"\n",
"2023-12-27 05:06:13 (140 MB/s) - ‘checkpoints/mobilenet.pth’ saved [1789735/1789735]\n",
"\n",
"Collecting batch-face\n",
" Downloading batch_face-1.4.0-py3-none-any.whl (30.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m30.6/30.6 MB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from batch-face) (1.23.4)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from batch-face) (2.1.0+cu121)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from batch-face) (0.16.0+cu121)\n",
"Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from batch-face) (4.8.0.76)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (3.13.1)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (4.5.0)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (1.12)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (2.8.8)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (3.1.2)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (2023.6.0)\n",
"Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->batch-face) (2.1.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision->batch-face) (2.31.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->batch-face) (9.4.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->batch-face) (2.1.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->batch-face) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->batch-face) (2.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->batch-face) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->batch-face) (2023.11.17)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->batch-face) (1.3.0)\n",
"Installing collected packages: batch-face\n",
"Successfully installed batch-face-1.4.0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#Generate video\n",
"\n",
"import os\n",
"import shlex\n",
"\n",
"%cd /content/Wav2Lip\n",
"\n",
"# Padding for the face detection box, passed to inference.py via --pads in the order\n",
"# top, bottom, left, right. If the box doesn't sit quite right, adjust the values a bit;\n",
"# the bottom one is usually the biggest issue.\n",
"pad_top = 0\n",
"pad_bottom = 15\n",
"pad_left = 0\n",
"pad_right = 0\n",
"# Forwarded to inference.py as --resize_factor.\n",
"rescaleFactor = 1\n",
"\n",
"# The previous version passed '../<video_path>', i.e. it resolved video_path against\n",
"# /content (the parent of the Wav2Lip checkout). Build that path explicitly so it no longer\n",
"# depends on the current directory (an absolute video_path is kept as-is by os.path.join),\n",
"# and shell-quote it so file names containing spaces or apostrophes reach inference.py\n",
"# as a single argument.\n",
"video_path_fix = shlex.quote(os.path.join('/content', video_path))\n",
"\n",
"!python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face $video_path_fix --audio \"/content/output_synth.wav\" --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor --nosmooth --outfile '/content/output_video.mp4'\n"
],
"metadata": {
"id": "N7SaNwEuxOnM",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d71a0827-2094-4475-db9f-af7932902533"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/Wav2Lip\n",
"Using cuda for inference.\n",
"Load checkpoint from: checkpoints/wav2lip_gan.pth\n",
"Models loaded\n",
"Reading video frames...\n",
"Number of frames available for inference: 1022\n",
"(80, 5527)\n",
"Length of mel chunks: 1724\n",
" 0% 0/14 [00:00<?, ?it/s]face detect time: 125.64418029785156\n",
"100% 14/14 [02:31<00:00, 10.80s/it]\n",
"wav2lip prediction time: 151.15052485466003\n",
"ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers\n",
" built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)\n",
" configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\n",
" libavutil 56. 70.100 / 56. 70.100\n",
" libavcodec 58.134.100 / 58.134.100\n",
" libavformat 58. 76.100 / 58. 76.100\n",
" libavdevice 58. 13.100 / 58. 13.100\n",
" libavfilter 7.110.100 / 7.110.100\n",
" libswscale 5. 9.100 / 5. 9.100\n",
" libswresample 3. 9.100 / 3. 9.100\n",
" libpostproc 55. 9.100 / 55. 9.100\n",
"Input #0, avi, from 'temp/result.avi':\n",
" Metadata:\n",
" software : Lavf59.27.100\n",
" Duration: 00:01:08.96, start: 0.000000, bitrate: 1618 kb/s\n",
" Stream #0:0: Video: mpeg4 (Simple Profile) (DIVX / 0x58564944), yuv420p, 852x480 [SAR 1:1 DAR 71:40], 1614 kb/s, 25 fps, 25 tbr, 25 tbn, 25 tbc\n",
"\u001b[0;33mGuessed Channel Layout for Input Stream #1.0 : mono\n",
"\u001b[0mInput #1, wav, from '/content/output_synth.wav':\n",
" Duration: 00:01:09.08, bitrate: 384 kb/s\n",
" Stream #1:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, mono, s16, 384 kb/s\n",
"Stream mapping:\n",
" Stream #0:0 -> #0:0 (mpeg4 (native) -> h264 (libx264))\n",
" Stream #1:0 -> #0:1 (pcm_s16le (native) -> aac (native))\n",
"Press [q] to stop, [?] for help\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0musing SAR=1/1\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0musing cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mprofile High, level 3.0, 4:2:0, 8-bit\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0m264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=3 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n",
"Output #0, mp4, to '/content/output_video.mp4':\n",
" Metadata:\n",
" software : Lavf59.27.100\n",
" encoder : Lavf58.76.100\n",
" Stream #0:0: Video: h264 (avc1 / 0x31637661), yuv420p(progressive), 852x480 [SAR 1:1 DAR 71:40], q=2-31, 25 fps, 12800 tbn\n",
" Metadata:\n",
" encoder : Lavc58.134.100 libx264\n",
" Side data:\n",
" cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A\n",
" Stream #0:1: Audio: aac (LC) (mp4a / 0x6134706D), 24000 Hz, mono, fltp, 69 kb/s\n",
" Metadata:\n",
" encoder : Lavc58.134.100 aac\n",
"frame= 1724 fps= 57 q=-1.0 Lsize= 4829kB time=00:01:09.03 bitrate= 573.0kbits/s speed=2.27x \n",
"video:4224kB audio:564kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.843633%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mframe I:10 Avg QP:16.90 size: 26924\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mframe P:794 Avg QP:20.76 size: 4232\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mframe B:920 Avg QP:23.56 size: 756\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mconsecutive B-frames: 23.0% 14.3% 10.1% 52.7%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mmb I I16..4: 28.1% 57.3% 14.6%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mmb P I16..4: 2.3% 6.4% 0.2% P16..4: 22.0% 6.9% 4.3% 0.0% 0.0% skip:57.9%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mmb B I16..4: 0.3% 0.8% 0.0% B16..8: 24.4% 1.5% 0.2% direct: 0.7% skip:72.2% L0:47.8% L1:47.0% BI: 5.2%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0m8x8 transform intra:70.2% inter:69.3%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mcoded y,uvDC,uvAC intra: 25.0% 39.5% 10.7% inter: 5.4% 5.6% 0.6%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mi16 v,h,dc,p: 26% 53% 16% 5%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mi8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 20% 29% 46% 1% 1% 1% 1% 1% 1%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mi4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 32% 26% 15% 4% 4% 6% 4% 4% 4%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mi8c dc,h,v,p: 45% 37% 16% 3%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mWeighted P-Frames: Y:1.9% UV:0.1%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mref P L0: 70.7% 10.1% 14.3% 4.9% 0.1%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mref B L0: 83.1% 14.2% 2.7%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mref B L1: 95.5% 4.5%\n",
"\u001b[1;36m[libx264 @ 0x5a180a42e640] \u001b[0mkb/s:501.73\n",
"\u001b[1;36m[aac @ 0x5a180a430080] \u001b[0mQavg: 8364.447\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Download videos"
],
"metadata": {
"id": "NPzHEZxAxSxc"
}
},
{
"cell_type": "code",
"source": [
"from google.colab import files\n",
"from IPython.display import display, HTML\n",
"import ipywidgets as widgets\n",
"import base64\n",
"import os\n",
"\n",
"# Output files this notebook may have produced; only those that exist are shown below.\n",
"video_paths = [\"/content/output_video.mp4\", \"/content/output_high_qual.mp4\"]\n",
"\n",
"def download_video(b):\n",
"    \"\"\"Click handler: download the file whose path was attached to the clicked button.\"\"\"\n",
"    files.download(b.video_path)\n",
"\n",
"download_buttons = []\n",
"\n",
"# Layout definition for button\n",
"button_layout = widgets.Layout(width='250px')\n",
"\n",
"# Show a player and a download button for every output file that exists.\n",
"# The loop variable is deliberately not called video_path: this loop runs at notebook\n",
"# scope, and the \"Generate video\" cell reads a global with that name, so reusing it\n",
"# here would overwrite the input path and break a re-run of that cell.\n",
"for output_path in video_paths:\n",
"    if os.path.exists(output_path):\n",
"        # Read the whole file and base64-encode it so it can be inlined as a data: URI\n",
"        with open(output_path, \"rb\") as video_file:\n",
"            video_base64 = base64.b64encode(video_file.read()).decode()\n",
"\n",
"        # HTML widget for video\n",
"        video_html = HTML(data=f\"\"\"\n",
"        <video width=400 controls>\n",
"            <source src=\"data:video/mp4;base64,{video_base64}\" type=\"video/mp4\" />\n",
"        </video>\n",
"        \"\"\")\n",
"\n",
"        # Button whose click handler looks up the path stored on the button itself\n",
"        download_button = widgets.Button(description=f\"Download {os.path.basename(output_path)}\",\n",
"                                         layout=button_layout)\n",
"        download_button.video_path = output_path\n",
"        download_button.on_click(download_video)\n",
"        download_buttons.append(download_button)\n",
"\n",
"        # Display widgets\n",
"        display(video_html)\n",
"        display(download_button)\n"
],
"metadata": {
"id": "KB0TmaBHxYcQ"
},
"execution_count": null,
"outputs": []
}
]
}
gitextract_a9rgujrs/
├── README.md
└── Video_Translation_&_Lipsync.ipynb
Condensed preview — 2 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (115K chars).
[
{
"path": "README.md",
"chars": 659,
"preview": "# AI-Video-Translation\n\n(This project is not maintained anymore, feel free to fork and modify it!)<br>\n\nA simple Google "
},
{
"path": "Video_Translation_&_Lipsync.ipynb",
"chars": 106637,
"preview": "{\n \"nbformat\": 4,\n \"nbformat_minor\": 0,\n \"metadata\": {\n \"colab\": {\n \"provenance\": [],\n \"collapsed_sectio"
}
]
About this extraction
This page contains the full source code of the pranauv1/AI-Video-Translation GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 2 files (104.8 KB), approximately 34.9k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.