[
  {
    "path": ".gitattributes",
    "content": "assets/demo.gif filter=lfs diff=lfs merge=lfs -text\nassets/demo.m4v filter=lfs diff=lfs merge=lfs -text\n"
  },
  {
    "path": ".github/workflows/sync_to_huggingface_space.yml",
    "content": "name: Sync to Hugging Face hub\non:\n  push:\n    branches: [main]\n\n  workflow_dispatch:\n\njobs:\n  sync-to-hub:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v3\n        with:\n          fetch-depth: 0\n          lfs: true\n      - name: Set git committer identity\n        run: |\n          git config --global user.email \"danilotpnta@gmail.com\"\n          git config --global user.name \"danilotpnta\"\n      - name: Pull latest changes from Hugging Face with rebase\n        env:\n          HF_TOKEN: ${{ secrets.HF_TOKEN }}\n        run: |\n          git pull --rebase https://huggingface.co/spaces/danilotpnta/Youtube-Whisper main\n      - name: Push to hub\n        env:\n          HF_TOKEN: ${{ secrets.HF_TOKEN }}\n        run: git push https://danilotpnta:$HF_TOKEN@huggingface.co/spaces/danilotpnta/Youtube-Whisper main\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   
https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control\n.pdm.toml\n.pdm-python\n.pdm-build/\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n\n\n*.mp3\n.DS_Store\n*.mp4\n*.m4v\nthumbnail.jpg\n"
  },
  {
    "path": "Dockerfile",
    "content": "# Dockerfile for Youtube-Whisper\n# Production-ready Gradio application with audio processing\n\nFROM python:3.10-slim\n\n# Install system dependencies\n# ffmpeg: Required for audio/video processing\n# curl: For health checks\n# git: Required by pip to install openai-whisper from its Git URL\nRUN apt-get update && apt-get install -y \\\n    ffmpeg \\\n    curl \\\n    git \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Create non-root user\nRUN useradd -m -u 1000 appuser && \\\n    mkdir -p /app && \\\n    chown -R appuser:appuser /app\n\n# Set working directory\nWORKDIR /app\n\n# Copy requirements and install dependencies\nCOPY requirements.txt .\nRUN pip install --no-cache-dir -r requirements.txt\n\n# Copy application code with correct ownership\nCOPY --chown=appuser:appuser . .\n\n# Switch to non-root user\nUSER appuser\n\n# Expose Gradio port\nEXPOSE 7860\n\n# Health check\nHEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \\\n  CMD curl -f http://localhost:7860/ || exit 1\n\n# Run Gradio app\n# app.py binds to 0.0.0.0:7860 explicitly via demo.launch()\nCMD [\"python\", \"app.py\"]"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2024 Danilo Toapanta\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "---\ntitle: Youtube Whisper\nemoji: 🐢\ncolorFrom: purple\ncolorTo: pink\nsdk: docker\nsdk_version: 4.44.0\napp_file: app.py\npinned: false\nlicense: mit\n---\n\n# Youtube-Whisper\nA simple Gradio app that transcribes YouTube videos by extracting audio and using OpenAI’s Whisper model for transcription. Paste a YouTube link and get the video’s audio transcribed into text.\n\n![Demo](assets/demo.gif)\n\n## Requirements\n\n- Conda installed (for managing environments)\n- Python 3.9 or above\n- **FFmpeg** installed (required for audio conversion)\n\n## Installation\n\n### Step 1: Clone the Repository\n\n```bash\ngit clone https://github.com/danilotpnta/Youtube-Whisper.git\ncd Youtube-Whisper\n```\n\n### Step 2: Install FFmpeg\n\nYou need FFmpeg for processing the audio. Install it based on your operating system:\n\n- **macOS**: Install FFmpeg via Homebrew:\n  ```bash\n  brew install ffmpeg\n  ```\n\n- **Ubuntu/Linux**: Install FFmpeg via apt:\n  ```bash\n  sudo apt update\n  sudo apt install ffmpeg\n  ```\n\n- **Windows**: \n  - Download FFmpeg from the official website: [FFmpeg Download](https://ffmpeg.org/download.html).\n  - Extract the files and add the `bin` folder to your system’s PATH environment variable. 
For detailed instructions on adding FFmpeg to PATH, you can follow [this guide](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/).\n\nVerify the installation by running:\n```bash\nffmpeg -version\n```\n\n### Step 3: Create and Activate the Conda Environment\n\nTo set up the environment using the provided `environment.yml` file:\n\n```bash\nconda env create -f environment.yml\n```\n\nOnce the environment is created, activate it with:\n\n```bash\nconda activate yt-whisper\n```\n\n### Step 4: Run the App\n\nOnce the environment is active, you can launch the Gradio app with:\n\n```bash\npython app.py\n```\n\nThis will start a local server for the app, and you can access it by visiting the URL printed in the terminal (usually `http://localhost:7860/`).\n\n### Troubleshooting\n\n1. **FFmpeg Not Found**: \n   If you see an error related to `ffmpeg not found`, ensure FFmpeg is installed and added to your system's PATH. You can also specify its location manually in the script by setting `ffmpeg_location`.\n\n2. **Pytube Errors**:\n   If you encounter issues with `pytube`, ensure you’re using the `yt-dlp` version and that your URL is correctly formatted.\n\n3. **Update Dependencies**:\n   Ensure that `pip` and `conda` are up to date:\n   ```bash\n   conda update conda\n   pip install --upgrade pip\n   ```\n\n## License\n\nThis project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.\n"
  },
  {
    "path": "app.py",
    "content": "import os\nimport whisper\nimport gradio as gr\nfrom download_video import download_mp3_yt_dlp \n\nimport warnings\nwarnings.filterwarnings(\"ignore\", category=FutureWarning, module=\"torch\")\n\n# Function to download the audio, title, and thumbnail from YouTube\ndef download_video_info(url):\n    try:\n        # Call the function to download video and get title, thumbnail\n        title, thumbnail_url = download_mp3_yt_dlp(url)\n        audio_file = \"downloaded_video.mp3\"  # Path to the downloaded audio (MP3)\n\n        return audio_file, title, thumbnail_url\n    except Exception as e:\n        # Log the failure and return the same 3-tuple shape as the success path:\n        # the caller unpacks exactly three values and treats a missing audio path as an error.\n        print(f\"Failed to download video: {e}\")\n        return None, None, None\n\n# Function to transcribe the downloaded audio using Whisper\ndef transcribe_audio(audio_path, model_size=\"base\", language=\"en\"):\n    model = whisper.load_model(model_size)\n    result = model.transcribe(audio_path, language=language)\n    return result['text']\n\n# Split logic: First fetch title and thumbnail, then transcribe\ndef get_video_info_and_transcribe(youtube_url, model_size=\"base\", language=\"en\"):\n    # Fetch title and thumbnail first\n    audio_path, title, thumbnail_url = download_video_info(youtube_url)\n    \n    # If fetching video info fails\n    if not audio_path or not os.path.exists(audio_path):\n        return gr.update(value=\"Error fetching video.\"), None, None\n\n    # Show title and thumbnail to the user while the transcription is happening\n    title_output = gr.update(value=title)\n    \n    # Show the thumbnail if available\n    if thumbnail_url:\n        thumbnail_output = gr.update(value=thumbnail_url)\n    else:\n        thumbnail_output = gr.update(visible=False)  # Hide if no thumbnail\n    \n    # Start transcription\n    transcription = transcribe_audio(audio_path, model_size, language)\n\n    return title_output, thumbnail_output, gr.update(value=transcription)\n\n# Gradio interface setup using gradio.components\nwith gr.Blocks() as demo:\n\n    title = \"<center><h1>YouTube Whisper ⚡️ </h1></center>\"\n    gr.HTML(title)\n\n    gr.Markdown(\n    \"\"\"\n    This tool lets you transcribe YouTube videos in multiple languages using **[Whisper](https://openai.com/research/whisper)**, an open-source speech recognition (ASR) model developed by OpenAI.\n\n\n    ### Key Features:\n    - **Fast transcription**: Using the **base** model, transcribing a **3 minute** video takes approximately **30 seconds**.\n    - **Multiple language support**: Choose from **English**, **Spanish**, **French**, and more!\n    - **Simple workflow**: \n        1. Paste a YouTube link.\n        2. Select the model size and language.\n        3. Click \"Transcribe\" to get the text from the video.\n\n    _Transcription times may vary based on model size and video length._\n    \"\"\")\n\n    with gr.Row():\n        youtube_url = gr.Textbox(label=\"YouTube Link\", elem_id=\"yt_link\", scale=5)\n        model_size = gr.Dropdown(choices=[\"tiny\", \"base\", \"small\", \"medium\", \"large\"], label=\"Model Size\", value=\"base\", scale=1)\n        language = gr.Dropdown(choices=[\"en\", \"es\", \"fr\", \"de\", \"it\", \"ja\"], label=\"Language\", value=\"en\", scale=1)\n    \n    title_output = gr.Textbox(label=\"Video Title\", interactive=False)\n\n    with gr.Row():\n        thumbnail_output = gr.Image(label=\"Thumbnail\", interactive=False, scale=1)\n        transcription_output = gr.Textbox(label=\"Transcription\", interactive=False, scale=1)\n    \n    transcribe_button = gr.Button(\"Transcribe\")\n\n    transcribe_button.click(\n        get_video_info_and_transcribe, \n        inputs=[youtube_url, model_size, language],\n        outputs=[title_output, thumbnail_output, transcription_output]\n    )\n\n# Launch the app\nif __name__ == \"__main__\":\n    demo.launch(server_name=\"0.0.0.0\", server_port=7860)\n"
  },
  {
    "path": "download_video.py",
    "content": "import yt_dlp as youtube_dl\nimport requests\n\ndef download_mp3_yt_dlp(youtube_url):\n    \"\"\"Download a video's audio as downloaded_video.mp3 and return (title, thumbnail_url).\n\n    The thumbnail is also saved to thumbnail.jpg on a best-effort basis; a thumbnail\n    failure is logged and does not affect the returned values.\n    \"\"\"\n    # Set up yt-dlp options\n    ydl_opts = {\n        'format': 'bestaudio/best',\n        'postprocessors': [{\n            'key': 'FFmpegExtractAudio',\n            'preferredcodec': 'mp3',\n            'preferredquality': '192',\n        }],\n        'outtmpl': 'downloaded_video.%(ext)s',\n        'quiet': False,\n        'no_warnings': True,\n        # Read hook keys defensively: not every status update is guaranteed to carry them\n        'progress_hooks': [lambda d: print(f\"Downloading {d.get('filename', '')}: {d.get('_percent_str', '')}\")],\n    }\n\n    # Extract video info including title and thumbnail\n    with youtube_dl.YoutubeDL() as ydl:\n        info_dict = ydl.extract_info(youtube_url, download=False)\n        title = info_dict.get('title', 'Unknown Title')\n        thumbnail_url = info_dict.get('thumbnail', None)\n\n    # Download the MP3 using yt-dlp\n    with youtube_dl.YoutubeDL(ydl_opts) as ydl:\n        ydl.download([youtube_url])\n\n    # Fetch the thumbnail for display (best effort: a thumbnail failure must not\n    # discard the audio that was already downloaded)\n    if thumbnail_url:\n        try:\n            # Bound the request so a stalled thumbnail host cannot hang the app\n            response = requests.get(thumbnail_url, timeout=10)\n            if response.status_code == 200:\n                with open('thumbnail.jpg', 'wb') as f:\n                    f.write(response.content)\n                print(\"Thumbnail downloaded successfully.\")\n            else:\n                print(f\"Failed to download thumbnail. HTTP Status Code: {response.status_code}\")\n        except requests.RequestException as e:\n            print(f\"Failed to download thumbnail: {e}\")\n\n    # Return the title and thumbnail URL\n    return title, thumbnail_url\n\n# Example usage:\n# youtube_url = \"https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN\"\n# title, thumbnail_url = download_mp3_yt_dlp(youtube_url)\n# print(f\"Title: {title}\")\n# print(f\"Thumbnail: {thumbnail_url}\")\n"
  },
  {
    "path": "environment.yml",
    "content": "name: yt-whisper\nchannels:\n  - defaults\n  - conda-forge\ndependencies:\n  - python=3.9\n  - pip\n  - pip:\n      - requests\n      - gradio\n      - openai-whisper @ git+https://github.com/openai/whisper.git\n      - yt_dlp\n      - tqdm"
  },
  {
    "path": "packages.txt",
    "content": "chromium-driver"
  },
  {
    "path": "requirements.txt",
    "content": "requests\ngradio\nopenai-whisper @ git+https://github.com/openai/whisper.git\ntqdm\nyt_dlp"
  }
]