Repository: danilotpnta/Youtube-Whisper
Branch: main
Commit: aa30609c55c7
Files: 11
Total size: 14.0 KB

Directory structure:
gitextract_dddsbxo9/

├── .gitattributes
├── .github/
│   └── workflows/
│       └── sync_to_huggingface_space.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.py
├── download_video.py
├── environment.yml
├── packages.txt
└── requirements.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
assets/demo.gif filter=lfs diff=lfs merge=lfs -text
assets/demo.m4v filter=lfs diff=lfs merge=lfs -text


================================================
FILE: .github/workflows/sync_to_huggingface_space.yml
================================================
# Mirrors the main branch of this repository to the Hugging Face Space
# danilotpnta/Youtube-Whisper on every push (or on manual dispatch).
name: Sync to Hugging Face hub
on:
  push:
    branches: [main]

  # Allows the workflow to be started manually as well.
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      # Full history (fetch-depth: 0) and LFS objects are fetched so the
      # rebase and push below operate on the complete repository.
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      # The rebase in the next step may create commits, which needs an identity.
      - name: Set git committer identity
        run: |
          git config --global user.email "danilotpnta@gmail.com"
          git config --global user.name "danilotpnta"
      # Replay local commits on top of whatever is already on the Space.
      # NOTE(review): HF_TOKEN is exported here but the pull URL does not
      # reference it, so this pull is unauthenticated — confirm that is intended.
      - name: Pull latest changes from Hugging Face with rebase
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git pull --rebase https://huggingface.co/spaces/danilotpnta/Youtube-Whisper main
      # Push authenticates by embedding the HF_TOKEN secret in the remote URL.
      - name: Push to hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: git push https://danilotpnta:$HF_TOKEN@huggingface.co/spaces/danilotpnta/Youtube-Whisper main


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


*.mp3
.DS_Store
*.mp4
*.m4v
thumbnail.jpg


================================================
FILE: Dockerfile
================================================
# Dockerfile for Youtube-Whisper
# Gradio application that downloads YouTube audio and transcribes it with Whisper

FROM python:3.10-slim

# Send Python output (e.g. the download progress prints) straight to the
# container logs instead of holding it in a buffer.
ENV PYTHONUNBUFFERED=1

# Install system dependencies
# ca-certificates: TLS roots for HTTPS; listed explicitly because
#                  --no-install-recommends skips recommended packages
# curl:            used by the HEALTHCHECK below
# ffmpeg:          required for audio extraction/conversion
# git:             needed by pip to install openai-whisper from its GitHub URL
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    ffmpeg \
    git \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user and give it ownership of /app.
# The app writes its downloaded audio and thumbnail into the working
# directory at runtime, so /app must be writable by this user.
RUN useradd -m -u 1000 appuser && \
    mkdir -p /app && \
    chown -R appuser:appuser /app

# Set working directory
WORKDIR /app

# Copy requirements first so the dependency layer stays cached until
# requirements.txt itself changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code with correct ownership
COPY --chown=appuser:appuser . .

# Switch to non-root user
USER appuser

# Expose Gradio port (documentation only; publish it with `docker run -p`)
EXPOSE 7860

# Health check: fail on HTTP errors, stay quiet otherwise, discard the page body
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD curl -fsS -o /dev/null http://localhost:7860/ || exit 1

# Run Gradio app
# app.py passes server_name="0.0.0.0" and server_port=7860 to demo.launch(),
# which is what makes the server reachable from outside the container.
CMD ["python", "app.py"]

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2024 Danilo Toapanta

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
---
title: Youtube Whisper
emoji: 🐢
colorFrom: purple
colorTo: pink
sdk: docker
sdk_version: 4.44.0
app_file: app.py
pinned: false
license: mit
---

# Youtube-Whisper
A simple Gradio app that transcribes YouTube videos by extracting audio and using OpenAI’s Whisper model for transcription. Paste a YouTube link and get the video’s audio transcribed into text.

![Demo](assets/demo.gif)

## Requirements

- Conda installed (for managing environments)
- Python 3.9 or above
- **FFmpeg** installed (required for audio conversion)

## Installation

### Step 1: Clone the Repository

```bash
git clone https://github.com/danilotpnta/Youtube-Whisper.git
cd Youtube-Whisper
```

### Step 2: Install FFmpeg

You need FFmpeg for processing the audio. Install it based on your operating system:

- **macOS**: Install FFmpeg via Homebrew:
  ```bash
  brew install ffmpeg
  ```

- **Ubuntu/Linux**: Install FFmpeg via apt:
  ```bash
  sudo apt update
  sudo apt install ffmpeg
  ```

- **Windows**: 
  - Download FFmpeg from the official website: [FFmpeg Download](https://ffmpeg.org/download.html).
  - Extract the files and add the `bin` folder to your system’s PATH environment variable. For detailed instructions on adding FFmpeg to PATH, you can follow [this guide](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/).

Verify the installation by running:
```bash
ffmpeg -version
```

### Step 3: Create and Activate the Conda Environment

To set up the environment using the provided `environment.yml` file:

```bash
conda env create -f environment.yml
```

Once the environment is created, activate it with:

```bash
conda activate yt-whisper
```

### Step 4: Run the App

Once the environment is active, you can launch the Gradio app with:

```bash
python app.py
```

This will start a local server for the app, and you can access it by visiting the URL printed in the terminal (usually `http://localhost:7860/`).

### Troubleshooting

1. **FFmpeg Not Found**: 
   If you see an error related to `ffmpeg not found`, ensure FFmpeg is installed and added to your system's PATH. You can also specify its location manually by adding an `ffmpeg_location` entry to the `ydl_opts` dictionary in `download_video.py`.

2. **Download Errors (`yt-dlp`)**:
   This app downloads audio with `yt-dlp`, not `pytube`. If a download fails, ensure `yt-dlp` is up to date (`pip install --upgrade yt-dlp`) and that your URL is correctly formatted.

3. **Update Dependencies**:
   Ensure that `pip` and `conda` are up to date:
   ```bash
   conda update conda
   pip install --upgrade pip
   ```

## License

This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.


================================================
FILE: app.py
================================================
import os
import whisper
import gradio as gr
from download_video import download_mp3_yt_dlp 

import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="torch")

# Function to download the audio, title, and thumbnail from YouTube
def download_video_info(url):
    """Download the audio track for ``url`` and return its metadata.

    Args:
        url: Video URL handed to ``download_mp3_yt_dlp``.

    Returns:
        A 3-tuple ``(audio_file, title, thumbnail_url)``. ``audio_file`` is the
        fixed path ``"downloaded_video.mp3"``. If anything raises during the
        download, the error is printed and ``(None, None, None)`` is returned,
        so the result always has three elements and can be unpacked safely.
    """
    audio_file = "downloaded_video.mp3"  # Path to the downloaded audio (MP3)
    try:
        # Call the function to download video and get title, thumbnail
        title, thumbnail_url = download_mp3_yt_dlp(url)
    except Exception as e:
        # Keep the failure visible in the logs; the caller only checks for None.
        print(f"Error fetching video info: {e}")
        return None, None, None

    return audio_file, title, thumbnail_url

# Run Whisper over an audio file and hand back the recognised text
def transcribe_audio(audio_path, model_size="base", language="en"):
    """Transcribe ``audio_path`` with a Whisper model.

    Args:
        audio_path: Path of the audio file passed to ``model.transcribe``.
        model_size: Name passed to ``whisper.load_model`` (default ``"base"``).
        language: Language code forwarded to ``transcribe`` (default ``"en"``).

    Returns:
        The value stored under the ``'text'`` key of the transcription result.
    """
    whisper_model = whisper.load_model(model_size)
    return whisper_model.transcribe(audio_path, language=language)['text']

# Download the video's audio, then transcribe it and build the UI updates
def get_video_info_and_transcribe(youtube_url, model_size="base", language="en"):
    """Fetch a video's audio and metadata, transcribe it, and return UI updates.

    Args:
        youtube_url: URL of the video to process.
        model_size: Whisper model name forwarded to ``transcribe_audio``.
        language: Language code forwarded to ``transcribe_audio``.

    Returns:
        A 3-tuple matching the click handler's outputs, in order: title,
        thumbnail, transcription. On a failed download the title slot carries
        the message "Error fetching video." and the other two are ``None``.
        All three values are returned together once transcription finishes.
    """
    # Keep only the first three fields so a longer tuple from the helper
    # (e.g. one carrying a trailing error message) cannot break unpacking.
    audio_path, title, thumbnail_url = download_video_info(youtube_url)[:3]

    # If fetching video info fails
    if not audio_path or not os.path.exists(audio_path):
        return gr.update(value="Error fetching video."), None, None

    title_update = gr.update(value=title)

    if thumbnail_url:
        # Set visible=True explicitly: an earlier request without a thumbnail
        # hides the component, and a value-only update would leave it hidden.
        thumbnail_update = gr.update(value=thumbnail_url, visible=True)
    else:
        thumbnail_update = gr.update(visible=False)  # Hide if no thumbnail

    # Run the transcription
    transcription = transcribe_audio(audio_path, model_size, language)

    return title_update, thumbnail_update, gr.update(value=transcription)

# Gradio interface setup: everything created inside this `with` block becomes
# part of the `demo` Blocks app, laid out in the order it is declared.
with gr.Blocks() as demo:

    # Page heading rendered as raw HTML.
    title = "<center><h1>YouTube Whisper ⚡️ </h1></center>"
    gr.HTML(title)

    # Introductory description shown under the heading.
    gr.Markdown(
    """
    This tool lets you transcribe YouTube videos in multiple languages using **[Whisper](https://openai.com/research/whisper)**, an open-source speech recognition (ASR) model developed by OpenAI.


    ### Key Features:
    - **Fast transcription**: Using the **base** model, transcribing a **3 minute** video takes approximately **30 seconds**.
    - **Multiple language support**: Choose from **English**, **Spanish**, **French**, and more!
    - **Simple workflow**: 
        1. Paste a YouTube link.
        2. Select the model size and language.
        3. Click "Transcribe" to get the text from the video.

    _Transcription times may vary based on model size and video length._
    """)

    # Input row: the URL box (scale=5) is weighted wider than the two
    # dropdowns (scale=1 each).
    with gr.Row():
        youtube_url = gr.Textbox(label="YouTube Link", elem_id="yt_link", scale=5)
        model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], label="Model Size", value="base", scale=1)
        language = gr.Dropdown(choices=["en", "es", "fr", "de", "it", "ja"], label="Language", value="en", scale=1)
    
    # Read-only field for the video title (also carries the error message
    # when the download fails).
    title_output = gr.Textbox(label="Video Title", interactive=False)

    # Output row: thumbnail and transcription side by side with equal scale.
    with gr.Row():
        thumbnail_output = gr.Image(label="Thumbnail", interactive=False, scale=1)
        transcription_output = gr.Textbox(label="Transcription", interactive=False, scale=1)
    
    transcribe_button = gr.Button("Transcribe")

    # Wire the button: the three inputs are passed positionally to
    # get_video_info_and_transcribe, and its three return values fill the
    # outputs in the order listed.
    transcribe_button.click(
        get_video_info_and_transcribe, 
        inputs=[youtube_url, model_size, language],
        outputs=[title_output, thumbnail_output, transcription_output]
    )

# Launch the app only when run as a script. Binding to 0.0.0.0 on port 7860
# matches the port the Dockerfile exposes and health-checks.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)


================================================
FILE: download_video.py
================================================
import yt_dlp as youtube_dl
import requests

def download_mp3_yt_dlp(youtube_url):
    """Download a video's audio as MP3 and save its thumbnail.

    The audio is written to ``downloaded_video.mp3`` in the current working
    directory (bitrate setting ``'192'``). If the video has a thumbnail, it is
    fetched and written to ``thumbnail.jpg`` on a best-effort basis.

    Args:
        youtube_url: URL of the video to download.

    Returns:
        tuple: ``(title, thumbnail_url)``. ``title`` falls back to
        ``'Unknown Title'`` and ``thumbnail_url`` to ``None`` when the
        extractor does not provide them.

    Raises:
        Whatever yt-dlp raises when extraction or download fails. Thumbnail
        network errors are reported with ``print`` and do not raise.
    """

    def _report_progress(d):
        # Read keys with .get so a hook call that lacks one of them cannot
        # raise from inside the download.
        print(f"Downloading {d.get('filename', '?')}: {d.get('_percent_str', '')}")

    # Set up yt-dlp options
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': 'downloaded_video.%(ext)s',
        # A watch URL that also carries a playlist id would otherwise download
        # every playlist entry into the same output file.
        'noplaylist': True,
        'quiet': False,
        'no_warnings': True,
        'progress_hooks': [_report_progress],
    }

    # Extract the metadata and download the audio in a single pass, instead
    # of resolving the URL twice.
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(youtube_url, download=True) or {}

    title = info_dict.get('title', 'Unknown Title')
    thumbnail_url = info_dict.get('thumbnail', None)

    # Fetch the thumbnail for display. This is best-effort: the audio is
    # already on disk, so a thumbnail failure must not fail the whole call.
    if thumbnail_url:
        try:
            # Bounded wait so a stalled image host cannot hang the request.
            response = requests.get(thumbnail_url, timeout=10)
        except requests.RequestException as exc:
            print(f"Failed to download thumbnail: {exc}")
        else:
            if response.status_code == 200:
                with open('thumbnail.jpg', 'wb') as f:
                    f.write(response.content)
                print("Thumbnail downloaded successfully.")
            else:
                print(f"Failed to download thumbnail. HTTP Status Code: {response.status_code}")

    # Return the title and thumbnail URL
    return title, thumbnail_url

# Example usage:
# youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN"
# title, thumbnail_url = download_mp3_yt_dlp(youtube_url)
# print(f"Title: {title}")
# print(f"Thumbnail: {thumbnail_url}")


================================================
FILE: environment.yml
================================================
# Conda environment for running the app locally
# (create with `conda env create -f environment.yml`).
name: yt-whisper
channels:
  - defaults
  - conda-forge
dependencies:
  # NOTE(review): pinned to 3.9 here while the Dockerfile uses python:3.10-slim.
  - python=3.9
  - pip
  # The pip packages below mirror requirements.txt.
  - pip:
      - requests
      - gradio
      # Installed straight from the GitHub repository, so git must be available.
      - openai-whisper @ git+https://github.com/openai/whisper.git
      - yt_dlp
      - tqdm

================================================
FILE: packages.txt
================================================
chromium-driver

================================================
FILE: requirements.txt
================================================
# Python dependencies installed by the Dockerfile (pip install -r requirements.txt).
# No versions are pinned, so each build resolves whatever is current.
requests
gradio
# Installed straight from the GitHub repository; pip needs git for this line.
openai-whisper @ git+https://github.com/openai/whisper.git
tqdm
yt_dlp