Repository: danilotpnta/Youtube-Whisper Branch: main Commit: aa30609c55c7 Files: 11 Total size: 14.0 KB Directory structure: gitextract_dddsbxo9/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── sync_to_huggingface_space.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py ├── download_video.py ├── environment.yml ├── packages.txt └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ assets/demo.gif filter=lfs diff=lfs merge=lfs -text assets/demo.m4v filter=lfs diff=lfs merge=lfs -text ================================================ FILE: .github/workflows/sync_to_huggingface_space.yml ================================================ name: Sync to Hugging Face hub on: push: branches: [main] workflow_dispatch: jobs: sync-to-hub: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: fetch-depth: 0 lfs: true - name: Set git committer identity run: | git config --global user.email "danilotpnta@gmail.com" git config --global user.name "danilotpnta" - name: Pull latest changes from Hugging Face with rebase env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | git pull --rebase https://huggingface.co/spaces/danilotpnta/Youtube-Whisper main - name: Push to hub env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: git push https://danilotpnta:$HF_TOKEN@huggingface.co/spaces/danilotpnta/Youtube-Whisper main ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ *.mp3 .DS_Store *.mp4 *.m4v thumbnail.jpg ================================================ FILE: Dockerfile ================================================ # Dockerfile for Youtube-Whisper # Production-ready Gradio application with audio processing FROM python:3.10-slim # Install system dependencies # ffmpeg: Required for audio/video processing # curl: For health checks RUN apt-get update && apt-get install -y \ ffmpeg \ curl \ git \ && rm -rf /var/lib/apt/lists/* # Create non-root user RUN useradd -m -u 1000 appuser && \ mkdir -p /app && \ chown -R appuser:appuser /app # Set working directory WORKDIR /app # Copy requirements and install dependencies COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt # Copy application code with correct ownership COPY --chown=appuser:appuser . . # Switch to non-root user USER appuser # Expose Gradio port EXPOSE 7860 # Health check HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ CMD curl -f http://localhost:7860/ || exit 1 # Run Gradio app # Gradio automatically binds to 0.0.0.0 by default CMD ["python", "app.py"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 Danilo Toapanta Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ --- title: Youtube Whisper emoji: 🐢 colorFrom: purple colorTo: pink sdk: docker sdk_version: 4.44.0 app_file: app.py pinned: false license: mit --- # Youtube-Whisper A simple Gradio app that transcribes YouTube videos by extracting audio and using OpenAI’s Whisper model for transcription. Paste a YouTube link and get the video’s audio transcribed into text. ![Demo](assets/demo.gif) ## Requirements - Conda installed (for managing environments) - Python 3.9 or above - **FFmpeg** installed (required for audio conversion) ## Installation ### Step 1: Clone the Repository ```bash git clone https://github.com/danilotpnta/Youtube-Whisper.git cd Youtube-Whisper ``` ### Step 2: Install FFmpeg You need FFmpeg for processing the audio. Install it based on your operating system: - **macOS**: Install FFmpeg via Homebrew: ```bash brew install ffmpeg ``` - **Ubuntu/Linux**: Install FFmpeg via apt: ```bash sudo apt update sudo apt install ffmpeg ``` - **Windows**: - Download FFmpeg from the official website: [FFmpeg Download](https://ffmpeg.org/download.html). - Extract the files and add the `bin` folder to your system’s PATH environment variable. For detailed instructions on adding FFmpeg to PATH, you can follow [this guide](https://www.geeksforgeeks.org/how-to-install-ffmpeg-on-windows/). Verify the installation by running: ```bash ffmpeg -version ``` ### Step 3: Create and Activate the Conda Environment To set up the environment using the provided `environment.yml` file: ```bash conda env create -f environment.yml ``` Once the environment is created, activate it with: ```bash conda activate yt-whisper ``` ### Step 4: Run the App Once the environment is active, you can launch the Gradio app with: ```bash python app.py ``` This will start a local server for the app, and you can access it by visiting the URL printed in the terminal (usually `http://localhost:7860/`). ### Troubleshooting 1. **FFmpeg Not Found**: If you see an error related to `ffmpeg not found`, ensure FFmpeg is installed and added to your system's PATH. You can also specify its location manually in the script by setting `ffmpeg_location`. 2. **Pytube Errors**: If you encounter issues with `pytube`, ensure you’re using the `yt-dlp` version and that your URL is correctly formatted. 3. **Update Dependencies**: Ensure that `pip` and `conda` are up to date: ```bash conda update conda pip install --upgrade pip ``` ## License This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details. ================================================ FILE: app.py ================================================ import os import whisper import gradio as gr from download_video import download_mp3_yt_dlp import warnings warnings.filterwarnings("ignore", category=FutureWarning, module="torch") # Function to download the audio, title, and thumbnail from YouTube def download_video_info(url): try: # Call the function to download video and get title, thumbnail title, thumbnail_url = download_mp3_yt_dlp(url) audio_file = "downloaded_video.mp3" # Path to the downloaded audio (MP3) return audio_file, title, thumbnail_url except Exception as e: return None, None, None, str(e) # Function to transcribe the downloaded audio using Whisper def transcribe_audio(audio_path, model_size="base", language="en"): model = whisper.load_model(model_size) result = model.transcribe(audio_path, language=language) return result['text'] # Split logic: First fetch title and thumbnail, then transcribe def get_video_info_and_transcribe(youtube_url, model_size="base", language="en"): # Fetch title and thumbnail first audio_path, title, thumbnail_url = download_video_info(youtube_url) # If fetching video info fails if not audio_path or not os.path.exists(audio_path): return gr.update(value="Error fetching video."), None, None # Show title and thumbnail to the user while the transcription is happening title_output = gr.update(value=title) # Show the thumbnail if available if thumbnail_url: thumbnail_output = gr.update(value=thumbnail_url) else: thumbnail_output = gr.update(visible=False) # Hide if no thumbnail # Start transcription transcription = transcribe_audio(audio_path, model_size, language) return title_output, thumbnail_output, gr.update(value=transcription) # Gradio interface setup using gradio.components with gr.Blocks() as demo: title = "

YouTube Whisper ⚡️

" gr.HTML(title) gr.Markdown( """ This tool lets you transcribe YouTube videos in multiple languages using **[Whisper](https://openai.com/research/whisper)**, an open-source speech recognition (ASR) model developed by OpenAI. ### Key Features: - **Fast transcription**: Using the **base** model, transcribing a **3 minute** video takes approximately **30 seconds**. - **Multiple language support**: Choose from **English**, **Spanish**, **French**, and more! - **Simple workflow**: 1. Paste a YouTube link. 2. Select the model size and language. 3. Click "Transcribe" to get the text from the video. _Transcription times may vary based on model size and video length._ """) with gr.Row(): youtube_url = gr.Textbox(label="YouTube Link", elem_id="yt_link", scale=5) model_size = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], label="Model Size", value="base", scale=1) language = gr.Dropdown(choices=["en", "es", "fr", "de", "it", "ja"], label="Language", value="en", scale=1) title_output = gr.Textbox(label="Video Title", interactive=False) with gr.Row(): thumbnail_output = gr.Image(label="Thumbnail", interactive=False, scale=1) transcription_output = gr.Textbox(label="Transcription", interactive=False, scale=1) transcribe_button = gr.Button("Transcribe") transcribe_button.click( get_video_info_and_transcribe, inputs=[youtube_url, model_size, language], outputs=[title_output, thumbnail_output, transcription_output] ) # Launch the app if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860) ================================================ FILE: download_video.py ================================================ import yt_dlp as youtube_dl import requests def download_mp3_yt_dlp(youtube_url): # Set up yt-dlp options ydl_opts = { 'format': 'bestaudio/best', 'postprocessors': [{ 'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192', }], 'outtmpl': 'downloaded_video.%(ext)s', 'quiet': False, 'no_warnings': True, 'progress_hooks': [lambda d: print(f"Downloading {d['filename']}: {d['_percent_str']}")], } # Extract video info including title and thumbnail with youtube_dl.YoutubeDL() as ydl: info_dict = ydl.extract_info(youtube_url, download=False) title = info_dict.get('title', 'Unknown Title') thumbnail_url = info_dict.get('thumbnail', None) # Download the MP3 using yt-dlp with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download([youtube_url]) # Fetch the thumbnail for display if thumbnail_url: response = requests.get(thumbnail_url) if response.status_code == 200: with open('thumbnail.jpg', 'wb') as f: f.write(response.content) print(f"Thumbnail downloaded successfully.") else: print(f"Failed to download thumbnail. HTTP Status Code: {response.status_code}") # Return the title and thumbnail URL return title, thumbnail_url # Example usage: # youtube_url = "https://youtu.be/MAZyQ-38b8M?si=q0dai-wF6FQz6MGN" # title, thumbnail_url = download_mp3_yt_dlp(youtube_url) # print(f"Title: {title}") # print(f"Thumbnail: {thumbnail_url}") ================================================ FILE: environment.yml ================================================ name: yt-whisper channels: - defaults - conda-forge dependencies: - python=3.9 - pip - pip: - requests - gradio - openai-whisper @ git+https://github.com/openai/whisper.git - yt_dlp - tqdm ================================================ FILE: packages.txt ================================================ chromium-driver ================================================ FILE: requirements.txt ================================================ requests gradio openai-whisper @ git+https://github.com/openai/whisper.git tqdm yt_dlp